Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog | 137
-rw-r--r--  gcc/DATESTAMP | 2
-rw-r--r--  gcc/Makefile.in | 2
-rw-r--r--  gcc/auto-profile.cc | 24
-rw-r--r--  gcc/avoid-store-forwarding.cc | 100
-rw-r--r--  gcc/common/config/riscv/riscv-common.cc | 3
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 1
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sme.def | 3
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc | 3
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md | 276
-rw-r--r--  gcc/config/aarch64/aarch64.cc | 61
-rw-r--r--  gcc/config/gcn/gcn.cc | 18
-rw-r--r--  gcc/config/i386/i386-modes.def | 2
-rw-r--r--  gcc/config/i386/i386.cc | 44
-rw-r--r--  gcc/config/i386/i386.md | 3
-rw-r--r--  gcc/config/i386/sse.md | 13
-rw-r--r--  gcc/config/riscv/gen-riscv-mcpu-texi.cc | 43
-rw-r--r--  gcc/config/riscv/gen-riscv-mtune-texi.cc | 41
-rw-r--r--  gcc/config/riscv/t-riscv | 37
-rw-r--r--  gcc/config/s390/s390-protos.h | 2
-rw-r--r--  gcc/config/s390/s390.cc | 161
-rw-r--r--  gcc/config/s390/s390.md | 21
-rw-r--r--  gcc/cp/parser.cc | 32
-rw-r--r--  gcc/cp/pt.cc | 52
-rw-r--r--  gcc/cprop.cc | 24
-rw-r--r--  gcc/doc/invoke.texi | 23
-rw-r--r--  gcc/doc/riscv-mcpu.texi | 69
-rw-r--r--  gcc/doc/riscv-mtune.texi | 59
-rw-r--r--  gcc/doc/tm.texi | 9
-rw-r--r--  gcc/doc/tm.texi.in | 2
-rw-r--r--  gcc/fortran/check.cc | 21
-rw-r--r--  gcc/fortran/gfortran.h | 2
-rw-r--r--  gcc/fortran/intrinsic.cc | 8
-rw-r--r--  gcc/fortran/intrinsic.h | 2
-rw-r--r--  gcc/fortran/intrinsic.texi | 64
-rw-r--r--  gcc/fortran/iresolve.cc | 13
-rw-r--r--  gcc/fortran/trans-array.cc | 12
-rw-r--r--  gcc/fortran/trans-array.h | 5
-rw-r--r--  gcc/fortran/trans-decl.cc | 14
-rw-r--r--  gcc/fortran/trans-intrinsic.cc | 72
-rw-r--r--  gcc/fortran/trans-stmt.cc | 7
-rw-r--r--  gcc/fortran/trans.h | 2
-rw-r--r--  gcc/gcov-io.cc | 2
-rw-r--r--  gcc/gcov-io.h | 1
-rw-r--r--  gcc/hooks.cc | 7
-rw-r--r--  gcc/hooks.h | 1
-rw-r--r--  gcc/m2/ChangeLog | 33
-rw-r--r--  gcc/m2/gm2-compiler/M2GenGCC.mod | 5
-rw-r--r--  gcc/m2/gm2-compiler/M2Range.mod | 2
-rw-r--r--  gcc/match.pd | 22
-rw-r--r--  gcc/optc-save-gen.awk | 19
-rw-r--r--  gcc/params.opt | 6
-rw-r--r--  gcc/predict.cc | 28
-rw-r--r--  gcc/simplify-rtx.cc | 84
-rw-r--r--  gcc/symtab.cc | 5
-rw-r--r--  gcc/target.def | 14
-rw-r--r--  gcc/testsuite/ChangeLog | 108
-rw-r--r--  gcc/testsuite/g++.dg/cpp/if-comma-1.C | 42
-rw-r--r--  gcc/testsuite/g++.dg/cpp1z/nontype8.C | 12
-rw-r--r--  gcc/testsuite/g++.dg/modules/class-11_a.H | 4
-rw-r--r--  gcc/testsuite/g++.dg/tc1/dr49.C | 4
-rw-r--r--  gcc/testsuite/g++.dg/template/func2.C | 3
-rw-r--r--  gcc/testsuite/g++.dg/tree-prof/eh1.C | 34
-rw-r--r--  gcc/testsuite/g++.dg/warn/pr121133-1.C | 16
-rw-r--r--  gcc/testsuite/g++.dg/warn/pr121133-2.C | 5
-rw-r--r--  gcc/testsuite/g++.dg/warn/pr121133-3.C | 5
-rw-r--r--  gcc/testsuite/g++.dg/warn/pr121133-4.C | 5
-rw-r--r--  gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C | 18
-rw-r--r--  gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C | 35
-rw-r--r--  gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C | 14
-rw-r--r--  gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C | 27
-rw-r--r--  gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C | 11
-rw-r--r--  gcc/testsuite/gcc.dg/pr120660.c | 19
-rw-r--r--  gcc/testsuite/gcc.dg/torture/pr121295-1.c | 13
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr112325.c | 1
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr117888-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr120687-1.c | 16
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr120687-2.c | 17
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr120687-3.c | 16
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c | 62
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c | 54
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-early-break_52.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/cmpbr.c | 40
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr121300.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c | 12
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c | 8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c | 28
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c | 22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c | 22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c | 22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c | 22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c | 22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c | 22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c | 26
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c | 38
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c | 38
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c | 38
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c | 38
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c | 15
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr119795.c | 26
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr120427-5.c | 10
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr121208-1a.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr121208-1b.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr121274.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c | 5
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c | 6
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c | 1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h | 17
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h | 196
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c | 17
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c | 17
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c | 17
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c | 17
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c | 11
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-fp-1.c | 23
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-fp-2.c | 23
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-fp-3.c | 23
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-fp-4.c | 53
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-int-1.c | 30
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-int-2.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/s390/spaceship-int-3.c | 21
-rw-r--r--  gcc/testsuite/gfortran.dg/split_1.f90 | 28
-rw-r--r--  gcc/testsuite/gfortran.dg/split_2.f90 | 22
-rw-r--r--  gcc/testsuite/gfortran.dg/split_3.f90 | 11
-rw-r--r--  gcc/testsuite/gfortran.dg/split_4.f90 | 11
-rw-r--r--  gcc/testsuite/lib/profopt.exp | 2
-rw-r--r--  gcc/tree-if-conv.cc | 67
-rw-r--r--  gcc/tree-ssa-reassoc.cc | 10
-rw-r--r--  gcc/tree-vect-data-refs.cc | 20
-rw-r--r--  gcc/tree-vect-loop-manip.cc | 2
-rw-r--r--  gcc/tree-vect-slp.cc | 3
-rw-r--r--  gcc/tree-vect-stmts.cc | 127
-rw-r--r--  gcc/tree-vectorizer.h | 4
-rw-r--r--  gcc/vec.h | 4
168 files changed, 3606 insertions, 432 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 332776e..3170604 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,140 @@
+2025-07-30 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR rtl-optimization/121302
+ * simplify-rtx.cc (simplify_context::simplify_subreg): Use
+ byte instead of 0 when calling simplify_subreg.
+
+2025-07-29 Spencer Abson <spencer.abson@arm.com>
+
+ * config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred):
+ Declare.
+ * config/aarch64/aarch64-sve.md (and<mode>3): Change this to...
+ (@and<mode>3): ...this, so that we can use gen_and3.
+ (@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16,
+ use aarch64_predicate_operand.
+ (*cond_<optab><mode>_2_strict): Likewise.
+ (*cond_<optab><mode>_3_strict): Likewise.
+ (*cond_<optab><mode>_any_strict): Likewise.
+ (*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to SVE_F,
+ use aarch64_predicate_operand.
+ (*cond_<optab><mode>_any_const_strict): Likewise.
+ (*cond_sub<mode>_3_const_strict): Likewise.
+ (*cond_sub<mode>_const_strict): Likewise.
+ (*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and update
+ the comment here.
+ * config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred): New
+ function. Helper to mask the predicate in conditional expanders.
+
+2025-07-29 Dongyan Chen <chendongyan@isrc.iscas.ac.cn>
+
+ * Makefile.in: Add riscv-mcpu.texi and riscv-mtune.texi to the list
+ of files to be processed by the Texinfo generator.
+ * config/riscv/t-riscv: Add rule for generating riscv-mcpu.texi
+ and riscv-mtune.texi.
+ * doc/invoke.texi: Replace hand-written extension table with
+ `@include riscv-mcpu.texi` and `@include riscv-mtune.texi` to
+ pull in auto-generated entries.
+ * config/riscv/gen-riscv-mcpu-texi.cc: New file.
+ * config/riscv/gen-riscv-mtune-texi.cc: New file.
+ * doc/riscv-mcpu.texi: New file.
+ * doc/riscv-mtune.texi: New file.
+
+2025-07-29 Richard Sandiford <richard.sandiford@arm.com>
+
+ * simplify-rtx.cc (simplify_context::simplify_subreg): Distribute
+ lowpart subregs through AND/IOR/XOR, if doing so eliminates one
+ of the terms.
+ (test_scalar_int_ext_ops): Add some tests of the above for integers.
+ * config/aarch64/aarch64.cc (aarch64_test_sve_folding): Likewise
+ add tests for predicate modes.
+
+2025-07-29 Richard Sandiford <richard.sandiford@arm.com>
+
+ * config/aarch64/aarch64-sve-builtins.cc
+ (function_expander::get_reg_target): Check whether the target
+ is a valid register_operand.
+
+2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu>
+
+ PR rtl-optimization/120660
+ * avoid-store-forwarding.cc (process_store_forwarding):
+ Fix instruction generation when having multiple stores with
+ base offset.
+
+2025-07-29 Christoph Müllner <christoph.muellner@vrull.eu>
+
+ * common/config/riscv/riscv-common.cc (riscv_ext_is_subset):
+ Remove use of structured binding to fix compiler warning.
+
+2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu>
+
+ PR rtl-optimization/119795
+ * avoid-store-forwarding.cc
+ (store_forwarding_analyzer::avoid_store_forwarding): Skip
+ transformations for stores that operate on the same address
+ range as deleted ones.
+
+2025-07-29 Pan Li <pan2.li@intel.com>
+
+ * match.pd: Add mul based unsigned SAT_MUL.
+
+2025-07-29 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/120687
+ * tree-ssa-reassoc.cc (reassociate_bb): Do not disturb
+ the sorted operand order in the early pass.
+ * tree-vect-slp.cc (vect_analyze_slp): Dump when a detected
+ reduction chain fails SLP discovery.
+
+2025-07-29 Alfie Richards <alfie.richards@arm.com>
+
+ PR middle-end/121261
+ * vec.h: Add null ptr check.
+
+2025-07-29 Jakub Jelinek <jakub@redhat.com>
+
+ PR middle-end/121159
+ * calls.cc (can_implement_as_sibling_call_p): Don't reject declared
+ noreturn functions in musttail calls.
+
+2025-07-29 Andrew Pinski <quic_apinski@quicinc.com>
+
+ * output.h (MAX_ALIGN_MERGABLE): New define.
+ * tree-switch-conversion.cc (switch_conversion::build_one_array):
+ Use MAX_ALIGN_MERGABLE instead of 256.
+ * varasm.cc (mergeable_string_section): Likewise.
+ (mergeable_constant_section): Likewise.
+
+2025-07-29 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR middle-end/120523
+ * output.h (mergeable_constant_section): New declaration taking
+ unsigned HOST_WIDE_INT for the size.
+ * tree-switch-conversion.cc (switch_conversion::build_one_array):
+ Increase the alignment of CSWTCH for sizes less than 32 bytes.
+ * varasm.cc (mergeable_constant_section): Split into two overloads,
+ one taking the size as an unsigned HOST_WIDE_INT and the
+ other taking it as a tree.
+ (default_elf_select_section): Pass DECL_SIZE instead of
+ DECL_MODE to mergeable_constant_section.
+
+2025-07-29 Richard Biener <rguenther@suse.de>
+
+ * tree-vect-stmts.cc (vectorizable_load): Un-factor VMAT
+ specific code to their handling blocks.
+
+2025-07-29 Richard Biener <rguenther@suse.de>
+
+ * tree-vectorizer.h (gather_scatter_info::offset_dt): Remove.
+ * tree-vect-data-refs.cc (vect_describe_gather_scatter_call):
+ Do not set it.
+ (vect_check_gather_scatter): Likewise.
+ * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset):
+ Likewise.
+ (get_group_load_store_type): Use the vector type of the offset
+ SLP child. Do not re-check vect_is_simple_use validated by
+ SLP build.
+
2025-07-28 Georg-Johann Lay <avr@gjlay.de>
PR target/121277
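
As background for Pan Li's match.pd entry above ("Add mul based unsigned SAT_MUL") and the new gcc.target/riscv/sat/sat_u_mul-* tests in the diffstat, the source-level idiom involved is a widening multiply clamped to the narrow type's maximum. A minimal sketch, not the literal pattern or testcase:

#include <stdint.h>

/* Widen, multiply, then saturate to the narrow type's maximum.  This is
   the kind of idiom the unsigned SAT_MUL recognition targets; the exact
   shape matched lives in match.pd, this is only an illustration.  */
static inline uint8_t
sat_u_mul_u8_from_u16 (uint8_t a, uint8_t b)
{
  uint16_t w = (uint16_t) a * (uint16_t) b;
  return w > UINT8_MAX ? UINT8_MAX : (uint8_t) w;
}
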
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 5d7eb78..3db1b8e 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250729
+20250730
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 7314a3b..d7d5cbe 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3720,7 +3720,7 @@ TEXI_GCC_FILES = gcc.texi gcc-common.texi gcc-vers.texi frontends.texi \
contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \
fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \
implement-c.texi implement-cxx.texi gcov-tool.texi gcov-dump.texi \
- lto-dump.texi riscv-ext.texi
+ lto-dump.texi riscv-ext.texi riscv-mcpu.texi riscv-mtune.texi
# we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with
# the generated tm.texi; the latter might have a more recent timestamp,
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index d1954b4..7ff9526 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -885,6 +885,8 @@ string_table::read ()
{
vector_.quick_push (xstrdup (gcov_read_string ()));
map_[vector_.last ()] = i;
+ if (gcov_is_error ())
+ return false;
}
return true;
}
@@ -1629,7 +1631,13 @@ function_instance::match (cgraph_node *node,
if (iter->first != end_location
&& iter->first != start_location
&& (iter->first & 65535) != zero_location
- && iter->first)
+ && iter->first
+ /* FIXME: dwarf5 does not represent the inline stack of debug
+ statements and consequently create_gcov sometimes mixes up
+ statements from other functions.  Do not warn the user about
+ this until the problem is solved.  We still write the info
+ into the dump file. */
+ && 0)
{
if (!warned)
warned = warning_at (DECL_SOURCE_LOCATION (node->decl),
@@ -2741,14 +2749,22 @@ read_profile (void)
/* autofdo_source_profile. */
afdo_source_profile = autofdo_source_profile::create ();
- if (afdo_source_profile == NULL)
+ if (afdo_source_profile == NULL
+ || gcov_is_error ())
{
error ("cannot read function profile from %s", auto_profile_file);
+ delete afdo_source_profile;
+ afdo_source_profile = NULL;
return;
}
/* autofdo_module_profile. */
fake_read_autofdo_module_profile ();
+ if (gcov_is_error ())
+ {
+ error ("cannot read module profile from %s", auto_profile_file);
+ return;
+ }
}
/* From AutoFDO profiles, find values inside STMT for that we want to measure
@@ -3425,7 +3441,7 @@ add_scale (vec <scale> *scales, profile_count annotated, profile_count orig)
annotated.dump (dump_file);
fprintf (dump_file, "\n");
}
- if (orig.force_nonzero () == orig)
+ if (orig.nonzero_p ())
{
sreal scale
= annotated.guessed_local ()
@@ -3672,7 +3688,7 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb)
{
if (dump_file)
fprintf (dump_file,
- " Can not determine count from the boundary; giving up");
+ " Can not determine count from the boundary; giving up\n");
continue;
}
gcc_checking_assert (scales.length ());
diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 785efd2..1de6fd6 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -231,20 +231,39 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
int move_to_front = -1;
int total_cost = 0;
+ int base_offset_index = -1;
+
+ /* Find the last store that has the same offset as the load, in the case
+ that we're eliminating the load.  We will try to use it as a base register
+ to avoid bit inserts (see second loop below). We want the last one, as
+ it will be wider and we don't want to overwrite the base register if
+ there are many of them. */
+ if (load_elim)
+ {
+ FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
+ {
+ const bool has_base_offset
+ = known_eq (poly_uint64 (it->offset),
+ subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
+ load_size));
+ if (has_base_offset)
+ {
+ base_offset_index = i;
+ break;
+ }
+ }
+ }
/* Check if we can emit bit insert instructions for all forwarded stores. */
FOR_EACH_VEC_ELT (stores, i, it)
{
it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
rtx_insn *insns = NULL;
- const bool has_base_offset
- = known_eq (poly_uint64 (it->offset),
- subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
- load_size));
-
- /* If we're eliminating the load then find the store with zero offset
- and use it as the base register to avoid a bit insert if possible. */
- if (load_elim && has_base_offset)
+
+ /* If we're eliminating the load and this store has the base offset,
+ use it as the base register to avoid a bit insert if possible.
+ Load elimination is implied by base_offset_index != -1. */
+ if (i == (unsigned) base_offset_index)
{
start_sequence ();
@@ -437,9 +456,22 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
return;
auto_vec<store_fwd_info, 8> store_exprs;
+ auto_vec<rtx> store_exprs_del;
rtx_insn *insn;
unsigned int insn_cnt = 0;
+ /* We are iterating over the basic block's instructions detecting store
+ instructions. Upon reaching a load instruction, we check if any of the
+ previously detected stores could result in store forwarding. In that
+ case, we try to reorder the load and store instructions.
+ We skip this transformation when we encounter complex memory operations,
+ instructions that might throw an exception, instruction dependencies,
+ etc. This is done by clearing the vector of detected stores, while
+ keeping the removed stores in another vector. By doing so, we can check
+ if any of the removed stores operated on the load's address range, when
+ reaching a subsequent store that operates on the same address range,
+ as this would lead to incorrect values on the register that keeps the
+ loaded value. */
FOR_BB_INSNS (bb, insn)
{
if (!NONDEBUG_INSN_P (insn))
@@ -452,6 +484,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
if (!set || insn_could_throw_p (insn))
{
+ unsigned int i;
+ store_fwd_info *it;
+ FOR_EACH_VEC_ELT (store_exprs, i, it)
+ store_exprs_del.safe_push (it->store_mem);
store_exprs.truncate (0);
continue;
}
@@ -475,6 +511,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
|| (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
|| !MEM_SIZE (load_mem).is_constant ())))
{
+ unsigned int i;
+ store_fwd_info *it;
+ FOR_EACH_VEC_ELT (store_exprs, i, it)
+ store_exprs_del.safe_push (it->store_mem);
store_exprs.truncate (0);
continue;
}
@@ -526,6 +566,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
it->remove = true;
removed_count++;
remove_rest = true;
+ store_exprs_del.safe_push (it->store_mem);
}
}
}
@@ -565,23 +606,46 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
it->remove = true;
removed_count++;
remove_rest = true;
+ forwardings.truncate (0);
}
else if (is_store_forwarding (store_mem, load_mem, &off_val))
{
+ unsigned int j;
+ rtx *del_it;
+ bool same_range_as_removed = false;
+
+ /* Check if another store in the load's address range has
+ been deleted due to a constraint violation. In this case
+ we can't forward any other stores that operate in this
+ range, as it would lead to partial update of the register
+ that holds the loaded value. */
+ FOR_EACH_VEC_ELT (store_exprs_del, j, del_it)
+ {
+ rtx del_store_mem = *del_it;
+ same_range_as_removed
+ = is_store_forwarding (del_store_mem, load_mem, NULL);
+ if (same_range_as_removed)
+ break;
+ }
+
/* Check if moving this store after the load is legal. */
bool write_dep = false;
- for (unsigned int j = store_exprs.length () - 1; j != i; j--)
+ if (!same_range_as_removed)
{
- if (!store_exprs[j].forwarded
- && output_dependence (store_mem,
- store_exprs[j].store_mem))
+ unsigned int j = store_exprs.length () - 1;
+ for (; j != i; j--)
{
- write_dep = true;
- break;
+ if (!store_exprs[j].forwarded
+ && output_dependence (store_mem,
+ store_exprs[j].store_mem))
+ {
+ write_dep = true;
+ break;
+ }
}
}
- if (!write_dep)
+ if (!same_range_as_removed && !write_dep)
{
it->forwarded = true;
it->offset = off_val;
@@ -601,6 +665,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
it->remove = true;
removed_count++;
remove_rest = true;
+ forwardings.truncate (0);
}
}
@@ -608,9 +673,12 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
process_store_forwarding (forwardings, insn, load_mem);
}
+ /* Abort if we encounter a memory read/write that is not a simple
+ store/load, as we can't make safe assumptions about its
+ side effects. */
if ((writes_mem && !is_simple_store)
|| (reads_mem && !is_simple_load))
- store_exprs.truncate (0);
+ return;
if (removed_count)
{
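
For context on the store-forwarding shapes this pass rewrites (and that the PR119795/PR120660 fixes above stress), the hazard is a sequence of narrow stores followed by a wider load that overlaps them. A rough source-level sketch, not taken from the PR testcases:

#include <stdint.h>

/* Two narrow stores followed by a wide load that reads both of them.
   The pass tries to forward the stored values in registers; the store
   at offset 0 is the "base offset" store referred to above.  */
uint32_t
load_after_partial_stores (uint32_t *p, uint8_t lo, uint8_t hi)
{
  ((uint8_t *) p)[0] = lo;   /* store at the base offset */
  ((uint8_t *) p)[3] = hi;   /* store at a higher offset */
  return *p;                 /* wide load overlapping both stores */
}
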
diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc
index 82037a3..da3cb9f 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1606,8 +1606,9 @@ bool
riscv_ext_is_subset (struct cl_target_option *opts,
struct cl_target_option *subset)
{
- for (const auto &[ext_name, ext_info] : riscv_ext_infos)
+ for (const auto &riscv_ext_info : riscv_ext_infos)
{
+ const auto &ext_info = riscv_ext_info.second;
if (ext_info.check_opts (opts) && !ext_info.check_opts (subset))
return false;
}
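
The riscv-common.cc hunk above drops a structured binding whose first name was otherwise unused, which some host compilers warn about; indexing the pair members explicitly avoids the warning. A minimal analogue of the rewritten loop, using hypothetical types rather than the real riscv_ext_infos:

#include <map>
#include <string>

struct ext_info { bool enabled; };

/* Iterate a map without naming the key: only .second is pulled out,
   mirroring the fix above, so no unused-binding warning can trigger.  */
static bool
all_enabled (const std::map<std::string, ext_info> &infos)
{
  for (const auto &entry : infos)
    {
      const auto &info = entry.second;
      if (!info.enabled)
	return false;
    }
  return true;
}
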
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e946e8d..38c307c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
rtx aarch64_sve_packed_pred (machine_mode);
rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx);
void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
bool aarch64_expand_maskloadstore (rtx *, machine_mode);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 8e6aadc..117b70e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -92,7 +92,8 @@ DEF_SME_FUNCTION (svstr_zt, str_zt, none, none)
DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 && AARCH64_FL_FAMINMAX)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \
+ | AARCH64_FL_FAMINMAX)
DEF_SME_FUNCTION_GS (svamin, binary_opt_single_n, all_float, x24, none)
DEF_SME_FUNCTION_GS (svamax, binary_opt_single_n, all_float, x24, none)
#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 2b627a9..01833a8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -4004,7 +4004,8 @@ rtx
function_expander::get_reg_target ()
{
machine_mode target_mode = result_mode ();
- if (!possible_target || GET_MODE (possible_target) != target_mode)
+ if (!possible_target
+ || !register_operand (possible_target, target_mode))
possible_target = gen_reg_rtx (target_mode);
return possible_target;
}
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b252eef..80a3288 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5605,18 +5605,21 @@
;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+ {
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+ }
)
;; Predicated floating-point operations, merging with the first input.
@@ -5644,14 +5647,14 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5687,14 +5690,14 @@
)
(define_insn "*cond_<optab><mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5730,14 +5733,14 @@
)
(define_insn "*cond_<optab><mode>_3_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5794,16 +5797,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5868,16 +5871,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5953,14 +5956,14 @@
)
(define_insn "*cond_add<mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -6015,16 +6018,16 @@
)
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6266,14 +6269,14 @@
)
(define_insn "*cond_sub<mode>_3_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6323,16 +6326,16 @@
)
(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6913,7 +6916,7 @@
;; Predicate AND. We can reuse one of the inputs as the GP.
;; Doubling the second operand is the preferred implementation
;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand")
(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
(match_operand:PRED_ALL 2 "register_operand")))]
@@ -7595,29 +7598,29 @@
;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 4)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 1 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_dup 5)
+ (match_operand:SVE_F_B16B16 1 "register_operand")
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]);
}
)
;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 5 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
@@ -7631,17 +7634,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7649,20 +7652,22 @@
second of the two. */
if (rtx_equal_p (operands[3], operands[5]))
std::swap (operands[2], operands[3]);
+
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
})
;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7678,15 +7683,15 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7700,15 +7705,15 @@
;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7724,15 +7729,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7746,17 +7751,17 @@
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 6)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7792,17 +7797,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -8201,20 +8206,23 @@
;;
;; For unpacked vectors, it doesn't really matter whether SEL uses the
;; the container size or the element size. If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector. Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits. If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these separate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
;;
;; For the other instructions, using the element size is more natural,
;; so we do that for SEL as well.
+;;
(define_insn "*vcond_mask_<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand")
+ [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
(match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
(match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
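
The expander changes above funnel their governing predicates through the new aarch64_sve_emit_masked_fp_pred helper (defined in the aarch64.cc hunk that follows). At the ACLE intrinsics level, the masking step it performs is roughly an AND of the incoming predicate with an all-true predicate at the element width; a hedged analogue only, not the compiler code itself:

#include <arm_sve.h>

/* Restrict a byte-granular predicate to the lanes of a 32-bit element
   vector: AND it with a ptrue at the element width, so bits that do not
   correspond to real elements are guaranteed inactive.  */
svbool_t
mask_to_f32_lanes (svbool_t pg)
{
  return svand_b_z (svptrue_b8 (), pg, svptrue_b32 ());
}
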
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index cb1699a..5502d0b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3933,6 +3933,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
}
+/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE
+ is a partial vector mode, and if exceptions must be suppressed for its
+ undefined elements, convert PRED from a container-level predicate to
+ an element-level predicate and ensure that the undefined elements
+ are inactive. Make no changes otherwise.
+
+ Return the resultant predicate. */
+rtx
+aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+ if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+ {
+ /* Generate an element-level mask. */
+ rtx mask = aarch64_sve_packed_pred (data_mode);
+ machine_mode pmode = GET_MODE (mask);
+
+ /* Apply the existing predicate. */
+ rtx dst = gen_reg_rtx (pmode);
+ emit_insn (gen_and3 (pmode, dst, mask,
+ gen_lowpart (pmode, pred)));
+ return dst;
+ }
+
+ return pred;
+}
+
/* Emit a comparison CMP between OP0 and OP1, both of which have mode
DATA_MODE, and return the result in a predicate of mode PRED_MODE.
Use TARGET as the target register if nonnull and convenient. */
@@ -31964,9 +31991,43 @@ aarch64_test_sysreg_encoding_clashes (void)
static void
aarch64_test_sve_folding ()
{
+ aarch64_target_switcher switcher (AARCH64_FL_SVE);
+
tree res = fold_unary (BIT_NOT_EXPR, ssizetype,
ssize_int (poly_int64 (1, 1)));
ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1))));
+
+ auto build_v16bi = [](bool a, bool b)
+ {
+ rtx_vector_builder builder (VNx16BImode, 2, 1);
+ builder.quick_push (a ? const1_rtx : const0_rtx);
+ builder.quick_push (b ? const1_rtx : const0_rtx);
+ return builder.build ();
+ };
+ rtx v16bi_10 = build_v16bi (1, 0);
+ rtx v16bi_01 = build_v16bi (0, 1);
+
+ for (auto mode : { VNx8BImode, VNx4BImode, VNx2BImode })
+ {
+ rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
+ rtx subreg = lowpart_subreg (VNx16BImode, reg, mode);
+ rtx and1 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, and1, VNx16BImode), reg);
+ rtx and0 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, and0, VNx16BImode), CONST0_RTX (mode));
+
+ rtx ior1 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, ior1, VNx16BImode), CONSTM1_RTX (mode));
+ rtx ior0 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, ior0, VNx16BImode), reg);
+
+ rtx xor1 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_RTX_EQ (lowpart_subreg (mode, xor1, VNx16BImode),
+ lowpart_subreg (mode, gen_rtx_NOT (VNx16BImode, subreg),
+ VNx16BImode));
+ rtx xor0 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, xor0, VNx16BImode), reg);
+ }
}
/* Run all target-specific selftests. */
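
The selftests added above exercise the new simplify-rtx rule that distributes a lowpart subreg through AND/IOR/XOR when one operand's low part is trivially all-zeros or all-ones. The scalar-integer analogue of those identities, as a self-contained sanity check rather than the RTL code:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t x = 0x12345678u;

  /* lowpart (x AND c) collapses when lowpart (c) is all-zeros or all-ones.  */
  assert ((uint8_t) (x & 0xffffff00u) == 0);
  assert ((uint8_t) (x & 0x000000ffu) == (uint8_t) x);

  /* Same for IOR...  */
  assert ((uint8_t) (x | 0x000000ffu) == 0xff);
  assert ((uint8_t) (x | 0xffffff00u) == (uint8_t) x);

  /* ...and for XOR, where an all-ones low part gives the complement.  */
  assert ((uint8_t) (x ^ 0x000000ffu) == (uint8_t) ~x);
  assert ((uint8_t) (x ^ 0xffffff00u) == (uint8_t) x);

  return 0;
}
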
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 557568c..5ffeb23 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -54,6 +54,7 @@
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -183,6 +184,11 @@ gcn_option_override (void)
if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
+
+ /* TODO: This seems to produce tighter loops, but the testsuite expects it
+ to be set to '2', so I'll leave it default for now.
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 1); */
}
/* }}} */
@@ -5789,6 +5795,16 @@ gcn_libc_has_function (enum function_class fn_class,
return bsd_libc_has_function (fn_class, type);
}
+/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */
+
+static bool
+gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
+ int ARG_UNUSED (scale),
+ unsigned int ARG_UNUSED (group_size))
+{
+ return true;
+}
+
/* }}} */
/* {{{ md_reorg pass. */
@@ -8140,6 +8156,8 @@ gcn_dwarf_register_span (rtx rtl)
gcn_vectorize_builtin_vectorized_function
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 2fedbeb..c2db305 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -91,7 +91,6 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
-VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
VECTOR_MODE (FLOAT, BF, 2); /* V2BF */
VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
@@ -102,7 +101,6 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, QI, 12); /* V12QI */
VECTOR_MODE (INT, QI, 14); /* V14QI */
VECTOR_MODE (INT, HI, 6); /* V6HI */
-VECTOR_MODE (INT, SI, 64); /* V64SI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 0f0acae..613f2b2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21513,8 +21513,7 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
/* Register pair for mask registers. */
if (mode == P2QImode || mode == P2HImode)
return 2;
- if (mode == V64SFmode || mode == V64SImode)
- return 4;
+
return 1;
}
@@ -25233,20 +25232,14 @@ asm_preferred_eh_data_format (int code, int global)
return DW_EH_PE_absptr;
}
-/* Implement targetm.vectorize.builtin_vectorization_cost. */
+/* Worker for ix86_builtin_vectorization_cost and the fallback calls
+ from ix86_vector_costs::add_stmt_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
- tree vectype, int)
+ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
+ machine_mode mode)
{
- bool fp = false;
- machine_mode mode = TImode;
+ bool fp = FLOAT_MODE_P (mode);
int index;
- if (vectype != NULL)
- {
- fp = FLOAT_TYPE_P (vectype);
- mode = TYPE_MODE (vectype);
- }
-
switch (type_of_cost)
{
case scalar_stmt:
@@ -25305,14 +25298,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
COSTS_N_INSNS
(ix86_cost->gather_static
+ ix86_cost->gather_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case vector_scatter_store:
return ix86_vec_cost (mode,
COSTS_N_INSNS
(ix86_cost->scatter_static
+ ix86_cost->scatter_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case cond_branch_taken:
return ix86_cost->cond_taken_branch_cost;
@@ -25330,7 +25323,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
- int n = TYPE_VECTOR_SUBPARTS (vectype);
+ int n = GET_MODE_NUNITS (mode);
/* N - 1 element inserts into an SSE vector, the possible
GPR -> XMM move is accounted for in add_stmt_cost. */
if (GET_MODE_BITSIZE (mode) <= 128)
@@ -25358,6 +25351,17 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int)
+{
+ machine_mode mode = TImode;
+ if (vectype != NULL)
+ mode = TYPE_MODE (vectype);
+ return ix86_default_vector_cost (type_of_cost, mode);
+}
+
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
@@ -25811,7 +25815,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
- tree vectype, int misalign,
+ tree vectype, int,
vect_cost_model_location where)
{
unsigned retval = 0;
@@ -26160,14 +26164,14 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
|| (SLP_TREE_MEMORY_ACCESS_TYPE (node)
== VMAT_GATHER_SCATTER)))))
{
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
}
else if ((kind == vec_construct || kind == scalar_to_vec)
&& node
&& SLP_TREE_DEF_TYPE (node) == vect_external_def)
{
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
unsigned i;
tree op;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
@@ -26231,7 +26235,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
TREE_VISITED (op) = 0;
}
if (stmt_cost == -1)
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
if (kind == vec_perm && vectype
&& GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eb52699..a50475b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2968,7 +2968,8 @@
(match_operand:SWI248 1 "const_int_operand"))]
"optimize_insn_for_size_p () && optimize_size > 1
&& operands[1] != const0_rtx
- && operands[1] != constm1_rtx
+ && (operands[1] != constm1_rtx
+ || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0])))
&& IN_RANGE (INTVAL (operands[1]), -128, 127)
&& !ix86_red_zone_used
&& REGNO (operands[0]) != SP_REG"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d88c3d6..ec74f93 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21729,6 +21729,19 @@
(const_string "orig")))
(set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+;; Eliminate redundancy caused by
+;; /* Special case TImode to 128-bit vector conversions via V2DI. */
+;; in ix86_expand_vector_move
+
+(define_split
+ [(set (match_operand:V2DI 0 "register_operand")
+ (vec_concat:V2DI
+ (subreg:DI (match_operand:TI 1 "register_operand") 0)
+ (subreg:DI (match_dup 1) 8)))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0)
+ (subreg:V2DI (match_dup 1) 0))])
+
(define_insn "*vec_concatv2di_0"
[(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
(vec_concat:V2DI
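
The new define_split above folds away the vec_concat of the two DImode halves that ix86_expand_vector_move creates for TImode-to-128-bit-vector moves. One plausible source pattern that reaches that path is a bit-reinterpretation of a 128-bit integer as a vector; a sketch only, the actual pr121274.c test may differ:

#include <emmintrin.h>
#include <string.h>

/* Reinterpret an unsigned __int128 as a V2DI vector.  The move from the
   TImode value into the vector register is where the expander used to
   emit the redundant vec_concat of two DImode subregs.  */
__m128i
int128_to_v2di (unsigned __int128 x)
{
  __m128i v;
  memcpy (&v, &x, sizeof (v));
  return v;
}
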
diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
new file mode 100644
index 0000000..9681438
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
@@ -0,0 +1,43 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Core Name}");
+ puts ("");
+ puts ("@opindex mcpu");
+ puts ("@item -mcpu=@var{processor-string}");
+ puts ("Use architecture of and optimize the output for the given processor, specified");
+ puts ("by particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> coreNames;
+
+#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \
+ coreNames.push_back (CORE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_CORE
+
+ for (size_t i = 0; i < coreNames.size(); ++i) {
+ if (i == coreNames.size() - 1) {
+ printf("@samp{%s}.\n", coreNames[i].c_str());
+ } else {
+ printf("@samp{%s},\n\n", coreNames[i].c_str());
+ }
+ }
+
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc
new file mode 100644
index 0000000..1bdfe2a
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc
@@ -0,0 +1,41 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Tune Name}");
+ puts ("");
+ puts ("@opindex mtune");
+ puts ("@item -mtune=@var{processor-string}");
+ puts ("Optimize the output for the given processor, specified by microarchitecture or");
+ puts ("particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> tuneNames;
+
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+ tuneNames.push_back (TUNE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_TUNE
+
+ for (size_t i = 0; i < tuneNames.size(); ++i) {
+ printf("@samp{%s},\n\n", tuneNames[i].c_str());
+ }
+
+ puts ("and all valid options for @option{-mcpu=}.");
+
+ return 0;
+}
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 7aac56a..a7eaa8b 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -229,8 +229,41 @@ s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext)
$(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi
$(STAMP) s-riscv-ext.texi
-# Run `riscv-regen' after you changed or added anything from riscv-ext*.def
+RISCV_CORES_DEFS = \
+ $(srcdir)/config/riscv/riscv-cores.def
+
+build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mtune-texi$(build_exeext): build/gen-riscv-mtune-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true
+
+$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true
+
+s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi
+ $(STAMP) s-riscv-mtune.texi
+
+s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi
+ $(STAMP) s-riscv-mcpu.texi
+
+# Run `riscv-regen' after changing or adding anything in riscv-ext*.def or riscv-cores*.def
.PHONY: riscv-regen
-riscv-regen: s-riscv-ext.texi s-riscv-ext.opt
+riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index d760a7e..6becad1 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -128,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
extern void s390_expand_vec_init (rtx, rtx);
extern rtx s390_expand_merge_perm_const (machine_mode, bool);
extern void s390_expand_merge (rtx, rtx, rtx, bool);
+extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx);
+extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx);
extern rtx s390_build_signbit_mask (machine_mode);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index abe551c..012b6db 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -8213,6 +8213,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
+/* Expand integer op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1.
+
+ Signedness is specified by op3. If op3 equals 1, then perform an unsigned
+ comparison, and if op3 equals -1, then perform a signed comparison.
+
+ For integer comparisons we strive for a sequence like
+ CR[L] ; LHI ; LOCHIL ; LOCHIH
+ where the first three instructions fit into a group. */
+
+void
+s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 == const1_rtx || op3 == constm1_rtx);
+
+ rtx cc, cond_lt, cond_gt;
+ machine_mode cc_mode;
+ machine_mode mode = GET_MODE (op1);
+
+  /* Prior to VXE3, emulate a 128-bit comparison by breaking it up into three
+     comparisons.  First test the high halves.  If they are equal, test the
+     low halves.  Finally, test for equality.  Depending on the results make
+     use of LOCs.  */
+ if (mode == TImode && !TARGET_VXE3)
+ {
+ gcc_assert (TARGET_VX);
+ op1
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+ rtx lab = gen_label_rtx ();
+ rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM);
+      /* Compare high halves for equality.
+ VEC[L]G op1, op2 sets
+ CC1 if high(op1) < high(op2)
+ and
+ CC2 if high(op1) > high(op2). */
+ machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (gen_rtx_SET (
+ gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx));
+      /* At this point we know that the high halves are equal.
+ VCHLGS op2, op1 sets CC1 if low(op1) < low(op2) */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op2, op1)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+ emit_insn (gen_rtx_SET (op0, const1_rtx));
+ emit_insn (
+ gen_movsicc (op0,
+ gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM),
+ const0_rtx),
+ constm1_rtx, op0));
+      /* Deal with the case where both halves are equal.  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVEQmode, op1, op2)),
+ gen_rtx_SET (gen_reg_rtx (V2DImode),
+ gen_rtx_EQ (V2DImode, op1, op2)))));
+ emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx),
+ const0_rtx, op0));
+ return;
+ }
+
+ if (mode == QImode || mode == HImode)
+ {
+ rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND;
+ op1 = simplify_gen_unary (extend, SImode, op1, mode);
+ op1 = force_reg (SImode, op1);
+ op2 = simplify_gen_unary (extend, SImode, op2, mode);
+ op2 = force_reg (SImode, op2);
+ mode = SImode;
+ }
+
+ if (op3 == const1_rtx)
+ {
+ cc_mode = CCUmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LTU (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GTU (mode, cc, const0_rtx);
+ }
+ else
+ {
+ cc_mode = CCSmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LT (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GT (mode, cc, const0_rtx);
+ }
+
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2)));
+ emit_move_insn (op0, const0_rtx);
+ emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0));
+ emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0));
+}
+
+/* Expand floating-point op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : 2.
+
+ If op3 equals const0_rtx, then we are interested in the compare only (see
+   test spaceship-fp-4.c).  Otherwise, op3 is a CONST_INT different from
+   const1_rtx and constm1_rtx, which is used to set op0 in the unordered case.
+
+ Emit a branch-only solution, i.e., let if-convert fold the branches into
+ LOCs if applicable. This has the benefit that the solution is also
+ applicable if we are only interested in the compare, i.e., if op3 equals
+ const0_rtx.
+ */
+
+void
+s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 != const1_rtx && op3 != constm1_rtx);
+
+ machine_mode mode = GET_MODE (op1);
+ machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2);
+ rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx);
+ rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx);
+ rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx);
+ rtx_insn *insn;
+ rtx l_unordered = gen_label_rtx ();
+ rtx l_eq = gen_label_rtx ();
+ rtx l_gt = gen_label_rtx ();
+ rtx l_end = gen_label_rtx ();
+
+ s390_emit_compare (VOIDmode, LTGT, op1, op2);
+ if (!flag_finite_math_only)
+ {
+ insn = s390_emit_jump (l_unordered, cond_unordered);
+ add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
+ }
+ insn = s390_emit_jump (l_eq, cond_eq);
+ add_reg_br_prob_note (insn, profile_probability::unlikely ());
+ insn = s390_emit_jump (l_gt, cond_gt);
+ add_reg_br_prob_note (insn, profile_probability::even ());
+ emit_move_insn (op0, constm1_rtx);
+ emit_jump (l_end);
+ emit_label (l_eq);
+ emit_move_insn (op0, const0_rtx);
+ emit_jump (l_end);
+ emit_label (l_gt);
+ emit_move_insn (op0, const1_rtx);
+ if (!flag_finite_math_only)
+ {
+ emit_jump (l_end);
+ emit_label (l_unordered);
+ rtx unord_val = op3 == const0_rtx ? const2_rtx : op3;
+ emit_move_insn (op0, unord_val);
+ }
+ emit_label (l_end);
+}
+
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 1edbfde..8cc48b0 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1527,6 +1527,27 @@
operands[0] = SET_DEST (PATTERN (curr_insn));
})
+; Restrict the spaceship optab to z13 or later, since those targets provide
+; LOAD HALFWORD IMMEDIATE ON CONDITION.
+
+(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_INT 1 "register_operand")
+ (match_operand:SPACESHIP_INT 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT"
+ "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
+(define_mode_iterator SPACESHIP_BFP [TF DF SF])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_BFP 1 "register_operand")
+ (match_operand:SPACESHIP_BFP 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT"
+ "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
; (TF|DF|SF|TD|DD|SD) instructions
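The two expanders above wire the C++ three-way comparison into the middle end's spaceship optab for s390. A minimal sketch of code they are meant to improve (illustrative; whether conditional-load instructions are actually emitted depends on optimization level and if-conversion):

    // Compile as C++20 for s390x with -march=z13 or later; the integer case
    // should expand through spaceship<mode>4 into a compare plus conditional
    // loads instead of a chain of branches.
    #include <compare>

    std::strong_ordering
    cmp_int (long a, long b)
    {
      return a <=> b;
    }

    std::partial_ordering
    cmp_double (double a, double b)
    {
      return a <=> b;   // the unordered result goes through the CC unordered case
    }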
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 9e9cd9b..eb66427 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2921,7 +2921,7 @@ static size_t cp_parser_skip_std_attribute_spec_seq
static size_t cp_parser_skip_attributes_opt
(cp_parser *, size_t);
static bool cp_parser_extension_opt
- (cp_parser *, int *);
+ (cp_parser *, int *, int *);
static void cp_parser_label_declaration
(cp_parser *);
@@ -9504,11 +9504,12 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk,
case RID_EXTENSION:
{
/* The saved value of the PEDANTIC flag. */
- int saved_pedantic;
+ int saved_pedantic, saved_long_long;
tree expr;
/* Save away the PEDANTIC flag. */
- cp_parser_extension_opt (parser, &saved_pedantic);
+ cp_parser_extension_opt (parser, &saved_pedantic,
+ &saved_long_long);
/* Also suppress -Wconditionally-supported. */
diagnostic_push_diagnostics (global_dc, input_location);
diagnostic_classify_diagnostic
@@ -9519,6 +9520,7 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk,
/* Restore the PEDANTIC flag. */
diagnostic_pop_diagnostics (global_dc, input_location);
pedantic = saved_pedantic;
+ warn_long_long = saved_long_long;
return expr;
}
@@ -16047,15 +16049,16 @@ cp_parser_declaration_seq_opt (cp_parser* parser)
static void
cp_parser_declaration (cp_parser* parser, tree prefix_attrs)
{
- int saved_pedantic;
+ int saved_pedantic, saved_long_long;
/* Check for the `__extension__' keyword. */
- if (cp_parser_extension_opt (parser, &saved_pedantic))
+ if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long))
{
/* Parse the qualified declaration. */
cp_parser_declaration (parser, prefix_attrs);
/* Restore the PEDANTIC flag. */
pedantic = saved_pedantic;
+ warn_long_long = saved_long_long;
return;
}
@@ -16323,15 +16326,16 @@ static void
cp_parser_block_declaration (cp_parser *parser,
bool statement_p)
{
- int saved_pedantic;
+ int saved_pedantic, saved_long_long;
/* Check for the `__extension__' keyword. */
- if (cp_parser_extension_opt (parser, &saved_pedantic))
+ if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long))
{
/* Parse the qualified declaration. */
cp_parser_block_declaration (parser, statement_p);
/* Restore the PEDANTIC flag. */
pedantic = saved_pedantic;
+ warn_long_long = saved_long_long;
return;
}
@@ -28869,16 +28873,17 @@ cp_parser_member_declaration (cp_parser* parser)
cp_token *token = NULL;
cp_token *decl_spec_token_start = NULL;
cp_token *initializer_token_start = NULL;
- int saved_pedantic;
+ int saved_pedantic, saved_long_long;
bool saved_colon_corrects_to_scope_p = parser->colon_corrects_to_scope_p;
/* Check for the `__extension__' keyword. */
- if (cp_parser_extension_opt (parser, &saved_pedantic))
+ if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long))
{
/* Recurse. */
cp_parser_member_declaration (parser);
/* Restore the old value of the PEDANTIC flag. */
pedantic = saved_pedantic;
+ warn_long_long = saved_long_long;
return;
}
@@ -32020,13 +32025,16 @@ cp_parser_skip_attributes_opt (cp_parser *parser, size_t n)
present, and FALSE otherwise. *SAVED_PEDANTIC is set to the
current value of the PEDANTIC flag, regardless of whether or not
the `__extension__' keyword is present. The caller is responsible
- for restoring the value of the PEDANTIC flag. */
+   for restoring the value of the PEDANTIC flag.  Similarly, *SAVED_LONG_LONG
+   is set to the current value of the warn_long_long flag.  */
static bool
-cp_parser_extension_opt (cp_parser* parser, int* saved_pedantic)
+cp_parser_extension_opt (cp_parser *parser, int *saved_pedantic,
+ int *saved_long_long)
{
/* Save the old value of the PEDANTIC flag. */
*saved_pedantic = pedantic;
+ *saved_long_long = warn_long_long;
if (cp_lexer_next_token_is_keyword (parser->lexer, RID_EXTENSION))
{
@@ -32035,6 +32043,8 @@ cp_parser_extension_opt (cp_parser* parser, int* saved_pedantic)
/* We're not being pedantic while the `__extension__' keyword is
in effect. */
pedantic = 0;
+      /* And we don't want a -Wlong-long warning.  */
+ warn_long_long = 0;
return true;
}
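The parser change above makes __extension__ suppress -Wlong-long in addition to pedantic diagnostics. A small sketch of the kind of code affected (illustrative; not one of the testcases added by this patch):

    // With -std=c++98 -Wlong-long, the first typedef warns that 'long long'
    // is not part of C++98; the second one, wrapped in __extension__, should
    // now be silent.
    typedef long long plain_ll;
    __extension__ typedef long long quiet_ll;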
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 71ae764..0b7a05c 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -6952,14 +6952,22 @@ convert_nontype_argument_function (tree type, tree expr,
{
auto_diagnostic_group d;
location_t loc = cp_expr_loc_or_input_loc (expr);
- error_at (loc, "%qE is not a valid template argument for type %qT",
- expr, type);
- if (TYPE_PTR_P (type))
- inform (loc, "it must be the address of a function "
- "with external linkage");
+ tree c;
+ if (cxx_dialect >= cxx17
+ && (c = cxx_constant_value (fn),
+ c == error_mark_node))
+ ;
else
- inform (loc, "it must be the name of a function with "
- "external linkage");
+ {
+ error_at (loc, "%qE is not a valid template argument for "
+ "type %qT", expr, type);
+ if (TYPE_PTR_P (type))
+ inform (loc, "it must be the address of a function "
+ "with external linkage");
+ else
+ inform (loc, "it must be the name of a function with "
+ "external linkage");
+ }
}
return NULL_TREE;
}
@@ -7402,22 +7410,22 @@ invalid_tparm_referent_p (tree type, tree expr, tsubst_flags_t complain)
/* Null pointer values are OK in C++11. */;
else
{
- if (VAR_P (expr))
- {
- if (complain & tf_error)
- error ("%qD is not a valid template argument "
- "because %qD is a variable, not the address of "
- "a variable", expr, expr);
- return true;
- }
+ tree c;
+ if (!(complain & tf_error))
+ ;
+ else if (cxx_dialect >= cxx17
+ && (c = cxx_constant_value (expr),
+ c == error_mark_node))
+ ;
+ else if (VAR_P (expr))
+ error ("%qD is not a valid template argument "
+ "because %qD is a variable, not the address of "
+ "a variable", expr, expr);
else
- {
- if (complain & tf_error)
- error ("%qE is not a valid template argument for %qT "
- "because it is not the address of a variable",
- expr, type);
- return true;
- }
+ error ("%qE is not a valid template argument for %qT "
+ "because it is not the address of a variable",
+ expr, type);
+ return true;
}
}
return false;
diff --git a/gcc/cprop.cc b/gcc/cprop.cc
index bc72e64..dfe3462 100644
--- a/gcc/cprop.cc
+++ b/gcc/cprop.cc
@@ -1525,6 +1525,7 @@ static bool
bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump)
{
rtx_insn *insn;
+ rtx setcc_src, setcc_dest;
rtx note;
edge e, edest;
bool change;
@@ -1533,7 +1534,19 @@ bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump)
unsigned i;
edge_iterator ei;
- insn = (setcc != NULL) ? setcc : jump;
+ if (setcc != NULL)
+ {
+ rtx set = single_set (setcc);
+ setcc_dest = SET_DEST (set);
+ setcc_src = SET_SRC (set);
+ insn = setcc;
+ }
+ else
+ {
+ setcc_dest = NULL;
+ setcc_src = NULL;
+ insn = jump;
+ }
/* Determine set of register uses in INSN. */
reg_use_count = 0;
@@ -1608,9 +1621,7 @@ bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump)
src = SET_SRC (pc_set (jump));
if (setcc != NULL)
- src = simplify_replace_rtx (src,
- SET_DEST (PATTERN (setcc)),
- SET_SRC (PATTERN (setcc)));
+ src = simplify_replace_rtx (src, setcc_dest, setcc_src);
new_rtx = simplify_replace_rtx (src, reg_used, set->src);
@@ -1716,10 +1727,11 @@ bypass_conditional_jumps (void)
{
if (setcc)
break;
- if (GET_CODE (PATTERN (insn)) != SET)
+ rtx singleset = single_set (insn);
+ if (singleset == NULL_RTX)
break;
- dest = SET_DEST (PATTERN (insn));
+ dest = SET_DEST (singleset);
if (REG_P (dest))
setcc = insn;
else
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0980230..e442a9c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31370,31 +31370,14 @@ When the RISC-V specifications define an extension as depending on other
extensions, GCC will implicitly add the dependent extensions to the enabled
extension set if they weren't added explicitly.
-@opindex mcpu
-@item -mcpu=@var{processor-string}
-Use architecture of and optimize the output for the given processor, specified
-by particular CPU name.
-Permissible values for this option are: @samp{mips-p8700}, @samp{sifive-e20},
-@samp{sifive-e21}, @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34},
-@samp{sifive-e76}, @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54},
-@samp{sifive-s76}, @samp{sifive-u54}, @samp{sifive-u74}, @samp{sifive-x280},
-@samp{sifive-xp450}, @samp{sifive-x670}, @samp{thead-c906}, @samp{tt-ascalon-d8},
-@samp{xiangshan-nanhu}, @samp{xiangshan-kunminghu}, @samp{xt-c908}, @samp{xt-c908v},
-@samp{xt-c910}, @samp{xt-c910v2}, @samp{xt-c920}, @samp{xt-c920v2}.
+@include riscv-mcpu.texi
Note that @option{-mcpu} does not override @option{-march} or @option{-mtune}.
-@opindex mtune
-@item -mtune=@var{processor-string}
-Optimize the output for the given processor, specified by microarchitecture or
-particular CPU name. Permissible values for this option are:
-@samp{generic-ooo}, @samp{mips-p8700}, @samp{rocket}, @samp{sifive-3-series},
-@samp{sifive-5-series}, @samp{sifive-7-series}, @samp{size},
-@samp{sifive-p400-series}, @samp{sifive-p600-series}, and all valid options for
-@option{-mcpu=}.
+@include riscv-mtune.texi
When @option{-mtune=} is not specified, use the setting from @option{-mcpu},
-the default is @samp{rocket} if both are not specified.
+and the default is @samp{generic} if neither is specified.
The @samp{size} choice is not intended for use by end-users. This is used
when @option{-Os} is specified. It overrides the instruction cost info
diff --git a/gcc/doc/riscv-mcpu.texi b/gcc/doc/riscv-mcpu.texi
new file mode 100644
index 0000000..6753e51
--- /dev/null
+++ b/gcc/doc/riscv-mcpu.texi
@@ -0,0 +1,69 @@
+@c Copyright (C) 2025 Free Software Foundation, Inc.
+@c This is part of the GCC manual.
+@c For copying conditions, see the file gcc/doc/include/fdl.texi.
+
+@c This file is generated automatically using
+@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:
+@c gcc/config/riscv/riscv-cores.def
+
+@c Please *DO NOT* edit manually.
+
+@samp{Core Name}
+
+@opindex mcpu
+@item -mcpu=@var{processor-string}
+Use architecture of and optimize the output for the given processor, specified
+by particular CPU name. Permissible values for this option are:
+
+
+@samp{sifive-e20},
+
+@samp{sifive-e21},
+
+@samp{sifive-e24},
+
+@samp{sifive-e31},
+
+@samp{sifive-e34},
+
+@samp{sifive-e76},
+
+@samp{sifive-s21},
+
+@samp{sifive-s51},
+
+@samp{sifive-s54},
+
+@samp{sifive-s76},
+
+@samp{sifive-u54},
+
+@samp{sifive-u74},
+
+@samp{sifive-x280},
+
+@samp{sifive-p450},
+
+@samp{sifive-p670},
+
+@samp{thead-c906},
+
+@samp{xt-c908},
+
+@samp{xt-c908v},
+
+@samp{xt-c910},
+
+@samp{xt-c910v2},
+
+@samp{xt-c920},
+
+@samp{xt-c920v2},
+
+@samp{tt-ascalon-d8},
+
+@samp{xiangshan-nanhu},
+
+@samp{xiangshan-kunminghu},
+
+@samp{mips-p8700}.
diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi
new file mode 100644
index 0000000..a2a4d3e
--- /dev/null
+++ b/gcc/doc/riscv-mtune.texi
@@ -0,0 +1,59 @@
+@c Copyright (C) 2025 Free Software Foundation, Inc.
+@c This is part of the GCC manual.
+@c For copying conditions, see the file gcc/doc/include/fdl.texi.
+
+@c This file is generated automatically using
+@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:
+@c gcc/config/riscv/riscv-cores.def
+
+@c Please *DO NOT* edit manually.
+
+@samp{Tune Name}
+
+@opindex mtune
+@item -mtune=@var{processor-string}
+Optimize the output for the given processor, specified by microarchitecture or
+particular CPU name. Permissible values for this option are:
+
+
+@samp{generic},
+
+@samp{rocket},
+
+@samp{sifive-3-series},
+
+@samp{sifive-5-series},
+
+@samp{sifive-7-series},
+
+@samp{sifive-p400-series},
+
+@samp{sifive-p600-series},
+
+@samp{tt-ascalon-d8},
+
+@samp{thead-c906},
+
+@samp{xt-c908},
+
+@samp{xt-c908v},
+
+@samp{xt-c910},
+
+@samp{xt-c910v2},
+
+@samp{xt-c920},
+
+@samp{xt-c920v2},
+
+@samp{xiangshan-nanhu},
+
+@samp{xiangshan-kunminghu},
+
+@samp{generic-ooo},
+
+@samp{size},
+
+@samp{mips-p8700},
+
+and all valid options for @option{-mcpu=}.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 928578b..215552c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6513,6 +6513,15 @@ The default is @code{NULL_TREE} which means to not vectorize scatter
stores.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_PREFER_GATHER_SCATTER (machine_mode @var{mode}, int @var{scale}, unsigned int @var{group_size})
+This hook returns TRUE if gather loads or scatter stores are cheaper on
+this target than a sequence of elementwise loads or stores. The @var{mode}
+and @var{scale} correspond to the @code{gather_load} and
+@code{scatter_store} instruction patterns. The @var{group_size} is the
+number of scalar elements in each scalar loop iteration that are to be
+combined into the vector.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index eccc4d8..b03ad4c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4311,6 +4311,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_VECTORIZE_BUILTIN_SCATTER
+@hook TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+
@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
@hook TARGET_SIMD_CLONE_ADJUST
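The new target hook documented above defaults to hook_bool_mode_int_unsigned_false. A hedged sketch of how a backend might opt in (the function name and the group-size threshold are invented for illustration; a real port would consult its cost model):

    /* In the target's .cc file.  */
    static bool
    example_prefer_gather_scatter (machine_mode, int, unsigned int group_size)
    {
      /* Claim gathers/scatters are cheaper once the elementwise sequence
         would need more than four scalar accesses per iteration.  */
      return group_size > 4;
    }

    #undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
    #define TARGET_VECTORIZE_PREFER_GATHER_SCATTER example_prefer_gather_scatter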
diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index 838d523..8626526 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -5559,6 +5559,27 @@ gfc_check_scan (gfc_expr *x, gfc_expr *y, gfc_expr *z, gfc_expr *kind)
return true;
}
+bool
+gfc_check_split (gfc_expr *string, gfc_expr *set, gfc_expr *pos, gfc_expr *back)
+{
+ if (!type_check (string, 0, BT_CHARACTER))
+ return false;
+
+ if (!type_check (set, 1, BT_CHARACTER))
+ return false;
+
+ if (!type_check (pos, 2, BT_INTEGER) || !scalar_check (pos, 2))
+ return false;
+
+ if (back != NULL
+ && (!type_check (back, 3, BT_LOGICAL) || !scalar_check (back, 3)))
+ return false;
+
+ if (!same_type_check (string, 0, set, 1))
+ return false;
+
+ return true;
+}
bool
gfc_check_secnds (gfc_expr *r)
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 85feb18..d9dcd1b 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -729,6 +729,8 @@ enum gfc_isym_id
GFC_ISYM_COSPI,
GFC_ISYM_SINPI,
GFC_ISYM_TANPI,
+
+ GFC_ISYM_SPLIT,
};
enum init_local_logical
diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
index 9e07627..c99a7a8 100644
--- a/gcc/fortran/intrinsic.cc
+++ b/gcc/fortran/intrinsic.cc
@@ -3933,6 +3933,14 @@ add_subroutines (void)
pt, BT_INTEGER, di, OPTIONAL, INTENT_IN,
gt, BT_INTEGER, di, OPTIONAL, INTENT_OUT);
+ add_sym_4s ("split", GFC_ISYM_SPLIT, CLASS_PURE,
+ BT_UNKNOWN, 0, GFC_STD_F2023,
+ gfc_check_split, NULL, gfc_resolve_split,
+ "string", BT_CHARACTER, dc, REQUIRED, INTENT_IN,
+ "set", BT_CHARACTER, dc, REQUIRED, INTENT_IN,
+ "pos", BT_INTEGER, di, REQUIRED, INTENT_INOUT,
+ "back", BT_LOGICAL, dl, OPTIONAL, INTENT_IN);
+
/* The following subroutines are part of ISO_C_BINDING. */
add_sym_3s ("c_f_pointer", GFC_ISYM_C_F_POINTER, CLASS_IMPURE, BT_UNKNOWN, 0,
diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h
index fd54588..8a0ab93 100644
--- a/gcc/fortran/intrinsic.h
+++ b/gcc/fortran/intrinsic.h
@@ -215,6 +215,7 @@ bool gfc_check_mvbits (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *,
bool gfc_check_random_init (gfc_expr *, gfc_expr *);
bool gfc_check_random_number (gfc_expr *);
bool gfc_check_random_seed (gfc_expr *, gfc_expr *, gfc_expr *);
+bool gfc_check_split (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *);
bool gfc_check_dtime_etime_sub (gfc_expr *, gfc_expr *);
bool gfc_check_fgetputc_sub (gfc_expr *, gfc_expr *, gfc_expr *);
bool gfc_check_fgetput_sub (gfc_expr *, gfc_expr *);
@@ -693,6 +694,7 @@ void gfc_resolve_link_sub (gfc_code *);
void gfc_resolve_symlnk_sub (gfc_code *);
void gfc_resolve_signal_sub (gfc_code *);
void gfc_resolve_sleep_sub (gfc_code *);
+void gfc_resolve_split (gfc_code *);
void gfc_resolve_stat_sub (gfc_code *);
void gfc_resolve_system_clock (gfc_code *);
void gfc_resolve_system_sub (gfc_code *);
diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi
index 3103da3..a24b234 100644
--- a/gcc/fortran/intrinsic.texi
+++ b/gcc/fortran/intrinsic.texi
@@ -313,6 +313,7 @@ Some basic guidelines for editing this document:
* @code{SIZEOF}: SIZEOF, Determine the size in bytes of an expression
* @code{SLEEP}: SLEEP, Sleep for the specified number of seconds
* @code{SPACING}: SPACING, Smallest distance between two numbers of a given type
+* @code{SPLIT}: SPLIT, Parse a string into tokens, one at a time
* @code{SPREAD}: SPREAD, Add a dimension to an array
* @code{SQRT}: SQRT, Square-root function
* @code{SRAND}: SRAND, Reinitialize the random number generator
@@ -14203,6 +14204,69 @@ Fortran 90 and later
+@node SPLIT
+@section @code{SPLIT} --- Parse a string into tokens, one at a time
+@fnindex SPLIT
+@cindex string, split
+
+@table @asis
+@item @emph{Synopsis}:
+@code{RESULT = SPLIT(STRING, SET, POS [, BACK])}
+
+@item @emph{Description}:
+Updates the integer @var{POS} to the position of the next (or previous)
+separator in @var{STRING}.
+
+If @var{BACK} is absent or is present with the value false, @var{POS} is
+assigned the position of the leftmost token delimiter in @var{STRING} whose
+position is greater than @var{POS}, or if there is no such character, it is
+assigned a value one greater than the length of @var{STRING}. This identifies
+a token with starting position one greater than the value of @var{POS} on
+invocation, and ending position one less than the value of @var{POS} on return.
+
+If @var{BACK} is present with the value true, @var{POS} is assigned the
+position of the rightmost token delimiter in @var{STRING} whose position is
+less than @var{POS}, or if there is no such character, it is assigned the value
+zero. This identifies a token with ending position one less than the value of
+@var{POS} on invocation, and starting position one greater than the value of
+@var{POS} on return.
+
+@item @emph{Class}:
+Subroutine
+
+@item @emph{Arguments}:
+@multitable @columnfractions .15 .70
+@item @var{STRING} @tab Shall be of type @code{CHARACTER}.
+@item @var{SET} @tab Shall be of type @code{CHARACTER}.
+@item @var{POS} @tab Shall be of type @code{INTEGER}.
+@item @var{BACK} @tab (Optional) Shall be of type @code{LOGICAL}.
+@end multitable
+
+@item @emph{Example}:
+@smallexample
+character(len=:), allocatable :: input
+character(len=2) :: set = ', '
+integer :: p
+input = "one,last example"
+p = 0
+do
+ if (p > len(input)) exit
+ istart = p + 1
+ call split(input, set, p)
+ iend = p - 1
+ print '(t7, a)', input(istart:iend)
+end do
+@end smallexample
+
+@item @emph{Standard}:
+Fortran 2023
+
+@item @emph{See also}:
+@ref{SCAN}
+@end table
+
+
+
@node SPREAD
@section @code{SPREAD} --- Add a dimension to an array
@fnindex SPREAD
diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc
index 1001309..da354ab 100644
--- a/gcc/fortran/iresolve.cc
+++ b/gcc/fortran/iresolve.cc
@@ -3863,6 +3863,19 @@ gfc_resolve_sleep_sub (gfc_code *c)
c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
}
+void
+gfc_resolve_split (gfc_code *c)
+{
+ const char *name;
+ gfc_expr *string;
+
+ string = c->ext.actual->expr;
+ if (string->ts.type == BT_CHARACTER && string->ts.kind == 4)
+ name = "__split_char4";
+ else
+ name = "__split";
+ c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
+}
/* G77 compatibility function srand(). */
diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 6b759d1..0f7637d 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -6296,8 +6296,8 @@ static tree
gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
gfc_expr ** lower, gfc_expr ** upper, stmtblock_t * pblock,
stmtblock_t * descriptor_block, tree * overflow,
- tree expr3_elem_size, tree *nelems, gfc_expr *expr3,
- tree expr3_desc, bool e3_has_nodescriptor, gfc_expr *expr,
+ tree expr3_elem_size, gfc_expr *expr3, tree expr3_desc,
+ bool e3_has_nodescriptor, gfc_expr *expr,
tree *element_size, bool explicit_ts)
{
tree type;
@@ -6573,7 +6573,6 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
if (rank == 0)
return *element_size;
- *nelems = gfc_evaluate_now (stride, pblock);
stride = fold_convert (size_type_node, stride);
/* First check for overflow. Since an array of type character can
@@ -6662,9 +6661,8 @@ retrieve_last_ref (gfc_ref **ref_in, gfc_ref **prev_ref_in)
bool
gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
tree errlen, tree label_finish, tree expr3_elem_size,
- tree *nelems, gfc_expr *expr3, tree e3_arr_desc,
- bool e3_has_nodescriptor, gfc_omp_namelist *omp_alloc,
- bool explicit_ts)
+ gfc_expr *expr3, tree e3_arr_desc, bool e3_has_nodescriptor,
+ gfc_omp_namelist *omp_alloc, bool explicit_ts)
{
tree tmp;
tree pointer;
@@ -6795,7 +6793,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
coarray ? ref->u.ar.as->corank : 0,
&offset, lower, upper,
&se->pre, &set_descriptor_block, &overflow,
- expr3_elem_size, nelems, expr3, e3_arr_desc,
+ expr3_elem_size, expr3, e3_arr_desc,
e3_has_nodescriptor, expr, &element_size,
explicit_ts);
diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h
index 1bb3294..29098fd 100644
--- a/gcc/fortran/trans-array.h
+++ b/gcc/fortran/trans-array.h
@@ -20,9 +20,8 @@ along with GCC; see the file COPYING3. If not see
/* Generate code to initialize and allocate an array. Statements are added to
se, which should contain an expression for the array descriptor. */
-bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree,
- tree, tree *, gfc_expr *, tree, bool,
- gfc_omp_namelist *, bool);
+bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, tree,
+ gfc_expr *, tree, bool, gfc_omp_namelist *, bool);
/* Allow the bounds of a loop to be set from a callee's array spec. */
void gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping *,
diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc
index d5acdca..741acc0 100644
--- a/gcc/fortran/trans-decl.cc
+++ b/gcc/fortran/trans-decl.cc
@@ -197,6 +197,7 @@ tree gfor_fndecl_string_scan;
tree gfor_fndecl_string_verify;
tree gfor_fndecl_string_trim;
tree gfor_fndecl_string_minmax;
+tree gfor_fndecl_string_split;
tree gfor_fndecl_adjustl;
tree gfor_fndecl_adjustr;
tree gfor_fndecl_select_string;
@@ -208,6 +209,7 @@ tree gfor_fndecl_string_scan_char4;
tree gfor_fndecl_string_verify_char4;
tree gfor_fndecl_string_trim_char4;
tree gfor_fndecl_string_minmax_char4;
+tree gfor_fndecl_string_split_char4;
tree gfor_fndecl_adjustl_char4;
tree gfor_fndecl_adjustr_char4;
tree gfor_fndecl_select_string_char4;
@@ -3569,6 +3571,12 @@ gfc_build_intrinsic_function_decls (void)
build_pointer_type (pchar1_type_node), integer_type_node,
integer_type_node);
+ gfor_fndecl_string_split = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX ("string_split")), ". . R . R . . ",
+ gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar1_type_node,
+ gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node,
+ gfc_logical4_type_node);
+
gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec (
get_identifier (PREFIX("adjustl")), ". W . R ",
void_type_node, 3, pchar1_type_node, gfc_charlen_type_node,
@@ -3641,6 +3649,12 @@ gfc_build_intrinsic_function_decls (void)
build_pointer_type (pchar4_type_node), integer_type_node,
integer_type_node);
+ gfor_fndecl_string_split_char4 = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX ("string_split_char4")), ". . R . R . . ",
+ gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar4_type_node,
+ gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node,
+ gfc_logical4_type_node);
+
gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec (
get_identifier (PREFIX("adjustl_char4")), ". W . R ",
void_type_node, 3, pchar4_type_node, gfc_charlen_type_node,
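The decls above describe the library entry points that the SPLIT subroutine lowers to. A sketch of the C prototype implied by the kind-1 decl (derived only from the argument list here; the authoritative prototype lives in libgfortran and may spell the typedefs differently):

    extern "C" ptrdiff_t
    _gfortran_string_split (ptrdiff_t string_len, const char *string,
                            ptrdiff_t set_len, const char *set,
                            ptrdiff_t pos, int back);
    /* Returns the updated position; the char4 variant takes UCS-4 strings
       instead of char.  */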
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index be98427..f68ceb1 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3466,6 +3466,74 @@ else
return gfc_finish_block (&block);
}
+static tree
+conv_intrinsic_split (gfc_code *code)
+{
+ stmtblock_t block, post_block;
+ gfc_se se;
+ gfc_expr *string_expr, *set_expr, *pos_expr, *back_expr;
+ tree string, string_len;
+ tree set, set_len;
+ tree pos, pos_for_call;
+ tree back;
+ tree fndecl, call;
+
+ string_expr = code->ext.actual->expr;
+ set_expr = code->ext.actual->next->expr;
+ pos_expr = code->ext.actual->next->next->expr;
+ back_expr = code->ext.actual->next->next->next->expr;
+
+ gfc_start_block (&block);
+ gfc_init_block (&post_block);
+
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, string_expr);
+ gfc_conv_string_parameter (&se);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ string = se.expr;
+ string_len = se.string_length;
+
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, set_expr);
+ gfc_conv_string_parameter (&se);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ set = se.expr;
+ set_len = se.string_length;
+
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, pos_expr);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ pos = se.expr;
+ pos_for_call = fold_convert (gfc_charlen_type_node, pos);
+
+ if (back_expr)
+ {
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, back_expr);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ back = se.expr;
+ }
+ else
+ back = logical_false_node;
+
+ if (string_expr->ts.kind == 1)
+ fndecl = gfor_fndecl_string_split;
+ else if (string_expr->ts.kind == 4)
+ fndecl = gfor_fndecl_string_split_char4;
+ else
+ gcc_unreachable ();
+
+ call = build_call_expr_loc (input_location, fndecl, 6, string_len, string,
+ set_len, set, pos_for_call, back);
+ gfc_add_modify (&block, pos, fold_convert (TREE_TYPE (pos), call));
+
+ gfc_add_block_to_block (&block, &post_block);
+ return gfc_finish_block (&block);
+}
/* Return a character string containing the tty name. */
@@ -13261,6 +13329,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
res = conv_intrinsic_system_clock (code);
break;
+ case GFC_ISYM_SPLIT:
+ res = conv_intrinsic_split (code);
+ break;
+
default:
res = NULL_TREE;
break;
diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index f105401..b4ddf75 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -6710,7 +6710,6 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate)
stmtblock_t block;
stmtblock_t post;
stmtblock_t final_block;
- tree nelems;
bool upoly_expr, tmp_expr3_len_flag = false, al_len_needs_set, is_coarray;
bool needs_caf_sync, caf_refs_comp;
bool e3_has_nodescriptor = false;
@@ -7242,7 +7241,6 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate)
to handle the complete array allocation. Only the element size
needs to be provided, which is done most of the time by the
pre-evaluation step. */
- nelems = NULL_TREE;
if (expr3_len && (code->expr3->ts.type == BT_CHARACTER
|| code->expr3->ts.type == BT_CLASS))
{
@@ -7313,9 +7311,8 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate)
}
- if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen,
- label_finish, tmp, &nelems,
- e3rhs ? e3rhs : code->expr3,
+ if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen, label_finish,
+ tmp, e3rhs ? e3rhs : code->expr3,
e3_is == E3_DESC ? expr3 : NULL_TREE,
e3_has_nodescriptor, omp_alloc_item,
code->ext.alloc.ts.type != BT_UNKNOWN))
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index 461b0cd..40680e9 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -961,6 +961,7 @@ extern GTY(()) tree gfor_fndecl_string_scan;
extern GTY(()) tree gfor_fndecl_string_verify;
extern GTY(()) tree gfor_fndecl_string_trim;
extern GTY(()) tree gfor_fndecl_string_minmax;
+extern GTY(()) tree gfor_fndecl_string_split;
extern GTY(()) tree gfor_fndecl_adjustl;
extern GTY(()) tree gfor_fndecl_adjustr;
extern GTY(()) tree gfor_fndecl_select_string;
@@ -972,6 +973,7 @@ extern GTY(()) tree gfor_fndecl_string_scan_char4;
extern GTY(()) tree gfor_fndecl_string_verify_char4;
extern GTY(()) tree gfor_fndecl_string_trim_char4;
extern GTY(()) tree gfor_fndecl_string_minmax_char4;
+extern GTY(()) tree gfor_fndecl_string_split_char4;
extern GTY(()) tree gfor_fndecl_adjustl_char4;
extern GTY(()) tree gfor_fndecl_adjustr_char4;
extern GTY(()) tree gfor_fndecl_select_string_char4;
diff --git a/gcc/gcov-io.cc b/gcc/gcov-io.cc
index f39b4bd..dd3fc88 100644
--- a/gcc/gcov-io.cc
+++ b/gcc/gcov-io.cc
@@ -69,7 +69,7 @@ gcov_position (void)
/* Return nonzero if the error flag is set. */
/* We need to expose this function when compiling for gcov-tool. */
-#ifndef IN_GCOV_TOOL
+#if !defined (IN_GCOV_TOOL) && !defined (IN_GCC)
static inline
#endif
int
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index f3e3a1c..313c15c 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -387,6 +387,7 @@ char *mangle_path (char const *base);
/* Available outside gcov */
GCOV_LINKAGE void gcov_write (const void *, unsigned) ATTRIBUTE_HIDDEN;
GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN;
+GCOV_LINKAGE int gcov_is_error (void);
#endif
#if !IN_GCOV && !IN_LIBGCOV
diff --git a/gcc/hooks.cc b/gcc/hooks.cc
index 951825d..76cb5931 100644
--- a/gcc/hooks.cc
+++ b/gcc/hooks.cc
@@ -117,6 +117,13 @@ hook_bool_mode_const_rtx_true (machine_mode, const_rtx)
return true;
}
+/* Generic hook that takes (machine_mode, int, unsigned) and returns false. */
+bool
+hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned)
+{
+ return false;
+}
+
/* Generic hook that takes (machine_mode, rtx) and returns false. */
bool
hook_bool_mode_rtx_false (machine_mode, rtx)
diff --git a/gcc/hooks.h b/gcc/hooks.h
index c0663bf..e95bd11 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -36,6 +36,7 @@ extern bool hook_bool_mode_true (machine_mode);
extern bool hook_bool_mode_mode_true (machine_mode, machine_mode);
extern bool hook_bool_mode_const_rtx_false (machine_mode, const_rtx);
extern bool hook_bool_mode_const_rtx_true (machine_mode, const_rtx);
+extern bool hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned);
extern bool hook_bool_mode_rtx_false (machine_mode, rtx);
extern bool hook_bool_mode_rtx_true (machine_mode, rtx);
extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *,
diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog
index 6babeb9..2406b95 100644
--- a/gcc/m2/ChangeLog
+++ b/gcc/m2/ChangeLog
@@ -1,3 +1,36 @@
+2025-07-29 Gaius Mulley <gaiusmod2@gmail.com>
+
+ * gm2-compiler/M2GenGCC.mod (FoldBecomes): Remove all
+ local variables.
+ (CodeIndrX): Remove length.
+ Remove newstr.
+ * gm2-compiler/M2Range.mod (FoldTypeIndrX): Remove desType.
+
+2025-07-29 Gaius Mulley <gaiusmod2@gmail.com>
+
+ PR modula2/121289
+ * gm2-compiler/M2Students.def (CheckVariableAgainstKeyword): New
+ parameter tok.
+ * gm2-compiler/M2Students.mod (CheckVariableAgainstKeyword): New
+ parameter tok.
+ Pass tok to PerformVariableKeywordCheck.
+ (PerformVariableKeywordCheck): New parameter tok.
+ Pass tok to MetaErrorStringT0.
+ * gm2-compiler/P2SymBuild.mod (BuildVariable): Pass tok to
+ CheckVariableAgainstKeyword.
+ * gm2-libs-iso/LowLong.mod (except): Replace with ...
+ (exceptSrc): ... this.
+ * gm2-libs-iso/LowReal.mod (except): Replace with ...
+ (exceptSrc): ... this.
+ * gm2-libs-iso/LowShort.mod (except): Replace with ...
+ (exceptSrc): ... this.
+ * gm2-libs-iso/Processes.mod (Wait): Replace from with fromCor.
+ * gm2-libs-iso/RndFile.mod (EndPos): Replace end with endP.
+ * gm2-libs/SCmdArgs.mod (GetArg): Replace start with startPos.
+ Replace end with endPos.
+ (NArg): Replace start with startPos.
+ Replace end with endPos.
+
2025-07-25 David Malcolm <dmalcolm@redhat.com>
* gm2-gcc/m2linemap.cc: Update usage of "diagnostic_info" to
diff --git a/gcc/m2/gm2-compiler/M2GenGCC.mod b/gcc/m2/gm2-compiler/M2GenGCC.mod
index 4a9ced3..2507c53 100644
--- a/gcc/m2/gm2-compiler/M2GenGCC.mod
+++ b/gcc/m2/gm2-compiler/M2GenGCC.mod
@@ -2903,9 +2903,6 @@ END CheckStop ;
*)
PROCEDURE FoldBecomes (p: WalkAction; bb: BasicBlock; quad: CARDINAL) ;
-VAR
- op : QuadOperator ;
- des, op2, expr: CARDINAL ;
BEGIN
IF DeclaredOperandsBecomes (p, quad)
THEN
@@ -8154,8 +8151,6 @@ VAR
rightpos,
typepos,
indrxpos : CARDINAL ;
- length,
- newstr : tree ;
location : location_t ;
BEGIN
GetQuadOtok (quad, indrxpos, op, left, type, right,
diff --git a/gcc/m2/gm2-compiler/M2Range.mod b/gcc/m2/gm2-compiler/M2Range.mod
index dcac2ba..f1516d3 100644
--- a/gcc/m2/gm2-compiler/M2Range.mod
+++ b/gcc/m2/gm2-compiler/M2Range.mod
@@ -1869,14 +1869,12 @@ END FoldTypeAssign ;
PROCEDURE FoldTypeIndrX (q: CARDINAL; tokenNo: CARDINAL; des, expr: CARDINAL; r: CARDINAL) ;
VAR
- desType,
exprType: CARDINAL ;
BEGIN
(* Need to skip over a variable or temporary in des and expr so
long as expr is not a procedure. In the case of des = *expr,
both expr and des will be variables due to the property of
indirection. *)
- desType := GetType (des) ;
IF IsProcedure (expr)
THEN
(* Must not GetType for a procedure as it gives the return type. *)
diff --git a/gcc/match.pd b/gcc/match.pd
index 4903552..82e6e29 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3595,22 +3595,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
return (T)x;
}
while WT is uint128_t, T is uint8_t, uint16_t, uint32_t or uint64_t. */
- (convert@4 (min (widen_mult:c@3 (convert@5 (convert @0))
- (convert@6 (convert @1)))
+ (convert (min (widen_mult:c@3 (convert@4 (convert @0))
+ (convert@5 (convert @1)))
INTEGER_CST@2))
- (if (types_match (type, @0, @1) && types_match (type, @4))
+ (if (types_match (type, @0, @1))
(with
{
unsigned prec = TYPE_PRECISION (type);
unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3));
+ unsigned cvt4_prec = TYPE_PRECISION (TREE_TYPE (@4));
unsigned cvt5_prec = TYPE_PRECISION (TREE_TYPE (@5));
- unsigned cvt6_prec = TYPE_PRECISION (TREE_TYPE (@6));
wide_int c2 = wi::to_wide (@2);
wide_int max = wi::mask (prec, false, widen_prec);
bool c2_is_max_p = wi::eq_p (c2, max);
- bool widen_mult_p = cvt5_prec == cvt6_prec && widen_prec == cvt6_prec * 2;
+ bool widen_mult_p = cvt4_prec == cvt5_prec && widen_prec == cvt5_prec * 2;
}
(if (widen_prec > prec && c2_is_max_p && widen_mult_p)))))
+ (match (unsigned_integer_sat_mul @0 @1)
+ (convert (min (mult:c@3 (convert @0) (convert @1)) INTEGER_CST@2))
+ (if (types_match (type, @0, @1))
+ (with
+ {
+ unsigned prec = TYPE_PRECISION (type);
+ unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3));
+ wide_int c2 = wi::to_wide (@2);
+ wide_int max = wi::mask (prec, false, widen_prec);
+ bool c2_is_max_p = wi::eq_p (c2, max);
+ }
+ (if (widen_prec > prec && c2_is_max_p)))))
)
/* The boundary condition for case 10: IMM = 1:
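The second pattern above extends unsigned saturating-multiply detection to the case where the product is computed with a plain MULT in a wider type rather than a WIDEN_MULT. A sketch of the source shape it targets (illustrative; whether this becomes a .SAT_MUL internal call depends on the target providing the optab):

    #include <cstdint>

    uint8_t
    sat_mul_u8 (uint8_t a, uint8_t b)
    {
      // 8-bit operands multiplied in 32 bits: not a 2x widening, so the new
      // non-widening pattern is the one that can match.
      uint32_t p = uint32_t (a) * uint32_t (b);
      return uint8_t (p < 255u ? p : 255u);   // MIN against the uint8_t maximum
    }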
diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk
index a3d7e5a..31756ec 100644
--- a/gcc/optc-save-gen.awk
+++ b/gcc/optc-save-gen.awk
@@ -1313,6 +1313,12 @@ for (i = 0; i < n_opts; i++) {
# offloading is enabled.
if (flag_set_p("Target", flags[i]))
var_target_opt[n_opt_val] = 1;
+
+ # These options should not be passed from host to target, but
+ # are not actually target specific.
+ if (flag_set_p("NoOffload", flags[i]))
+ var_target_opt[n_opt_val] = 2;
+
n_opt_val++;
}
}
@@ -1393,7 +1399,7 @@ for (i = 0; i < n_opt_val; i++) {
# Do not stream out target-specific opts if offloading is
# enabled.
if (var_target_opt[i])
- print " if (!lto_stream_offload_p)"
+ print " if (!lto_stream_offload_p) {"
# If applicable, encode the streamed value.
if (var_opt_optimize_init[i]) {
print " if (" var_opt_optimize_init[i] " > (" var_opt_val_type[i] ") 10)";
@@ -1403,6 +1409,8 @@ for (i = 0; i < n_opt_val; i++) {
} else {
print " bp_pack_var_len_" sgn " (bp, ptr->" name");";
}
+ if (var_target_opt[i])
+ print "}"
}
}
print " for (size_t i = 0; i < ARRAY_SIZE (ptr->explicit_mask); i++)";
@@ -1418,10 +1426,14 @@ print " struct cl_optimization *ptr ATTRIBUTE_UNUSED)"
print "{";
for (i = 0; i < n_opt_val; i++) {
name = var_opt_val[i]
- if (var_target_opt[i]) {
+ if (var_target_opt[i] == 1) {
print "#ifdef ACCEL_COMPILER"
print "#error accel compiler cannot define Optimization attribute for target-specific option " name;
print "#else"
+ } else if (var_target_opt[i] == 2) {
+ print "#ifdef ACCEL_COMPILER"
+ print " ptr->" name " = global_options." name ";"
+ print "#else"
}
otype = var_opt_val_type[i];
if (otype ~ "^const char \\**$") {
@@ -1489,6 +1501,9 @@ for (i = 0; i < n_opts; i++) {
if (flag_set_p("Warning", flags[i]))
continue;
+ if (flag_set_p("NoOffload", flags[i]))
+ continue;
+
if (name in checked_options)
continue;
checked_options[name]++
diff --git a/gcc/params.opt b/gcc/params.opt
index c7d5fd4..ac1b2c7 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1226,7 +1226,7 @@ Common Joined UInteger Var(param_use_canonical_types) Init(1) IntegerRange(0, 1)
Whether to use canonical types.
-param=vect-epilogues-nomask=
-Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization
+Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization NoOffload
Enable loop epilogue vectorization using smaller vector size.
-param=vect-max-layout-candidates=
@@ -1246,11 +1246,11 @@ Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6)
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
-param=vect-partial-vector-usage=
-Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization
+Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization NoOffload
Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2.
-param=vect-inner-loop-cost-factor=
-Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization
+Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization NoOffload
The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
-param=vect-induction-float=
diff --git a/gcc/predict.cc b/gcc/predict.cc
index 872f54d..5639d81 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -245,7 +245,10 @@ unlikely_executed_edge_p (edge e)
{
return (e->src->count == profile_count::zero ()
|| e->probability == profile_probability::never ())
- || (e->flags & (EDGE_EH | EDGE_FAKE));
+ || (e->flags & EDGE_FAKE)
+ /* If we read profile and know EH edge is executed, trust it.
+ Otherwise we consider EH edges never executed. */
+ || ((e->flags & EDGE_EH) && !e->probability.reliable_p ());
}
/* Return true if edge E of function FUN is probably never executed. */
@@ -830,6 +833,26 @@ unlikely_executed_stmt_p (gimple *stmt)
{
if (!is_gimple_call (stmt))
return false;
+
+ /* Those calls are inserted by optimizers when code is known to be
+ unreachable or undefined. */
+ if (gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE)
+ || gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE_TRAP)
+ || gimple_call_builtin_p (stmt, BUILT_IN_TRAP))
+ return false;
+
+  /* The checks below do not need to be fully reliable.  A cold attribute may
+     be misplaced by the user, and in the presence of comdats a call to a
+     function with a zero profile may end up having a non-zero profile.
+
+     We later detect that the profile is lost and drop the profile of the
+     comdat.
+
+     So if we think the profile count is reliable, do not try to apply these
+     heuristics.  */
+ if (gimple_bb (stmt)->count.reliable_p ()
+ && gimple_bb (stmt)->count.nonzero_p ())
+ return gimple_bb (stmt)->count == profile_count::zero ();
/* NORETURN attribute alone is not strong enough: exit() may be quite
likely executed once during program run. */
if (gimple_call_fntype (stmt)
@@ -3269,7 +3292,8 @@ tree_estimate_probability (bool dry_run)
calculate_dominance_info (CDI_POST_DOMINATORS);
/* Decide which edges are known to be unlikely. This improves later
branch prediction. */
- determine_unlikely_bbs ();
+ if (!dry_run)
+ determine_unlikely_bbs ();
bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
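The changes above stop treating EH edges as never executed when a read-in profile says otherwise, and skip the unlikely-bb marking entirely for dry runs. A small sketch of code whose exception path can now keep its measured probability under -fprofile-use (illustrative; not one of the new testcases):

    // If the training run frequently throws here, the landing-pad edge is no
    // longer forced to profile_probability::never ().
    int
    guarded (int x)
    {
      try
        {
          if (x < 0)
            throw x;
          return x;
        }
      catch (int e)
        {
          return -e;
        }
    }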
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index cbe61b4..c723a07 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -8344,6 +8344,15 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
}
+ /* Attempt to simplify WORD_MODE SUBREGs of unary bitwise expression. */
+ if (outermode == word_mode && GET_CODE (op) == NOT
+ && SCALAR_INT_MODE_P (innermode))
+ {
+ rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte);
+ if (op0)
+ return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode);
+ }
+
scalar_int_mode int_outermode, int_innermode;
if (is_a <scalar_int_mode> (outermode, &int_outermode)
&& is_a <scalar_int_mode> (innermode, &int_innermode)
@@ -8394,9 +8403,45 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
&& VECTOR_MODE_P (innermode)
&& known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode))
&& known_eq (GET_MODE_UNIT_SIZE (outermode),
- GET_MODE_UNIT_SIZE (innermode)))
+ GET_MODE_UNIT_SIZE (innermode)))
return simplify_gen_relational (GET_CODE (op), outermode, innermode,
XEXP (op, 0), XEXP (op, 1));
+
+ /* Distribute non-paradoxical subregs through logic ops in cases where one term
+ disappears.
+
+ (subreg:M1 (and:M2 X C1)) -> (subreg:M1 X)
+ (subreg:M1 (ior:M2 X C1)) -> (subreg:M1 C1)
+ (subreg:M1 (xor:M2 X C1)) -> (subreg:M1 (not:M2 X))
+
+ if M2 is no smaller than M1 and (subreg:M1 C1) is all-ones.
+
+ (subreg:M1 (and:M2 X C2)) -> (subreg:M1 C2)
+ (subreg:M1 (ior/xor:M2 X C2)) -> (subreg:M1 X)
+
+ if M2 is no smaller than M1 and (subreg:M1 C2) is zero. */
+ if (known_ge (innersize, outersize)
+ && GET_MODE_CLASS (outermode) == GET_MODE_CLASS (innermode)
+ && (GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR)
+ && CONSTANT_P (XEXP (op, 1)))
+ {
+ rtx op1_subreg = simplify_subreg (outermode, XEXP (op, 1), innermode, byte);
+ if (op1_subreg == CONSTM1_RTX (outermode))
+ {
+ if (GET_CODE (op) == IOR)
+ return op1_subreg;
+ rtx op0 = XEXP (op, 0);
+ if (GET_CODE (op) == XOR)
+ op0 = simplify_gen_unary (NOT, innermode, op0, innermode);
+ return simplify_gen_subreg (outermode, op0, innermode, byte);
+ }
+
+ if (op1_subreg == CONST0_RTX (outermode))
+ return (GET_CODE (op) == AND
+ ? op1_subreg
+ : simplify_gen_subreg (outermode, XEXP (op, 0), innermode, byte));
+ }
+
return NULL_RTX;
}
@@ -8668,6 +8713,43 @@ test_scalar_int_ext_ops (machine_mode bmode, machine_mode smode)
lowpart_subreg (bmode, sreg, smode),
bmode),
sreg);
+
+ /* Test extensions, followed by logic ops, followed by truncations. */
+ rtx bsubreg = lowpart_subreg (bmode, sreg, smode);
+ rtx smask = gen_int_mode (GET_MODE_MASK (smode), bmode);
+ rtx inv_smask = gen_int_mode (~GET_MODE_MASK (smode), bmode);
+ ASSERT_RTX_EQ (lowpart_subreg (smode,
+ simplify_gen_binary (AND, bmode,
+ bsubreg, smask),
+ bmode),
+ sreg);
+ ASSERT_RTX_EQ (lowpart_subreg (smode,
+ simplify_gen_binary (AND, bmode,
+ bsubreg, inv_smask),
+ bmode),
+ const0_rtx);
+ ASSERT_RTX_EQ (lowpart_subreg (smode,
+ simplify_gen_binary (IOR, bmode,
+ bsubreg, smask),
+ bmode),
+ constm1_rtx);
+ ASSERT_RTX_EQ (lowpart_subreg (smode,
+ simplify_gen_binary (IOR, bmode,
+ bsubreg, inv_smask),
+ bmode),
+ sreg);
+ ASSERT_RTX_EQ (lowpart_subreg (smode,
+ simplify_gen_binary (XOR, bmode,
+ bsubreg, smask),
+ bmode),
+ lowpart_subreg (smode,
+ gen_rtx_NOT (bmode, bsubreg),
+ bmode));
+ ASSERT_RTX_EQ (lowpart_subreg (smode,
+ simplify_gen_binary (XOR, bmode,
+ bsubreg, inv_smask),
+ bmode),
+ sreg);
}
/* Verify more simplifications of integer extension/truncation.
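The new subreg simplifications above let a logic operation with a constant disappear when only the low part of the result is used. A source-level sketch of the effect (illustrative; the transformation happens on RTL subregs, so what shows up in the assembly depends on the target):

    // The mask is all-ones in QImode, so after the new rule the AND
    // simplifies to a plain low-part truncation of x.
    unsigned char
    low_byte (unsigned int x)
    {
      return static_cast<unsigned char> (x & 0xff);
    }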
diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index 652f66a..20dfe09 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -303,6 +303,11 @@ symbol_table::change_decl_assembler_name (tree decl, tree name)
warning (0, "%qD renamed after being referenced in assembly", decl);
SET_DECL_ASSEMBLER_NAME (decl, name);
+ if (DECL_RTL_SET_P (decl))
+ {
+ SET_DECL_RTL (decl, NULL);
+ make_decl_rtl (decl);
+ }
if (alias)
{
gcc_assert (!IDENTIFIER_INTERNAL_P (name));
diff --git a/gcc/target.def b/gcc/target.def
index 427dc40..5dd8f25 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2060,6 +2060,20 @@ all zeros. GCC can then try to branch around the instruction instead.",
(unsigned ifn),
default_empty_mask_is_expensive)
+/* Prefer gather/scatter loads/stores to e.g. elementwise accesses if
+   we cannot use a contiguous access.  */
+DEFHOOK
+(prefer_gather_scatter,
+ "This hook returns TRUE if gather loads or scatter stores are cheaper on\n\
+this target than a sequence of elementwise loads or stores. The @var{mode}\n\
+and @var{scale} correspond to the @code{gather_load} and\n\
+@code{scatter_store} instruction patterns. The @var{group_size} is the\n\
+number of scalar elements in each scalar loop iteration that are to be\n\
+combined into the vector.",
+ bool,
+ (machine_mode mode, int scale, unsigned int group_size),
+ hook_bool_mode_int_unsigned_false)
+
/* Target builtin that implements vector gather operation. */
DEFHOOK
(builtin_gather,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bd12ca6..0280d3b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,111 @@
+2025-07-29 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR testsuite/121215
+ * lib/profopt.exp (profopt-execute): Call cleanup-after-saved-dg-test
+	if returning early for the failing -fauto-profile case.
+
+2025-07-29 Spencer Abson <spencer.abson@arm.com>
+
+ * g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test.
+ * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise.
+ * gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise.
+
+2025-07-29 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/121208
+ * gcc.target/i386/pr121208-1a.c (dg-options): Add -mno-80387.
+ * gcc.target/i386/pr121208-1b.c (dg-options): Likewise.
+
+2025-07-29 Juergen Christ <jchrist@linux.ibm.com>
+
+ PR testsuite/121286
+ PR testsuite/121288
+ * gcc.dg/vect/pr112325.c: Adjust parameters for s390.
+ * gcc.dg/vect/pr117888-1.c: Ditto.
+
+2025-07-29 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.target/aarch64/saturating_arithmetic_1.c: Allow w0 and w1
+ to be duplicated in either order.
+ * gcc.target/aarch64/saturating_arithmetic_2.c: Likewise.
+
+2025-07-29 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.target/aarch64/cmpbr.c: Support both operand orders
+ for 8-bit and 16-bit comparisons.
+
+2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu>
+
+ PR rtl-optimization/120660
+ * gcc.dg/pr120660.c: New test.
+
+2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu>
+
+ PR rtl-optimization/119795
+ * gcc.target/i386/pr119795.c: New test.
+
+2025-07-29 Pan Li <pan2.li@intel.com>
+
+ * gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c: Add rv64
+ target for run.
+ * gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c: Ditto.
+ * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c: Ditto.
+ * gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c: New test.
+ * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c: New test.
+
+2025-07-29 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/120687
+ * gcc.dg/vect/pr120687-3.c: New testcase.
+
+2025-07-29 Nathaniel Shead <nathanieloshead@gmail.com>
+
+ PR testsuite/121285
+ * g++.dg/modules/class-11_a.H: Make static_asserts valid for
+ C++14.
+
+2025-07-29 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/120687
+ * gcc.dg/vect/pr120687-1.c: New testcase.
+ * gcc.dg/vect/pr120687-2.c: Likewise.
+
+2025-07-29 Gaius Mulley <gaiusmod2@gmail.com>
+
+ PR modula2/121289
+ * gm2/warnings/style/fail/badvarname.mod: New test.
+ * gm2/warnings/style/fail/warnings-style-fail.exp: New test.
+
+2025-07-29 Christophe Lyon <christophe.lyon@linaro.org>
+
+ * gcc.dg/pr116906-1.c: Add 'dg-do run'.
+ * gcc.dg/pr116906-2.c: Likewise.
+ * gcc.dg/pr78185.c: Likewise.
+
+2025-07-29 Jakub Jelinek <jakub@redhat.com>
+
+ PR middle-end/121159
+ * c-c++-common/pr121159.c: New test.
+ * gcc.dg/plugin/must-tail-call-2.c (test_5): Don't expect an error.
+
+2025-07-29 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR middle-end/120523
+ * gcc.dg/tree-ssa/cswtch-7.c: New test.
+
2025-07-28 Andrew Pinski <quic_apinski@quicinc.com>
PR tree-optimization/121236
diff --git a/gcc/testsuite/g++.dg/cpp/if-comma-1.C b/gcc/testsuite/g++.dg/cpp/if-comma-1.C
new file mode 100644
index 0000000..0daaff9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/if-comma-1.C
@@ -0,0 +1,42 @@
+// PR c++/120778
+// { dg-do preprocess }
+// { dg-options "-pedantic-errors" }
+
+#if (1, 2)
+#define M1 1
+#else
+#error
+#endif
+#if 1 ? 2, 3 : 4
+#define M2 2
+#else
+#error
+#endif
+#if 0 ? 2, 0 : 1
+#define M3 3
+#else
+#error
+#endif
+#if 0 || (1, 2)
+#define M4 4
+#else
+#error
+#endif
+#if 1 || (1, 2)
+#define M5 5
+#else
+#error
+#endif
+#if (1, 2) && 1
+#define M6 6
+#else
+#error
+#endif
+#if 1 && (1, 2)
+#define M7 7
+#else
+#error
+#endif
+#if M1 + M2 + M3 + M4 + M5 + M6 + M7 != 28
+#error
+#endif
diff --git a/gcc/testsuite/g++.dg/cpp1z/nontype8.C b/gcc/testsuite/g++.dg/cpp1z/nontype8.C
new file mode 100644
index 0000000..b81e85b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/nontype8.C
@@ -0,0 +1,12 @@
+// Test that the diagnostic mentions lack of constexpr
+// { dg-do compile { target c++17 } }
+
+template <auto f> void g() {}
+void x()
+{
+ using fp = void (*)();
+ fp f = nullptr; // { dg-message "constexpr" }
+ g<f>(); // { dg-error "" }
+ int *p = nullptr; // { dg-message "constexpr" }
+ g<p>(); // { dg-error "" }
+}
diff --git a/gcc/testsuite/g++.dg/modules/class-11_a.H b/gcc/testsuite/g++.dg/modules/class-11_a.H
index f7bbf9d..799dbdd 100644
--- a/gcc/testsuite/g++.dg/modules/class-11_a.H
+++ b/gcc/testsuite/g++.dg/modules/class-11_a.H
@@ -20,7 +20,7 @@ struct pr106381 {
struct L1 : pr106381 {
char x; // { dg-warning "offset" "" { target c++14 } }
};
-static_assert(sizeof(L1) == sizeof(pr106381));
+static_assert(sizeof(L1) == sizeof(pr106381), "");
struct pr120012 {
@@ -33,4 +33,4 @@ struct pr120012 {
struct L2 : pr120012 {
unsigned char y; // { dg-warning "offset" "" { target c++20 } }
};
-static_assert(sizeof(L2) > sizeof(pr120012));
+static_assert(sizeof(L2) > sizeof(pr120012), "");
diff --git a/gcc/testsuite/g++.dg/tc1/dr49.C b/gcc/testsuite/g++.dg/tc1/dr49.C
index 753d96b..6ddea6b 100644
--- a/gcc/testsuite/g++.dg/tc1/dr49.C
+++ b/gcc/testsuite/g++.dg/tc1/dr49.C
@@ -10,8 +10,8 @@ template struct R<&p>; // OK
template struct S<&p>; // OK due to parameter adjustment
int *ptr;
-template struct R<ptr>; // { dg-error "argument" }
-template struct S<ptr>; // { dg-error "argument" }
+template struct R<ptr>; // { dg-error "template argument|constant expression" }
+template struct S<ptr>; // { dg-error "template argument|constant expression" }
int v[5];
template struct R<v>; // OK due to implicit argument conversion
diff --git a/gcc/testsuite/g++.dg/template/func2.C b/gcc/testsuite/g++.dg/template/func2.C
index 0116f23..360f430 100644
--- a/gcc/testsuite/g++.dg/template/func2.C
+++ b/gcc/testsuite/g++.dg/template/func2.C
@@ -4,8 +4,7 @@ typedef void (*fptr)();
fptr zeroptr = 0;
template<typename T, fptr F> struct foo { };
template<typename T> struct foo<T,zeroptr> { };
-// { dg-error "not a valid template argument" "not valid" { target *-*-* } .-1 }
-// { dg-message "must be the address" "must be the address " { target *-*-* } .-2 }
+// { dg-error "template argument|constant expression" "not valid" { target *-*-* } .-1 }
// The rest is needed to trigger the ICE in 4.0 to 4.3:
void f() { }
diff --git a/gcc/testsuite/g++.dg/tree-prof/eh1.C b/gcc/testsuite/g++.dg/tree-prof/eh1.C
new file mode 100644
index 0000000..10a3596
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-prof/eh1.C
@@ -0,0 +1,34 @@
+/* { dg-options "-O3 -fdump-ipa-profile-details -fno-inline -fdump-tree-fixup_cfg3-details -fdump-tree-optimized-details" } */
+char a[10000];
+char b[10000];
+int sz = 1000;
+
+__attribute__((noipa))
+ void test2 ()
+{
+ throw (sz);
+}
+void
+test ()
+{
+ try
+ {
+ test2 ();
+ }
+ catch (int v)
+ {
+ __builtin_memcpy (b, a, v);
+ }
+}
+int
+main ()
+{
+ for (int i = 0; i < 100000; i++)
+ test ();
+}
+/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Average value sum:100000000" 2 "profile" } } */
+/* 1 zero count for resx block. */
+/* { dg-final-use-not-autofdo { scan-tree-dump-times "count: 0" 1 "fixup_cfg3" } } */
+/* 2 zero counts for the resx block and the return block, since the return gets duplicated by the tracer. */
+/* { dg-final-use-not-autofdo { scan-tree-dump-times "count: 0" 2 "optimized" } } */
+/* { dg-final-use-not-autofdo { scan-tree-dump-times "Average value sum:100000000" 1 "optimized" } } */
diff --git a/gcc/testsuite/g++.dg/warn/pr121133-1.C b/gcc/testsuite/g++.dg/warn/pr121133-1.C
new file mode 100644
index 0000000..6d6e13b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/pr121133-1.C
@@ -0,0 +1,16 @@
+// PR c++/121133
+// { dg-do compile }
+// { dg-options "-std=c++98 -Wno-long-long -pedantic-errors" }
+
+__extension__ typedef long long L;
+__extension__ long long a;
+struct S {
+ __extension__ long long b;
+};
+
+void
+foo ()
+{
+ __extension__ long long c;
+ c = c + (__extension__ (long long) 1);
+}
diff --git a/gcc/testsuite/g++.dg/warn/pr121133-2.C b/gcc/testsuite/g++.dg/warn/pr121133-2.C
new file mode 100644
index 0000000..cd97a76
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/pr121133-2.C
@@ -0,0 +1,5 @@
+// PR c++/121133
+// { dg-do compile }
+// { dg-options "-std=c++98 -pedantic-errors" }
+
+#include "pr121133-1.C"
diff --git a/gcc/testsuite/g++.dg/warn/pr121133-3.C b/gcc/testsuite/g++.dg/warn/pr121133-3.C
new file mode 100644
index 0000000..9ffd407
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/pr121133-3.C
@@ -0,0 +1,5 @@
+// PR c++/121133
+// { dg-do compile { target c++11 } }
+// { dg-options "-pedantic-errors" }
+
+#include "pr121133-1.C"
diff --git a/gcc/testsuite/g++.dg/warn/pr121133-4.C b/gcc/testsuite/g++.dg/warn/pr121133-4.C
new file mode 100644
index 0000000..76885ba
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/pr121133-4.C
@@ -0,0 +1,5 @@
+// PR c++/121133
+// { dg-do compile { target c++11 } }
+// { dg-options "-pedantic-errors -Wlong-long" }
+
+#include "pr121133-1.C"
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C
new file mode 100644
index 0000000..02880ef
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_binary_bf16_1.C"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */
+/* { dg-final { scan-assembler-times {\tand} 30 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+// There's no BFSUBR.
+/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
new file mode 100644
index 0000000..95cd698
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048" } */
+
+#include <stdint.h>
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \
+ TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \
+ {return p ? a * b + c : MERGE; }
+
+#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \
+ TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \
+ {return p ? a * -b + c : MERGE; }
+
+#define TEST_OP(TYPE, PRED_TYPE, T) \
+ T (TYPE, PRED_TYPE, c) \
+ T (TYPE, PRED_TYPE, 0)
+
+#define TEST(TYPE, PTYPE, SIZE) \
+ typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \
+ typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \
+ TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \
+ TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS)
+
+TEST (__bf16, uint16_t, 128)
+
+TEST (__bf16, uint16_t, 64)
+
+/* { dg-final { scan-assembler-times {\tptrue} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
new file mode 100644
index 0000000..c0d7c50
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_ternary_bf16_1.C"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tand} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C
new file mode 100644
index 0000000..19bfe95
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048" } */
+
+#define BFMLA(TYPE) \
+ TYPE test_bfmla_##TYPE (TYPE a, TYPE b, TYPE c) \
+ { return a * b + c; }
+
+#define BFMLS(TYPE) \
+ TYPE test_bfmls_##TYPE (TYPE a, TYPE b, TYPE c) \
+ { return a * -b + c; }
+
+#define TEST_TYPE(TYPE, SIZE) \
+ typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+ BFMLA (TYPE##SIZE) \
+ BFMLS (TYPE##SIZE)
+
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+TEST_TYPE (__bf16, 128)
+
+TEST_TYPE (__bf16, 64)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C
new file mode 100644
index 0000000..ef37400
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */
+
+#include "unpacked_ternary_bf16_1.C"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.dg/pr120660.c b/gcc/testsuite/gcc.dg/pr120660.c
new file mode 100644
index 0000000..6e8c5e8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr120660.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-options "-O -favoid-store-forwarding" } */
+
+int c;
+
+short
+foo (short s)
+{
+ __builtin_memset (&s, c, 1);
+ return s;
+}
+
+int
+main ()
+{
+ short x = foo (0x1111);
+ if (x != 0x1100 && x != 0x0011)
+ __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr121295-1.c b/gcc/testsuite/gcc.dg/torture/pr121295-1.c
new file mode 100644
index 0000000..7825c6e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr121295-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-copy-prop -fno-tree-pre -fno-code-hoisting" } */
+
+/* PR tree-optimization/121295 */
+
+
+int a, b, c;
+int main() {
+ int *d = &a;
+ while (b)
+ b = (*d &= 10) <= 0 || (*d = c);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr112325.c b/gcc/testsuite/gcc.dg/vect/pr112325.c
index 8689fbf..d380595 100644
--- a/gcc/testsuite/gcc.dg/vect/pr112325.c
+++ b/gcc/testsuite/gcc.dg/vect/pr112325.c
@@ -5,6 +5,7 @@
/* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */
/* { dg-additional-options "--param max-completely-peeled-insns=200" { target powerpc64*-*-* } } */
/* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */
+/* { dg-additional-options "--param max-completely-peeled-insns=200 --param min-vect-loop-bound=0" { target s390*-*-* } } */
typedef unsigned short ggml_fp16_t;
static float table_f32_f16[1 << 16];
diff --git a/gcc/testsuite/gcc.dg/vect/pr117888-1.c b/gcc/testsuite/gcc.dg/vect/pr117888-1.c
index 0b31fcd..884aed2 100644
--- a/gcc/testsuite/gcc.dg/vect/pr117888-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr117888-1.c
@@ -5,6 +5,7 @@
/* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */
/* { dg-additional-options "--param max-completely-peeled-insns=200" { target powerpc64*-*-* } } */
/* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */
+/* { dg-additional-options "--param max-completely-peeled-insns=200 --param min-vect-loop-bound=0" { target s390*-*-* } } */
typedef unsigned short ggml_fp16_t;
static float table_f32_f16[1 << 16];
diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-1.c b/gcc/testsuite/gcc.dg/vect/pr120687-1.c
new file mode 100644
index 0000000..ce9cf63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr120687-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+unsigned
+frd (unsigned *p, unsigned *lastone)
+{
+ unsigned sum = 0;
+ for (; p <= lastone; p += 16)
+ sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]
+ + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-2.c b/gcc/testsuite/gcc.dg/vect/pr120687-2.c
new file mode 100644
index 0000000..dfc6dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr120687-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-ffast-math" } */
+
+float
+frd (float *p, float *lastone)
+{
+ float sum = 0;
+ for (; p <= lastone; p += 16)
+ sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]
+ + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-3.c b/gcc/testsuite/gcc.dg/vect/pr120687-3.c
new file mode 100644
index 0000000..f20a66a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr120687-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-ffast-math" } */
+
+float
+frd (float *p, float *lastone)
+{
+ float sum = 0;
+ for (; p <= lastone; p += 2)
+ sum += p[0] + p[1];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
new file mode 100644
index 0000000..e6b071c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
@@ -0,0 +1,62 @@
+/* PR tree-optimization/121190 */
+/* { dg-options "-O3" } */
+/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "tree-vect.h"
+
+#define MAX_COMPARE 5000
+
+__attribute__((noipa))
+int diff (uint64_t *restrict p, uint64_t *restrict q)
+{
+ int i = 0;
+ while (i < MAX_COMPARE) {
+ if (*(p + i) != *(q + i))
+ return i;
+ i++;
+ }
+ return -1;
+}
+
+int main ()
+{
+ check_vect ();
+
+ long pgsz = sysconf (_SC_PAGESIZE);
+ if (pgsz == -1) {
+ fprintf (stderr, "sysconf failed\n");
+ return 0;
+ }
+
+ /* Allocate 2 consecutive pages of memory and let p1 and p2 point to the
+ beginning of each. */
+ void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ fprintf (stderr, "mmap failed\n");
+ return 0;
+ }
+ uint64_t *p1 = (uint64_t *) mem;
+ uint64_t *p2 = (uint64_t *) mem + pgsz / sizeof (uint64_t);
+
+ /* Fill the first page with zeros, except for its last 64 bits. */
+ memset (p1, 0, pgsz);
+ *(p2 - 1) = -1;
+
+ /* Make the 2nd page not accessible. */
+ mprotect (p2, pgsz, PROT_NONE);
+
+ /* Calls to diff should not read the 2nd page. */
+ for (int i = 1; i <= 20; i++) {
+ if (diff (p2 - i, p1) != i - 1)
+ __builtin_abort ();
+ }
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
new file mode 100644
index 0000000..8cb62bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
@@ -0,0 +1,54 @@
+/* PR tree-optimization/121020 */
+/* { dg-options "-O3 --vect-cost-model=unlimited" } */
+/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "tree-vect.h"
+
+__attribute__((noipa))
+bool equal (uint64_t *restrict p, uint64_t *restrict q, int length)
+{
+ for (int i = 0; i < length; i++) {
+ if (*(p + i) != *(q + i))
+ return false;
+ }
+ return true;
+}
+
+int main ()
+{
+ check_vect ();
+
+ long pgsz = sysconf (_SC_PAGESIZE);
+ if (pgsz == -1) {
+ fprintf (stderr, "sysconf failed\n");
+ return 0;
+ }
+
+ /* Allocate a whole page of memory. */
+ void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ fprintf (stderr, "mmap failed\n");
+ return 0;
+ }
+ uint64_t *p1 = (uint64_t *) mem;
+ uint64_t *p2 = (uint64_t *) mem + 32;
+
+ /* The first 16 elements pointed to by p1 and p2 are the same. */
+ for (int i = 0; i < 32; i++) {
+ *(p1 + i) = 0;
+ *(p2 + i) = (i < 16 ? 0 : -1);
+ }
+
+ /* All calls to equal should return true. */
+ for (int len = 0; len < 16; len++) {
+ if (!equal (p1 + 1, p2 + 1, len))
+ __builtin_abort();
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
index 86a632f..6abfcd6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
@@ -18,4 +18,4 @@ int main1 (short X)
}
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-*" } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-* arm*-*-*" } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c b/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c
new file mode 100644
index 0000000..c882ded
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+int n2;
+
+__attribute__((simd)) char
+w7(void)
+{
+ short int xb = n2;
+ xb = w7() < 1;
+ return xb;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
index a86af9d..34630f9 100644
--- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c
+++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
@@ -121,7 +121,7 @@ FAR_BRANCH(u64, 42);
/*
** u8_x0_eq_x1:
-** cbbeq w1, w0, .L([0-9]+)
+** cbbeq (?:w1, w0|w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -129,7 +129,7 @@ FAR_BRANCH(u64, 42);
/*
** u8_x0_ne_x1:
-** cbbne w1, w0, .L([0-9]+)
+** cbbne (?:w1, w0|w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -137,7 +137,7 @@ FAR_BRANCH(u64, 42);
/*
** u8_x0_ult_x1:
-** cbbhi w1, w0, .L([0-9]+)
+** (?:cbbhi w1, w0|cbblo w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -145,7 +145,7 @@ FAR_BRANCH(u64, 42);
/*
** u8_x0_ule_x1:
-** cbbhs w1, w0, .L([0-9]+)
+** (?:cbbhs w1, w0|cbbls w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -153,7 +153,7 @@ FAR_BRANCH(u64, 42);
/*
** u8_x0_ugt_x1:
-** cbblo w1, w0, .L([0-9]+)
+** (?:cbblo w1, w0|cbbhi w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -161,7 +161,7 @@ FAR_BRANCH(u64, 42);
/*
** u8_x0_uge_x1:
-** cbbls w1, w0, .L([0-9]+)
+** (?:cbbls w1, w0|cbbhs w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -169,7 +169,7 @@ FAR_BRANCH(u64, 42);
/*
** i8_x0_slt_x1:
-** cbbgt w1, w0, .L([0-9]+)
+** (?:cbbgt w1, w0|cbblt w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -177,7 +177,7 @@ FAR_BRANCH(u64, 42);
/*
** i8_x0_sle_x1:
-** cbbge w1, w0, .L([0-9]+)
+** (?:cbbge w1, w0|cbble w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -185,7 +185,7 @@ FAR_BRANCH(u64, 42);
/*
** i8_x0_sgt_x1:
-** cbblt w1, w0, .L([0-9]+)
+** (?:cbblt w1, w0|cbbgt w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -193,7 +193,7 @@ FAR_BRANCH(u64, 42);
/*
** i8_x0_sge_x1:
-** cbble w1, w0, .L([0-9]+)
+** (?:cbble w1, w0|cbbge w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -201,7 +201,7 @@ FAR_BRANCH(u64, 42);
/*
** u16_x0_eq_x1:
-** cbheq w1, w0, .L([0-9]+)
+** cbheq (?:w1, w0|w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -209,7 +209,7 @@ FAR_BRANCH(u64, 42);
/*
** u16_x0_ne_x1:
-** cbhne w0|w1, w1|w0, .L([0-9]+)
+** cbhne (?:w1, w0|w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -217,7 +217,7 @@ FAR_BRANCH(u64, 42);
/*
** u16_x0_ult_x1:
-** cbhhi w1, w0, .L([0-9]+)
+** (?:cbhhi w1, w0|cbhlo w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -225,7 +225,7 @@ FAR_BRANCH(u64, 42);
/*
** u16_x0_ule_x1:
-** cbhhs w1, w0, .L([0-9]+)
+** (?:cbhhs w1, w0|cbhls w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -233,7 +233,7 @@ FAR_BRANCH(u64, 42);
/*
** u16_x0_ugt_x1:
-** cbhlo w1, w0, .L([0-9]+)
+** (?:cbhlo w1, w0|cbhhi w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -241,7 +241,7 @@ FAR_BRANCH(u64, 42);
/*
** u16_x0_uge_x1:
-** cbhls w1, w0, .L([0-9]+)
+** (?:cbhls w1, w0|cbhhs w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -249,7 +249,7 @@ FAR_BRANCH(u64, 42);
/*
** i16_x0_slt_x1:
-** cbhgt w1, w0, .L([0-9]+)
+** (?:cbhgt w1, w0|cbhlt w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -257,7 +257,7 @@ FAR_BRANCH(u64, 42);
/*
** i16_x0_sle_x1:
-** cbhge w1, w0, .L([0-9]+)
+** (?:cbhge w1, w0|cbhle w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -265,7 +265,7 @@ FAR_BRANCH(u64, 42);
/*
** i16_x0_sgt_x1:
-** cbhlt w1, w0, .L([0-9]+)
+** (?:cbhlt w1, w0|cbhgt w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
@@ -273,7 +273,7 @@ FAR_BRANCH(u64, 42);
/*
** i16_x0_sge_x1:
-** cbhle w1, w0, .L([0-9]+)
+** (?:cbhle w1, w0|cbhge w0, w1), .L([0-9]+)
** b not_taken
** .L\1:
** b taken
diff --git a/gcc/testsuite/gcc.target/aarch64/pr121300.c b/gcc/testsuite/gcc.target/aarch64/pr121300.c
new file mode 100644
index 0000000..5f2cd9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr121300.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-S -O3 -march=armv9-a+sme2" } */
+
+#include <arm_sme.h>
+
+svfloat16x2_t test (svfloat16x2_t zd, svfloat16x2_t zm) __arm_streaming
+{
+ return svamin_f16_x2 (zd, zm); // { dg-error "ACLE function .svamin_f16_x2. requires ISA extension .faminmax." }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c
index acd2e11..8fc1569 100644
--- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c
@@ -4,24 +4,24 @@
/*
** uadd:
-** dup v([0-9]+).8b, w1
-** dup v([0-9]+).8b, w0
+** dup v([0-9]+).8b, w[01]
+** dup v([0-9]+).8b, w[01]
** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2)
** umov w0, v\3.b\[0\]
** ret
*/
/*
** uadd2:
-** dup v([0-9]+).8b, w1
-** dup v([0-9]+).8b, w0
+** dup v([0-9]+).8b, w[01]
+** dup v([0-9]+).8b, w[01]
** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2)
** umov w0, v\3.b\[0\]
** ret
*/
/*
** usub: { xfail *-*-* }
-** dup v([0-9]+).8b, w1
-** dup v([0-9]+).8b, w0
+** dup v([0-9]+).8b, w[01]
+** dup v([0-9]+).8b, w[01]
** uqsub b([0-9]+), b\1, b\2
** umov w0, v\3.b\[0\]
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c
index 86c88f8..dd0fefa 100644
--- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c
@@ -4,16 +4,16 @@
/*
** uadd:
-** dup v([0-9]+).4h, w1
-** dup v([0-9]+).4h, w0
+** dup v([0-9]+).4h, w[01]
+** dup v([0-9]+).4h, w[01]
** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2)
** umov w0, v\3.h\[0\]
** ret
*/
/*
** uadd2:
-** dup v([0-9]+).4h, w1
-** dup v([0-9]+).4h, w0
+** dup v([0-9]+).4h, w[01]
+** dup v([0-9]+).4h, w[01]
** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2)
** umov w0, v\3.h\[0\]
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c
new file mode 100644
index 0000000..f84ded5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_builtin_fmax_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c
new file mode 100644
index 0000000..bceddf9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_builtin_fmin_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c
new file mode 100644
index 0000000..e59864b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fadd_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */
+/* { dg-final { scan-assembler-times {\tand} 33 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c
new file mode 100644
index 0000000..1ca3dbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fdiv_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tand} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c
new file mode 100644
index 0000000..282f3ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */
+
+#include "unpacked_cond_fmaxnm_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c
new file mode 100644
index 0000000..8226a6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */
+
+#include "unpacked_cond_fminnm_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
new file mode 100644
index 0000000..cae9242
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE1 *__restrict p) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
+
+TEST_ALL (FMLA (f16), _Float16, uint64_t, 32)
+
+TEST_ALL (FMLA (f16), _Float16, uint32_t, 64)
+
+TEST_ALL (FMLA (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
new file mode 100644
index 0000000..72e04a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fmla_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
new file mode 100644
index 0000000..db0f818
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE1 *__restrict p) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
+
+TEST_ALL (FMLS (f16), _Float16, uint64_t, 32)
+
+TEST_ALL (FMLS (f16), _Float16, uint32_t, 64)
+
+TEST_ALL (FMLS (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
new file mode 100644
index 0000000..3012052
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fmls_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c
new file mode 100644
index 0000000..21713f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fmul_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */
+/* { dg-final { scan-assembler-times {\tand} 15 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
new file mode 100644
index 0000000..07bab63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE1 *__restrict p) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
+
+TEST_ALL (FNMLA (f16), _Float16, uint64_t, 32)
+
+TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64)
+
+TEST_ALL (FNMLA (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
new file mode 100644
index 0000000..daef4e49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fnmla_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
new file mode 100644
index 0000000..5526378
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE1 *__restrict p) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
+
+TEST_ALL (FNMLS (f16), _Float16, uint64_t, 32)
+
+TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64)
+
+TEST_ALL (FNMLS (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
new file mode 100644
index 0000000..8a8f348
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fnmls_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c
new file mode 100644
index 0000000..cd7a0e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fsubr_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c
new file mode 100644
index 0000000..312bccc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \
+ void \
+ f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE0 *__restrict d) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ if (FN > d[i]) \
+ out[i] = 3; \
+ }
+
+TEST_FN (FMLA (f16), _Float16, uint64_t, 32)
+
+TEST_FN (FMLA (f16), _Float16, uint32_t, 64)
+
+TEST_FN (FMLA (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c
new file mode 100644
index 0000000..ca3f94d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fmla_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c
new file mode 100644
index 0000000..f7cbfb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \
+ void \
+ f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE0 *__restrict d) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ if (FN > d[i]) \
+ out[i] = 3; \
+ }
+
+TEST_FN (FMLS (f16), _Float16, uint64_t, 32)
+
+TEST_FN (FMLS (f16), _Float16, uint32_t, 64)
+
+TEST_FN (FMLS (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c
new file mode 100644
index 0000000..387dbec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fmls_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c
new file mode 100644
index 0000000..bf13ff5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \
+ void \
+ f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE0 *__restrict d) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ if (FN > d[i]) \
+ out[i] = 3; \
+ }
+
+TEST_FN (FNMLA (f16), _Float16, uint64_t, 32)
+
+TEST_FN (FNMLA (f16), _Float16, uint32_t, 64)
+
+TEST_FN (FNMLA (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c
new file mode 100644
index 0000000..64130ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fnmla_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c
new file mode 100644
index 0000000..399920a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i])
+#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i])
+#define FNMLA(SUFF) -FMLA (SUFF)
+#define FNMLS(SUFF) -FMLS (SUFF)
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \
+ void \
+ f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b, \
+ TYPE0 *__restrict c, \
+ TYPE0 *__restrict d) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ if (FN > d[i]) \
+ out[i] = 3; \
+ }
+
+TEST_FN (FNMLS (f16), _Float16, uint64_t, 32)
+
+TEST_FN (FNMLS (f16), _Float16, uint32_t, 64)
+
+TEST_FN (FNMLS (f32), float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c
new file mode 100644
index 0000000..59fb7f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fnmls_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr119795.c b/gcc/testsuite/gcc.target/i386/pr119795.c
new file mode 100644
index 0000000..03c91cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119795.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O -fschedule-insns -favoid-store-forwarding" } */
+
+unsigned a, b, c;
+
+void
+foo (_BitInt(2) b2, unsigned _BitInt(255) by, unsigned _BitInt(5) b5,
+ unsigned _BitInt(256) *ret)
+{
+ unsigned _BitInt(255) bx = b2;
+ by += 0x80000000000000000000000000000000wb;
+ __builtin_memmove (&b, &c, 3);
+ unsigned d = b;
+ unsigned e = __builtin_stdc_rotate_right (0x1uwb % b5, a);
+ unsigned _BitInt(256) r = by + bx + d + e;
+ *ret = r;
+}
+
+int
+main ()
+{
+ unsigned _BitInt(256) x;
+ foo (0, -1, 2, &x);
+ if (x != 0x80000000000000000000000000000000wb)
+ __builtin_abort();
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/pr120427-5.c b/gcc/testsuite/gcc.target/i386/pr120427-5.c
new file mode 100644
index 0000000..7199aef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120427-5.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-Oz" } */
+
+long long
+func1 (void)
+{
+ return -1;
+}
+/* { dg-final { scan-assembler-times "pushq\[ \\t\]+\\\$-1" 1 } } */
+/* { dg-final { scan-assembler-times "popq\[ \\t\]+%rax" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1a.c b/gcc/testsuite/gcc.target/i386/pr121208-1a.c
index ac851cb..cb8bd0b 100644
--- a/gcc/testsuite/gcc.target/i386/pr121208-1a.c
+++ b/gcc/testsuite/gcc.target/i386/pr121208-1a.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */
+/* { dg-options "-O2 -fPIC -mno-80387 -mtls-dialect=gnu" } */
extern __thread int bar;
extern void func (void);
diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1b.c b/gcc/testsuite/gcc.target/i386/pr121208-1b.c
index b97ac71..037e9a0 100644
--- a/gcc/testsuite/gcc.target/i386/pr121208-1b.c
+++ b/gcc/testsuite/gcc.target/i386/pr121208-1b.c
@@ -1,4 +1,4 @@
/* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */
+/* { dg-options "-O2 -fPIC -mno-80387 -mtls-dialect=gnu2" } */
#include "pr121208-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr121274.c b/gcc/testsuite/gcc.target/i386/pr121274.c
new file mode 100644
index 0000000..16760cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121274.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpextrq" } } */
+/* { dg-final { scan-assembler-not "vpinsrq" } } */
+
+typedef int v16si __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+
+v4si f(v16si x)
+{
+ return __builtin_shufflevector(x, x, 0, 1, 2, 3);
+}
+
+v4si g(v16si x)
+{
+ return __builtin_shufflevector(x, x, 4, 5, 6, 7);
+}
+
+v4si f1(__int128 *x)
+{
+ __int128 t = *x;
+ asm("":"+x"(t));
+ return (v4si)t;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c
index e7b1ef0..8e7a788 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c
@@ -19,4 +19,7 @@ TEST_BINARY_VX_UNSIGNED_0(T)
/* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
/* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */
-/* { dg-final { scan-assembler-times {vaaddu.vx} 1 { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */
+/* { dg-final { scan-assembler-times {vaaddu.vx} 2 { target { no-opts {
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2"
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4"
+ } } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c
index 559887e..d213c18 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c
@@ -19,4 +19,4 @@ TEST_BINARY_VX_UNSIGNED_0(T)
/* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
/* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */
-/* { dg-final { scan-assembler-times {vaaddu.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vaaddu.vx} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c
index c851f23..3ecfce6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c
index b7805c1..7ce1fe8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
@@ -31,5 +32,6 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B
/* { dg-final { scan-assembler {vremu.vx} } } */
/* { dg-final { scan-assembler {vmaxu.vx} } } */
/* { dg-final { scan-assembler {vminu.vx} } } */
+/* { dg-final { scan-assembler {vsaddu.vx} } } */
/* { dg-final { scan-assembler {vssubu.vx} } } */
/* { dg-final { scan-assembler {vaaddu.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c
index 8295dc2..c84a30c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
@@ -33,4 +34,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B
/* { dg-final { scan-assembler {vminu.vx} } } */
/* { dg-final { scan-assembler-not {vsaddu.vx} } } */
/* { dg-final { scan-assembler-not {vssubu.vx} } } */
-/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */
+/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts {
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2"
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4"
+ } } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c
index d214da9..9f3d7df 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c
index b7c7ad4..5497b5a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c
index dd9c845..3a8e85f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c
index 1fda062..060d591 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c
index 725a55b..86a6c45 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c
index 3a215ea..f51e7a1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c
index ac4d100..79b7477 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c
index 5eb0ed6..ac5fd69 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY)
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c
index 8b404b6..84aa06b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c
@@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h
index b7c0f79..de48ebd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h
@@ -363,14 +363,30 @@ DEF_AVG_FLOOR(int8_t, int16_t)
DEF_AVG_FLOOR(int16_t, int32_t)
DEF_AVG_FLOOR(int32_t, int64_t)
+#define DEF_AVG_CEIL(NT, WT) \
+NT \
+test_##NT##_avg_ceil(NT x, NT y) \
+{ \
+ return (NT)(((WT)x + (WT)y + 1) >> 1); \
+}
+
+DEF_AVG_CEIL(uint8_t, uint16_t)
+DEF_AVG_CEIL(uint16_t, uint32_t)
+DEF_AVG_CEIL(uint32_t, uint64_t)
+
#ifdef HAS_INT128
DEF_AVG_FLOOR(uint64_t, uint128_t)
DEF_AVG_FLOOR(int64_t, int128_t)
+
+ DEF_AVG_CEIL(uint64_t, uint128_t)
#endif
#define AVG_FLOOR_FUNC(T) test_##T##_avg_floor
#define AVG_FLOOR_FUNC_WRAP(T) AVG_FLOOR_FUNC(T)
+#define AVG_CEIL_FUNC(T) test_##T##_avg_ceil
+#define AVG_CEIL_FUNC_WRAP(T) AVG_CEIL_FUNC(T)
+
#define TEST_BINARY_VX_SIGNED_0(T) \
DEF_VX_BINARY_CASE_0_WRAP(T, +, add) \
DEF_VX_BINARY_CASE_0_WRAP(T, -, sub) \
@@ -405,5 +421,6 @@ DEF_AVG_FLOOR(int32_t, int64_t)
DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_ADD_FUNC(T), sat_add) \
DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_SUB_FUNC(T), sat_sub) \
DEF_VX_BINARY_CASE_2_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor) \
+ DEF_VX_BINARY_CASE_2_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil) \
#endif
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h
index 6847309..5024ae7 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h
@@ -5298,4 +5298,200 @@ int64_t TEST_BINARY_DATA(int64_t, avg_floor)[][3][N] =
},
};
+uint8_t TEST_BINARY_DATA(uint8_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 127 },
+ {
+ 127, 127, 127, 127,
+ 128, 128, 128, 128,
+ 255, 255, 255, 255,
+ 1, 1, 1, 1,
+ },
+ {
+ 127, 127, 127, 127,
+ 128, 128, 128, 128,
+ 191, 191, 191, 191,
+ 64, 64, 64, 64,
+ },
+ },
+ {
+ { 255 },
+ {
+ 0, 0, 0, 0,
+ 255, 255, 255, 255,
+ 254, 254, 254, 254,
+ 1, 1, 1, 1,
+ },
+ {
+ 128, 128, 128, 128,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 128, 128, 128, 128,
+ },
+ },
+};
+
+uint16_t TEST_BINARY_DATA(uint16_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 32767 },
+ {
+ 32767, 32767, 32767, 32767,
+ 32768, 32768, 32768, 32768,
+ 65535, 65535, 65535, 65535,
+ 1, 1, 1, 1,
+ },
+ {
+ 32767, 32767, 32767, 32767,
+ 32768, 32768, 32768, 32768,
+ 49151, 49151, 49151, 49151,
+ 16384, 16384, 16384, 16384,
+ },
+ },
+ {
+ { 65535 },
+ {
+ 0, 0, 0, 0,
+ 65535, 65535, 65535, 65535,
+ 65534, 65534, 65534, 65534,
+ 1, 1, 1, 1,
+ },
+ {
+ 32768, 32768, 32768, 32768,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 32768, 32768, 32768, 32768,
+ },
+ },
+};
+
+uint32_t TEST_BINARY_DATA(uint32_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 2147483647 },
+ {
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 1, 1, 1, 1,
+ },
+ {
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ 3221225471, 3221225471, 3221225471, 3221225471,
+ 1073741824, 1073741824, 1073741824, 1073741824,
+ },
+ },
+ {
+ { 4294967295 },
+ {
+ 0, 0, 0, 0,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967294, 4294967294, 4294967294, 4294967294,
+ 1, 1, 1, 1,
+ },
+ {
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ },
+ },
+};
+
+uint64_t TEST_BINARY_DATA(uint64_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 9223372036854775807ull },
+ {
+ 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull,
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 1, 1, 1, 1,
+ },
+ {
+ 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull,
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ 13835058055282163711ull, 13835058055282163711ull, 13835058055282163711ull, 13835058055282163711ull,
+ 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull,
+ },
+ },
+ {
+ { 18446744073709551615ull },
+ {
+ 0, 0, 0, 0,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull,
+ 1, 1, 1, 1,
+ },
+ {
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ },
+ },
+};
+
#endif
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c
new file mode 100644
index 0000000..6297672
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint16_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c
new file mode 100644
index 0000000..30db24b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint32_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c
new file mode 100644
index 0000000..db3c911
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v && rv64 } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint64_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c
new file mode 100644
index 0000000..a7755f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint8_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c
new file mode 100644
index 0000000..7409232
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
+
+#include "sat_arith.h"
+
+#define NT uint16_t
+#define WT uint32_t
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c
new file mode 100644
index 0000000..ec79e5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
+
+#include "sat_arith.h"
+
+#define NT uint8_t
+#define WT uint16_t
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c
new file mode 100644
index 0000000..eb95184
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
+
+#include "sat_arith.h"
+
+#define NT uint8_t
+#define WT uint32_t
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c
new file mode 100644
index 0000000..b1d33a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
+
+#include "sat_arith.h"
+
+#define NT uint16_t
+#define WT uint64_t
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c
new file mode 100644
index 0000000..af5ffecf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
+
+#include "sat_arith.h"
+
+#define NT uint32_t
+#define WT uint64_t
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c
new file mode 100644
index 0000000..d65cab0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
+
+#include "sat_arith.h"
+
+#define NT uint8_t
+#define WT uint64_t
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c
new file mode 100644
index 0000000..e212391
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { rv32 || rv64 } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define NT uint16_t
+#define WT uint32_t
+#define NAME usmul
+#define DATA TEST_BINARY_DATA_WRAP(NT, NAME)
+#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME)
+#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y)
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c
index 065afb8..79d3fb3 100644
--- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { rv32 } } } */
+/* { dg-do run { target { rv32 || rv64 } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c
index 062bbc9..ad63db3 100644
--- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { rv32 } } } */
+/* { dg-do run { target { rv32 || rv64 } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c
new file mode 100644
index 0000000..f5a0ab5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { rv32 || rv64 } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define NT uint8_t
+#define WT uint16_t
+#define NAME usmul
+#define DATA TEST_BINARY_DATA_WRAP(NT, NAME)
+#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME)
+#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y)
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c
new file mode 100644
index 0000000..32074a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { rv32 || rv64 } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define NT uint8_t
+#define WT uint32_t
+#define NAME usmul
+#define DATA TEST_BINARY_DATA_WRAP(NT, NAME)
+#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME)
+#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y)
+
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c
index e6f632b..16ca905 100644
--- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c
+++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { rv32 } } } */
+/* { dg-do run { target { rv32 || rv64 } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c
new file mode 100644
index 0000000..56c3d77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */
+/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */
+/* { dg-final { scan-assembler-not {\tbrc} } } */
+/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */
+
+#define TEST(T, U) \
+ int test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ return 0; \
+ else if (x < y) \
+ return -1; \
+ else if (x > y) \
+ return 1; \
+ else \
+ return 2; \
+ }
+
+TEST (float, float)
+TEST (double, double)
+TEST (long double, longdouble)
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c
new file mode 100644
index 0000000..0c6e6b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mzarch -march=z13 -ffinite-math-only -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */
+/* { dg-final { scan-assembler-times {\tc[edx]br\t} 3 } } */
+/* { dg-final { scan-assembler-not {\tbrc} } } */
+/* { dg-final { scan-assembler-not {\tk[edx]br\t} } } */
+
+#define TEST(T, U) \
+ int test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ return 0; \
+ else if (x < y) \
+ return -1; \
+ else if (x > y) \
+ return 1; \
+ else \
+ return 2; \
+ }
+
+TEST (float, float)
+TEST (double, double)
+TEST (long double, longdouble)
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c
new file mode 100644
index 0000000..2f567d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 42\)} 3 optimized } } */
+/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */
+/* { dg-final { scan-assembler-not {\tbrc} } } */
+/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */
+
+#define TEST(T, U) \
+ int test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ return 0; \
+ else if (x < y) \
+ return -1; \
+ else if (x > y) \
+ return 1; \
+ else \
+ return 42; \
+ }
+
+TEST (float, float)
+TEST (double, double)
+TEST (long double, longdouble)
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c
new file mode 100644
index 0000000..4531ecb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c
@@ -0,0 +1,53 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 0\)} 3 optimized } } */
+/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */
+/* { dg-final { scan-assembler-not {\tloc} } } */
+/* { dg-final { scan-assembler-not {\tbrc} } } */
+/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */
+
+/* At the time of writing we emit
+
+ kebr %f0,%f2
+ jo .L2
+ je .L3
+ jnh .L10
+ jg f3@PLT
+.L10:
+ jg f2@PLT
+.L3:
+ jg f1@PLT
+.L2:
+ jg f4@PLT
+
+ which is not optimal. Instead we could fold the conditional branch with the
+ unconditional one into something along the lines of
+
+ kebr %f0,%f2
+ jo f4@PLT
+ je f1@PLT
+ jnh f2@PLT
+ jg f3@PLT
+*/
+
+void f1 (void);
+void f2 (void);
+void f3 (void);
+void f4 (void);
+
+#define TEST(T, U) \
+ void test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ f1 (); \
+ else if (x < y) \
+ f2 (); \
+ else if (x > y) \
+ f3 (); \
+ else \
+ f4 (); \
+ }
+
+TEST (float, float)
+TEST (double, double)
+TEST (long double, longdouble)
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-1.c b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c
new file mode 100644
index 0000000..8ca2677
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 4 optimized } } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 5 optimized } } */
+/* { dg-final { scan-assembler-times {\tlhi} 9 } } */
+/* { dg-final { scan-assembler-times {\tloc} 18 } } */
+
+#define TEST(T, U) \
+ int test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ return 0; \
+ else if (x < y) \
+ return -1; \
+ else \
+ return 1; \
+ }
+
+TEST(signed char, schar)
+TEST(unsigned char, uchar)
+TEST(char, char)
+
+TEST(short, sshort)
+TEST(unsigned short, ushort)
+
+TEST(int, sint)
+TEST(unsigned int, uint)
+
+TEST(long, slong)
+TEST(unsigned long, ulong)
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-2.c b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c
new file mode 100644
index 0000000..5f7975c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */
+/* { dg-final { scan-assembler-times {\tvecg} 1 } } */
+/* { dg-final { scan-assembler-times {\tveclg} 1 } } */
+/* { dg-final { scan-assembler-times {\tvchlgs} 2 } } */
+/* { dg-final { scan-assembler-times {\tvceqgs} 2 } } */
+/* { dg-final { scan-assembler-times {\tlhi} 2 } } */
+/* { dg-final { scan-assembler-times {\tloc} 4 } } */
+
+#define TEST(T, U) \
+ int test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ return 0; \
+ else if (x < y) \
+ return -1; \
+ else \
+ return 1; \
+ }
+
+TEST(__int128, sint128)
+TEST(unsigned __int128, uint128)
diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-3.c b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c
new file mode 100644
index 0000000..46b0e4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -march=z17 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */
+/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */
+/* { dg-final { scan-assembler-times {\tvecq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tveclq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tloc} 4 } } */
+
+#define TEST(T, U) \
+ int test_##U (T x, T y) \
+ { \
+ if (x == y) \
+ return 0; \
+ else if (x < y) \
+ return -1; \
+ else \
+ return 1; \
+ }
+
+TEST(__int128, sint128)
+TEST(unsigned __int128, uint128)
diff --git a/gcc/testsuite/gfortran.dg/split_1.f90 b/gcc/testsuite/gfortran.dg/split_1.f90
new file mode 100644
index 0000000..21659b0
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/split_1.f90
@@ -0,0 +1,28 @@
+! { dg-do run }
+program b
+ character(len=:), allocatable :: input
+ character(len=2) :: set = ', '
+ integer :: p
+ input = " one,last example,"
+ p = 0
+
+ call split(input, set, p)
+ if (p /= 1) STOP 1
+ call split(input, set, p)
+ if (p /= 5) STOP 2
+ call split(input, set, p)
+ if (p /= 10) STOP 3
+ call split(input, set, p)
+ if (p /= 18) STOP 4
+ call split(input, set, p)
+ if (p /= 19) STOP 5
+
+ call split(input, set, p, .true.)
+ if (p /= 18) STOP 6
+ call split(input, set, p, .true.)
+ if (p /= 10) STOP 7
+ call split(input, set, p, .true.)
+ if (p /= 5) STOP 8
+ call split(input, set, p, .true.)
+ if (p /= 1) STOP 9
+end program b
diff --git a/gcc/testsuite/gfortran.dg/split_2.f90 b/gcc/testsuite/gfortran.dg/split_2.f90
new file mode 100644
index 0000000..9afb30b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/split_2.f90
@@ -0,0 +1,22 @@
+! { dg-do run }
+program b
+ integer, parameter :: ucs4 = selected_char_kind('ISO_10646')
+ character(kind=ucs4, len=:), allocatable :: input, set
+ integer :: p = 0
+
+ input = char(int(z'4f60'), ucs4) // char(int(z'597d'), ucs4) // char(int(z'4f60'), ucs4) // char(int(z'4e16'), ucs4)
+ set = char(int(z'597d'), ucs4) // char(int(z'4e16'), ucs4)
+
+ call split(input, set, p)
+ if (p /= 2) stop 1
+ call split(input, set, p)
+ if (p /= 4) stop 2
+ call split(input, set, p)
+ if (p /= 5) stop 3
+ call split(input, set, p, .true.)
+ if (p /= 4) stop 4
+ call split(input, set, p, .true.)
+ if (p /= 2) stop 5
+ call split(input, set, p, .true.)
+ if (p /= 0) stop 6
+end program b
diff --git a/gcc/testsuite/gfortran.dg/split_3.f90 b/gcc/testsuite/gfortran.dg/split_3.f90
new file mode 100644
index 0000000..bec3fdc
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/split_3.f90
@@ -0,0 +1,11 @@
+! { dg-do run }
+! { dg-shouldfail "Fortran runtime error" }
+
+program b
+ character(len=:), allocatable :: input
+ character(len=2) :: set = ', '
+ integer :: p
+ input = " one,last example,"
+ p = -1
+ call split(input, set, p)
+end program b
diff --git a/gcc/testsuite/gfortran.dg/split_4.f90 b/gcc/testsuite/gfortran.dg/split_4.f90
new file mode 100644
index 0000000..a3c27bb
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/split_4.f90
@@ -0,0 +1,11 @@
+! { dg-do run }
+! { dg-shouldfail "Fortran runtime error" }
+
+program b
+ character(len=:), allocatable :: input
+ character(len=2) :: set = ', '
+ integer :: p
+ input = " one,last example,"
+ p = 0
+ call split(input, set, p, .true.)
+end program b
diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
index b4d244b..81d86c6 100644
--- a/gcc/testsuite/lib/profopt.exp
+++ b/gcc/testsuite/lib/profopt.exp
@@ -382,6 +382,7 @@ proc profopt-execute { src } {
unsupported "$testcase"
unset testname_with_flags
verbose "$src not supported on this target, skipping it" 3
+ cleanup-after-saved-dg-test
return
}
@@ -458,6 +459,7 @@ proc profopt-execute { src } {
unsupported "$testcase -fauto-profile: cannot run create_gcov"
unset testname_with_flags
set status "fail"
+ cleanup-after-saved-dg-test
return
}
set status [remote_wait "" 300]
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index a8b800b..b7ce072 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1755,7 +1755,7 @@ strip_nop_cond_scalar_reduction (bool has_nop, tree op)
EXTENDED is true if PHI has > 2 arguments. */
static bool
-is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg_0, tree arg_1,
+is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
tree *op0, tree *op1, bool extended, bool* has_nop,
gimple **nop_reduc)
{
@@ -1763,6 +1763,7 @@ is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg
gimple *stmt;
gimple *header_phi = NULL;
enum tree_code reduction_op;
+ basic_block bb = gimple_bb (phi);
class loop *loop = bb->loop_father;
edge latch_e = loop_latch_edge (loop);
imm_use_iterator imm_iter;
@@ -1790,7 +1791,7 @@ is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg
if (gimple_bb (header_phi) != loop->header)
return false;
- if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != phi_res)
+ if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
return false;
if (gimple_code (stmt) != GIMPLE_ASSIGN
@@ -1888,7 +1889,7 @@ is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg
continue;
if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
continue;
- if (use_stmt != SSA_NAME_DEF_STMT (phi_res))
+ if (use_stmt != phi)
return false;
}
}
@@ -2198,8 +2199,8 @@ commutative:
and *RES to the new values if the factoring happened.
Loops until all of the factoring is completed. */
-static bool
-factor_out_operators (gimple_stmt_iterator *pgsi, tree *res, gimple_stmt_iterator *gsi,
+static void
+factor_out_operators (tree *res, gimple_stmt_iterator *gsi,
tree *arg0, tree *arg1, gphi *phi)
{
gimple_match_op arg0_op, arg1_op;
@@ -2207,28 +2208,28 @@ factor_out_operators (gimple_stmt_iterator *pgsi, tree *res, gimple_stmt_iterato
again:
if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME)
- return repeated;
+ return;
if (operand_equal_p (*arg0, *arg1))
- return repeated;
+ return;
/* If either args have > 1 use, then this transformation actually
increases the number of expressions evaluated at runtime. */
if (repeated
? (!has_zero_uses (*arg0) || !has_zero_uses (*arg1))
: (!has_single_use (*arg0) || !has_single_use (*arg1)))
- return repeated;
+ return;
gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0);
if (!gimple_extract_op (arg0_def_stmt, &arg0_op))
- return repeated;
+ return;
+ /* At this point there should be no ssa names occurring in abnormals. */
gcc_assert (!arg0_op.operands_occurs_in_abnormal_phi ());
gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1);
if (!gimple_extract_op (arg1_def_stmt, &arg1_op))
- return repeated;
+ return;
+ /* At this point there should be no ssa names occurring in abnormals. */
gcc_assert (!arg1_op.operands_occurs_in_abnormal_phi ());
@@ -2237,15 +2238,15 @@ again:
or the number operands. */
if (arg1_op.code != arg0_op.code
|| arg1_op.num_ops != arg0_op.num_ops)
- return repeated;
+ return;
tree new_arg0, new_arg1;
int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1);
if (opnum == -1)
- return repeated;
+ return;
if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
- return repeated;
+ return;
tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL);
/* Create the operation stmt if possible and insert it. */
@@ -2261,7 +2262,7 @@ again:
if (!result)
{
release_ssa_name (new_res);
- return repeated;
+ return;
}
gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING);
@@ -2276,10 +2277,6 @@ again:
fprintf (dump_file, ".\n");
}
- /* Remove the phi and move to the next phi arg if needed. */
- if (!repeated)
- remove_phi_node (pgsi, false);
-
/* Remove the old operation(s) that has single use. */
gimple_stmt_iterator gsi_for_def;
@@ -2294,6 +2291,13 @@ again:
*arg0 = new_arg0;
*arg1 = new_arg1;
*res = new_res;
+
+ /* Update the phi node too. */
+ gimple_phi_set_result (phi, new_res);
+ gimple_phi_arg (phi, 0)->def = new_arg0;
+  gimple_phi_arg (phi, 1)->def = new_arg1;
+ update_stmt (phi);
+
repeated = true;
goto again;
}
@@ -2403,9 +2407,8 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
vectorization. */
-static bool
-predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
- gimple_stmt_iterator *gsi, bool loop_versioned)
+static void
+predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
{
gimple *new_stmt = NULL, *reduc, *nop_reduc;
tree rhs, res, arg0, arg1, op0, op1, scev;
@@ -2415,11 +2418,10 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
basic_block bb;
unsigned int i;
bool has_nop;
- bool removed_phi = false;
res = gimple_phi_result (phi);
if (virtual_operand_p (res))
- return removed_phi;
+ return;
if ((rhs = degenerate_phi_result (phi))
|| ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
@@ -2436,7 +2438,7 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
new_stmt = gimple_build_assign (res, rhs);
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
update_stmt (new_stmt);
- return removed_phi;
+ return;
}
bb = gimple_bb (phi);
@@ -2482,13 +2484,9 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
/* Factor out operand if possible. This can only be done easily
for PHI with 2 elements. */
- if (factor_out_operators (phi_gsi, &res, gsi, &arg0, &arg1, phi))
- {
- phi = nullptr;
- removed_phi = true;
- }
+ factor_out_operators (&res, gsi, &arg0, &arg1, phi);
- if (is_cond_scalar_reduction (bb, res, &reduc, arg0, arg1,
+ if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
&op0, &op1, false, &has_nop,
&nop_reduc))
{
@@ -2517,7 +2515,7 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
fprintf (dump_file, "new phi replacement stmt\n");
print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
}
- return removed_phi;
+ return;
}
/* Create hashmap for PHI node which contain vector of argument indexes
@@ -2585,7 +2583,7 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
      /* Gimplify the condition to a valid cond-expr conditional operand.  */
cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
NULL_TREE, true, GSI_SAME_STMT);
- if (!(is_cond_scalar_reduction (bb, res, &reduc, arg0 , arg1,
+ if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
&op0, &op1, true, &has_nop, &nop_reduc)))
rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
swap ? arg1 : arg0,
@@ -2615,7 +2613,6 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi,
fprintf (dump_file, "new extended phi replacement stmt\n");
print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
}
- return removed_phi;
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
@@ -2652,8 +2649,8 @@ predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
gsi_next (&phi_gsi);
else
{
- if (!predicate_scalar_phi (&phi_gsi, phi, &gsi, loop_versioned))
- remove_phi_node (&phi_gsi, false);
+ predicate_scalar_phi (phi, &gsi, loop_versioned);
+ remove_phi_node (&phi_gsi, false);
}
}
}
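The tree-if-conv.cc change above simplifies the contract between predicate_scalar_phi and its caller: the PHI is now always rewritten in place (factor_out_operators updates its result and arguments), and predicate_all_scalar_phis removes it unconditionally afterwards, with no "was it already removed?" bookkeeping. A minimal, self-contained C++ model of that contract follows; the types and helper names are hypothetical stand-ins, not the GCC API.

    #include <iostream>
    #include <list>
    #include <string>

    struct phi_node { std::string result; };

    /* Stand-in for predicate_scalar_phi: it rewrites the PHI in place and
       returns void, so it never needs to report that the node was removed.  */
    static void predicate_phi (phi_node &phi)
    {
      phi.result = "predicated_" + phi.result;
    }

    int main ()
    {
      std::list<phi_node> phis = { {"a"}, {"b"} };
      for (auto it = phis.begin (); it != phis.end (); )
        {
          predicate_phi (*it);
          std::cout << it->result << '\n';
          /* Stand-in for remove_phi_node: the caller erases unconditionally.  */
          it = phis.erase (it);
        }
      return 0;
    }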
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 3c38f3d..c140f76 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -7167,9 +7167,10 @@ reassociate_bb (basic_block bb)
	  /* If the target supports FMA, rank_ops_for_fma will detect if
the chain has fmas and rearrange the ops if so. */
- if (direct_internal_fn_supported_p (IFN_FMA,
- TREE_TYPE (lhs),
- opt_type)
+ if (!reassoc_insert_powi_p
+ && direct_internal_fn_supported_p (IFN_FMA,
+ TREE_TYPE (lhs),
+ opt_type)
&& (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
{
mult_num = rank_ops_for_fma (&ops);
@@ -7200,7 +7201,8 @@ reassociate_bb (basic_block bb)
to make sure the ones that get the double
binary op are chosen wisely. */
int len = ops.length ();
- if (len >= 3
+ if (!reassoc_insert_powi_p
+ && len >= 3
&& (!has_fma
/* width > 1 means ranking ops results in better
parallelism. Check current value to avoid
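The reassociation hunk above restricts the FMA-aware ranking (and the width ranking below it) to the phase where powi insertion is not being performed. As a rough, hedged illustration of what that ranking is meant to preserve, the sketch below uses plain ISO C++ and std::fma, not GCC internals: keeping a multiplication adjacent to the final addition lets an FMA-capable target contract the pair.

    #include <cmath>
    #include <cstdio>

    int main ()
    {
      double a = 1.5, b = 2.0, c = 3.0, d = 4.0;

      /* Associating the chain as (a + d) + b * c keeps the multiply feeding
         the last addition; std::fma makes that contraction explicit.  */
      double fused = std::fma (b, c, a + d);
      double plain = a + d + b * c;

      std::printf ("%g %g\n", fused, plain);
      return 0;
    }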
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index e7919b7..da700cd 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -2918,12 +2918,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
2) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
3) all misaligned data refs with a known misalignment are supported, and
- 4) the number of runtime alignment checks is within reason. */
+     4) the number of runtime alignment checks is within reason, and
+ 5) the vectorization factor is a constant. */
do_versioning
= (optimize_loop_nest_for_speed_p (loop)
&& !loop->inner /* FORNOW */
- && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
+       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP
+       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ());
if (do_versioning)
{
@@ -2964,17 +2966,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
break;
}
- /* At present we don't support versioning for alignment
- with variable VF, since there's no guarantee that the
- VF is a power of two. We could relax this if we added
- a way of enforcing a power-of-two size. */
- unsigned HOST_WIDE_INT size;
- if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
- {
- do_versioning = false;
- break;
- }
-
/* Forcing alignment in the first iteration is no good if
we don't keep it across iterations. For now, just disable
versioning in this case.
@@ -2993,7 +2984,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
Construct the mask needed for this test. For example,
GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
mask must be 15 = 0xf. */
- int mask = size - 1;
+ gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ());
+ int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1;
/* FORNOW: use the same mask to test all potentially unaligned
references in the loop. */
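The versioning change above requires a constant vectorization factor and derives the runtime mask from DR_TARGET_ALIGNMENT rather than from the vector mode size. A minimal sketch of the kind of runtime check this versioning relies on is shown below; the helper name is hypothetical, and it assumes the target alignment is a compile-time power of two (which is exactly what the constant-VF requirement guarantees).

    #include <cstdint>
    #include <cstdio>

    /* Return true if P is aligned to TARGET_ALIGNMENT bytes.  The mask is
       target_alignment - 1, e.g. 0xf for 16-byte alignment, which is only
       meaningful when the alignment is a known power of two.  */
    static bool aligned_for_vector_loop (const void *p,
                                         std::uintptr_t target_alignment)
    {
      std::uintptr_t mask = target_alignment - 1;
      return (reinterpret_cast<std::uintptr_t> (p) & mask) == 0;
    }

    int main ()
    {
      alignas (16) int buf[8] = {};
      std::printf ("%d\n", aligned_for_vector_loop (buf, 16));      /* vector version */
      std::printf ("%d\n", aligned_for_vector_loop (buf + 1, 16));  /* scalar fallback */
      return 0;
    }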
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 2d01a4b..7fcbc1a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
bound_prolog + bound_epilog)
- : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+ : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
|| vect_epilogues));
/* Epilog loop must be executed if the number of iterations for epilog
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index cb27d16..a9c7105 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4950,6 +4950,9 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
max_tree_size, &limit,
force_single_lane))
{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "SLP discovery of reduction chain failed\n");
/* Dissolve reduction chain group. */
stmt_vec_info vinfo = first_element;
stmt_vec_info last = NULL;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4cf6c3b..88a12a1 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1702,19 +1702,32 @@ static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
loop_vec_info loop_vinfo, bool masked_p,
gather_scatter_info *gs_info,
- vec<int> *elsvals)
+ vec<int> *elsvals,
+ unsigned int group_size,
+ bool single_element_p)
{
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
|| gs_info->ifn == IFN_LAST)
- return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
- masked_p, gs_info, elsvals);
+ {
+ if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
+ masked_p, gs_info, elsvals))
+ return false;
+ }
+ else
+ {
+ tree old_offset_type = TREE_TYPE (gs_info->offset);
+ tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
- tree old_offset_type = TREE_TYPE (gs_info->offset);
- tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+ gcc_assert (TYPE_PRECISION (new_offset_type)
+ >= TYPE_PRECISION (old_offset_type));
+ gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ }
- gcc_assert (TYPE_PRECISION (new_offset_type)
- >= TYPE_PRECISION (old_offset_type));
- gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ if (!single_element_p
+ && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+ gs_info->scale,
+ group_size))
+ return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1976,7 +1989,49 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
separated by the stride, until we have a complete vector.
Fall back to scalar accesses if that isn't possible. */
*memory_access_type = VMAT_STRIDED_SLP;
- else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ *memory_access_type = VMAT_GATHER_SCATTER;
+ if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+ elsvals))
+ gcc_unreachable ();
+ slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
+ tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
+ gs_info->offset_vectype = offset_vectype;
+ /* When using internal functions, we rely on pattern recognition
+ to convert the type of the offset to the type that the target
+ requires, with the result being a call to an internal function.
+ If that failed for some reason (e.g. because another pattern
+ took priority), just handle cases in which the offset already
+ has the right type. */
+ if (GATHER_SCATTER_IFN_P (*gs_info)
+ && !is_gimple_call (stmt_info->stmt)
+ && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+ offset_vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%s offset requires a conversion\n",
+ vls_type == VLS_LOAD ? "gather" : "scatter");
+ return false;
+ }
+ else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+ {
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
+ || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported vector types for emulated "
+ "gather.\n");
+ return false;
+ }
+ }
+ }
+ else
{
int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
@@ -2220,54 +2275,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && single_element_p
&& SLP_TREE_LANES (slp_node) == 1
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
- masked_p, gs_info, elsvals))
+ masked_p, gs_info, elsvals,
+ group_size, single_element_p))
*memory_access_type = VMAT_GATHER_SCATTER;
- else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- {
- *memory_access_type = VMAT_GATHER_SCATTER;
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
- elsvals))
- gcc_unreachable ();
- slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
- tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
- gs_info->offset_vectype = offset_vectype;
- /* When using internal functions, we rely on pattern recognition
- to convert the type of the offset to the type that the target
- requires, with the result being a call to an internal function.
- If that failed for some reason (e.g. because another pattern
- took priority), just handle cases in which the offset already
- has the right type. */
- if (GATHER_SCATTER_IFN_P (*gs_info)
- && !is_gimple_call (stmt_info->stmt)
- && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
- offset_vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s offset requires a conversion\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- else if (GATHER_SCATTER_EMULATED_P (*gs_info))
- {
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
- || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
- || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
- || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
- TYPE_VECTOR_SUBPARTS (vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported vector types for emulated "
- "gather.\n");
- return false;
- }
- }
- }
if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
|| *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
@@ -3301,7 +3314,13 @@ vectorizable_call (vec_info *vinfo,
int mask_opno = -1;
if (internal_fn_p (cfn))
- mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ {
+ /* We can only handle direct internal masked calls here,
+ vectorizable_simd_clone_call is for the rest. */
+ if (cfn == CFN_MASK_CALL)
+ return false;
+ mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ }
for (i = 0; i < nargs; i++)
{
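The tree-vect-stmts.cc changes above reorder vect_use_strided_gather_scatters_p: a native gather/scatter (or the truncated-offset fallback) must be available first, and only then is the target preference hook consulted, and only for multi-element groups. The self-contained sketch below models that decision order; every name in it is a hypothetical stand-in, not the GCC API.

    #include <cstdio>

    /* Stand-in for targetm.vectorize.prefer_gather_scatter: assume this
       hypothetical target only prefers gathers for small groups.  */
    static bool target_prefers_gather (unsigned group_size)
    {
      return group_size <= 2;
    }

    /* Decision order modeled on the patched function: first make sure a
       gather can be formed at all (natively or via a truncated offset),
       then let the target veto it for multi-element groups.  */
    static bool use_gather (bool native_ok, bool truncated_ok,
                            unsigned group_size, bool single_element_p)
    {
      if (!native_ok && !truncated_ok)
        return false;
      if (!single_element_p && !target_prefers_gather (group_size))
        return false;
      return true;
    }

    int main ()
    {
      std::printf ("%d\n", use_gather (true, false, 4, false));  /* 0: group too large */
      std::printf ("%d\n", use_gather (true, false, 1, true));   /* 1 */
      return 0;
    }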
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 203e5ad..e1900279 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1197,6 +1197,10 @@ public:
|| LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
+#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \
+ ((L)->may_misalign_stmts.length () > 0 \
+ && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
+
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
diff --git a/gcc/vec.h b/gcc/vec.h
index 9604edb..0ea7a49 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -2514,6 +2514,10 @@ public:
return false;
if (lhs.size () != rhs.size ())
return false;
+  /* If either pointer is NULL then, since both slices are valid, both are
+     empty slices with length 0.  */
+ if (lhs.size () == 0)
+ return true;
return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0;
}