aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-c.cc1
-rw-r--r--gcc/config/aarch64/aarch64-cores.def18
-rw-r--r--gcc/config/aarch64/aarch64-protos.h1
-rw-r--r--gcc/config/aarch64/aarch64-sve-builtins.cc37
-rw-r--r--gcc/config/aarch64/aarch64-sve.md6
-rw-r--r--gcc/config/aarch64/aarch64-tune.md2
-rw-r--r--gcc/config/aarch64/aarch64.cc143
-rw-r--r--gcc/config/aarch64/aarch64.md72
-rw-r--r--gcc/config/alpha/alpha.cc23
-rw-r--r--gcc/config/c6x/c6x.h6
-rw-r--r--gcc/config/darwin.h1
-rw-r--r--gcc/config/gcn/gcn.cc6
-rw-r--r--gcc/config/gcn/gcn.md4
-rw-r--r--gcc/config/gcn/gcn.opt8
-rw-r--r--gcc/config/gcn/mkoffload.cc3
-rw-r--r--gcc/config/h8300/jumpcall.md8
-rw-r--r--gcc/config/i386/i386-expand.cc50
-rw-r--r--gcc/config/i386/i386-options.cc4
-rw-r--r--gcc/config/i386/i386.cc215
-rw-r--r--gcc/config/i386/i386.h16
-rw-r--r--gcc/config/i386/i386.md4
-rw-r--r--gcc/config/i386/predicates.md14
-rw-r--r--gcc/config/i386/sse.md94
-rw-r--r--gcc/config/i386/x86-tune-costs.h123
-rw-r--r--gcc/config/i386/x86-tune-sched.cc15
-rw-r--r--gcc/config/loongarch/genopts/gen-evolution.awk12
-rwxr-xr-xgcc/config/loongarch/genopts/genstr.sh57
-rw-r--r--gcc/config/mips/mips.cc3
-rw-r--r--gcc/config/nvptx/mkoffload.cc3
-rw-r--r--gcc/config/nvptx/nvptx.cc36
-rw-r--r--gcc/config/nvptx/nvptx.md4
-rw-r--r--gcc/config/nvptx/nvptx.opt18
-rw-r--r--gcc/config/riscv/bitmanip.md56
-rw-r--r--gcc/config/riscv/freebsd.h2
-rw-r--r--gcc/config/riscv/gnu.h59
-rwxr-xr-xgcc/config/riscv/multilib-generator4
-rw-r--r--gcc/config/riscv/riscv-cores.def48
-rw-r--r--gcc/config/riscv/riscv-target-attr.cc6
-rw-r--r--gcc/config/riscv/riscv-vector-builtins.cc34
-rw-r--r--gcc/config/riscv/riscv-vsetvl.cc19
-rw-r--r--gcc/config/riscv/riscv.cc5
-rw-r--r--gcc/config/riscv/riscv.h2
-rw-r--r--gcc/config/riscv/riscv.md28
-rw-r--r--gcc/config/riscv/vector.md28
-rw-r--r--gcc/config/rs6000/rs6000.cc11
-rw-r--r--gcc/config/rx/rx.md20
-rw-r--r--gcc/config/s390/9175.md316
-rw-r--r--gcc/config/s390/driver-native.cc4
-rw-r--r--gcc/config/s390/s390-builtins.def8
-rw-r--r--gcc/config/s390/s390-c.cc4
-rw-r--r--gcc/config/s390/s390-opts.h2
-rw-r--r--gcc/config/s390/s390.cc70
-rw-r--r--gcc/config/s390/s390.h18
-rw-r--r--gcc/config/s390/s390.md61
-rw-r--r--gcc/config/s390/s390.opt5
-rw-r--r--gcc/config/sh/sh-modes.def6
56 files changed, 1556 insertions, 267 deletions
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index d1e2ab9..98337b7 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -293,6 +293,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
"__ARM_FEATURE_SME2p1", pfile);
+ aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 0e22d72..1209630 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -173,6 +173,22 @@ AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, (F
AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15, -1)
AARCH64_CORE("cortex-r82ae", cortexr82ae, cortexa53, V8R, (), cortexa53, 0x41, 0xd14, -1)
+/* Apple (A12 and M) cores.
+ Known part numbers as listed in other public sources.
+ Placeholders for schedulers, generic_armv8_a for costs.
+ A12 seems mostly 8.3, M1 is 8.5 without BTI, M2 and M3 are 8.6.
+ From measurements made so far, the odd-numbered core IDs are the
+ performance cores. */
+AARCH64_CORE("apple-a12", applea12, cortexa53, V8_3A, (), generic_armv8_a, 0x61, 0x12, -1)
+AARCH64_CORE("apple-m1", applem1_0, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x21, 0x20), -1)
+AARCH64_CORE("apple-m1", applem1_1, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x23, 0x22), -1)
+AARCH64_CORE("apple-m1", applem1_2, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x25, 0x24), -1)
+AARCH64_CORE("apple-m1", applem1_3, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x29, 0x28), -1)
+AARCH64_CORE("apple-m2", applem2_0, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x31, 0x30), -1)
+AARCH64_CORE("apple-m2", applem2_1, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x33, 0x32), -1)
+AARCH64_CORE("apple-m2", applem2_2, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x35, 0x34), -1)
+AARCH64_CORE("apple-m2", applem2_3, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x39, 0x38), -1)
+AARCH64_CORE("apple-m3", applem3_0, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x49, 0x48), -1)
+
/* Armv9.0-A Architecture Processors. */
/* Arm ('A') cores. */
@@ -208,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG
AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
/* NVIDIA ('N') cores. */
-AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
+AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
/* Generic Architecture Processors. */
AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f44aea..1ca86c9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1260,6 +1260,7 @@ void aarch64_restore_za (rtx);
void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
+void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx);
extern bool aarch64_gcs_enabled ();
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 44e4807..3651926 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -5174,7 +5174,11 @@ bool
verify_type_context (location_t loc, type_context_kind context,
const_tree type, bool silent_p)
{
- if (!sizeless_type_p (type))
+ const_tree tmp = type;
+ if (omp_type_context (context) && POINTER_TYPE_P (type))
+ tmp = strip_pointer_types (tmp);
+
+ if (!sizeless_type_p (tmp))
return true;
switch (context)
@@ -5234,6 +5238,37 @@ verify_type_context (location_t loc, type_context_kind context,
if (!silent_p)
error_at (loc, "capture by copy of SVE type %qT", type);
return false;
+
+ case TCTX_OMP_MAP:
+ if (!silent_p)
+ error_at (loc, "SVE type %qT not allowed in %<map%> clause", type);
+ return false;
+
+ case TCTX_OMP_MAP_IMP_REF:
+ if (!silent_p)
+ error ("cannot reference %qT object types in %<target%> region", type);
+ return false;
+
+ case TCTX_OMP_PRIVATE:
+ if (!silent_p)
+ error_at (loc, "SVE type %qT not allowed in"
+ " %<target%> %<private%> clause", type);
+ return false;
+
+ case TCTX_OMP_FIRSTPRIVATE:
+ if (!silent_p)
+ error_at (loc, "SVE type %qT not allowed in"
+ " %<target%> %<firstprivate%> clause", type);
+ return false;
+
+ case TCTX_OMP_DEVICE_ADDR:
+ if (!silent_p)
+ error_at (loc, "SVE type %qT not allowed in"
+ " %<target%> device clauses", type);
+ return false;
+
+ default:
+ break;
}
gcc_unreachable ();
}
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3dbd659..d4af370 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3133,9 +3133,9 @@
"TARGET_SVE"
{
rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
- CONST1_RTX (<MODE>mode),
- CONST0_RTX (<MODE>mode)));
+ emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode),
+ CONST0_RTX (<MODE>mode),
+ operands[1]));
emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
DONE;
}
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 56a914f..982074c 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4e80114..f7bccf5 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9417,13 +9417,16 @@ aarch64_emit_stack_tie (rtx reg)
}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
- registers. If POLY_SIZE is not large enough to require a probe this function
- will only adjust the stack. When allocating the stack space
- FRAME_RELATED_P is then used to indicate if the allocation is frame related.
- FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
- the saved registers. If we are then we ensure that any allocation
- larger than the ABI defined buffer needs a probe so that the
- invariant of having a 1KB buffer is maintained.
+ registers, given that the stack pointer is currently BYTES_BELOW_SP bytes
+ above the bottom of the static frame.
+
+ If POLY_SIZE is not large enough to require a probe this function will only
+ adjust the stack. When allocating the stack space FRAME_RELATED_P is then
+ used to indicate if the allocation is frame related. FINAL_ADJUSTMENT_P
+ indicates whether we are allocating the area below the saved registers.
+ If we are then we ensure that any allocation larger than the ABI defined
+ buffer needs a probe so that the invariant of having a 1KB buffer is
+ maintained.
We emit barriers after each stack adjustment to prevent optimizations from
breaking the invariant that we never drop the stack more than a page. This
@@ -9440,6 +9443,7 @@ aarch64_emit_stack_tie (rtx reg)
static void
aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_int64 poly_size,
+ poly_int64 bytes_below_sp,
aarch64_isa_mode force_isa_mode,
bool frame_related_p,
bool final_adjustment_p)
@@ -9503,8 +9507,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_size, temp1, temp2, force_isa_mode,
false, true);
- rtx_insn *insn = get_last_insn ();
-
+ auto initial_cfa_offset = frame.frame_size - bytes_below_sp;
+ auto final_cfa_offset = initial_cfa_offset + poly_size;
if (frame_related_p)
{
/* This is done to provide unwinding information for the stack
@@ -9514,28 +9518,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
The tie will expand to nothing but the optimizers will not touch
the instruction. */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
- emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
+ auto *insn = emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
aarch64_emit_stack_tie (stack_ptr_copy);
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
- add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_ptr_copy,
+ initial_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
rtx guard_const = gen_int_mode (guard_size, Pmode);
- insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
- stack_pointer_rtx, temp1,
- probe_const, guard_const));
+ auto *insn
+ = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
+ stack_pointer_rtx, temp1,
+ probe_const, guard_const));
/* Now reset the CFA register if needed. */
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- gen_int_mode (poly_size, Pmode)));
+ plus_constant (Pmode, stack_pointer_rtx,
+ final_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9581,12 +9588,13 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
We can determine which allocation we are doing by looking at
the value of FRAME_RELATED_P since the final allocations are not
frame related. */
+ auto cfa_offset = frame.frame_size - (bytes_below_sp - rounded_size);
if (frame_related_p)
{
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, temp1, rounded_size));
+ plus_constant (Pmode, temp1, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9608,7 +9616,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, stack_pointer_rtx, rounded_size));
+ plus_constant (Pmode, stack_pointer_rtx, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9916,17 +9924,22 @@ aarch64_expand_prologue (void)
code below does not handle it for -fstack-clash-protection. */
gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
+ /* The offset of the current SP from the bottom of the static frame. */
+ poly_int64 bytes_below_sp = frame_size;
+
/* Will only probe if the initial adjustment is larger than the guard
less the amount of the guard reserved for use by the caller's
outgoing args. */
aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
- force_isa_mode, true, false);
+ bytes_below_sp, force_isa_mode,
+ true, false);
+ bytes_below_sp -= initial_adjust;
if (callee_adjust != 0)
- aarch64_push_regs (reg1, reg2, callee_adjust);
-
- /* The offset of the current SP from the bottom of the static frame. */
- poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+ {
+ aarch64_push_regs (reg1, reg2, callee_adjust);
+ bytes_below_sp -= callee_adjust;
+ }
if (emit_frame_chain)
{
@@ -9994,7 +10007,7 @@ aarch64_expand_prologue (void)
|| known_eq (frame.reg_offset[VG_REGNUM], bytes_below_sp));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
sve_callee_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, false);
bytes_below_sp -= sve_callee_adjust;
}
@@ -10005,10 +10018,11 @@ aarch64_expand_prologue (void)
/* We may need to probe the final adjustment if it is larger than the guard
that is assumed by the callee. */
- gcc_assert (known_eq (bytes_below_sp, final_adjust));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, true);
+ bytes_below_sp -= final_adjust;
+ gcc_assert (known_eq (bytes_below_sp, 0));
if (emit_frame_chain && maybe_ne (final_adjust, 0))
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
@@ -31073,8 +31087,6 @@ aarch64_valid_sysreg_name_p (const char *regname)
const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname);
if (sysreg == NULL)
return aarch64_is_implem_def_reg (regname);
- if (sysreg->arch_reqs)
- return bool (aarch64_isa_flags & sysreg->arch_reqs);
return true;
}
@@ -31098,8 +31110,6 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op)
if ((write_p && (sysreg->properties & F_REG_READ))
|| (!write_p && (sysreg->properties & F_REG_WRITE)))
return NULL;
- if ((~aarch64_isa_flags & sysreg->arch_reqs) != 0)
- return NULL;
return sysreg->encoding;
}
@@ -31298,6 +31308,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
}
}
+/* Expand the spaceship optab for floating-point operands.
+
+ DEST receives the integer result of comparing OP0 with OP1: 0 if they
+ are equal, -1 if OP0 < OP1 and 1 if OP0 > OP1. HINT selects both the
+ expansion strategy and the value produced for unordered operands.
+
+ If HINT is const0_rtx (the result is compared against (-1, 0, 1, 2)),
+ expand into fcmpe + conditional branch insns; unordered operands
+ yield 2.
+
+ Otherwise (the result is just stored as an integer), expand into
+ fcmpe + a sequence of conditional select/increment/invert insns;
+ unordered operands yield the value of HINT. */
+void
+aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
+{
+ rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1)); /* One compare feeds both paths. */
+
+ rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx);
+
+ if (hint == const0_rtx)
+ {
+ rtx un_label = gen_label_rtx ();
+ rtx lt_label = gen_label_rtx ();
+ rtx gt_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+
+ rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un,
+ gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx);
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp)); /* Unordered goes through the unlikely-jump helper. */
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt,
+ gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt,
+ gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ /* Equality: none of the unordered/less/greater branches above was
+ taken, so fall through and store 0. */
+ emit_move_insn (dest, const0_rtx);
+ emit_jump (end_label);
+
+ emit_label (un_label); /* Unordered: store 2. */
+ emit_move_insn (dest, const2_rtx);
+ emit_jump (end_label);
+
+ emit_label (gt_label); /* Greater than: store 1. */
+ emit_move_insn (dest, const1_rtx);
+ emit_jump (end_label);
+
+ emit_label (lt_label); /* Less than: store -1, then fall through. */
+ emit_move_insn (dest, constm1_rtx);
+
+ emit_label (end_label);
+ }
+ else
+ {
+ rtx temp0 = gen_reg_rtx (SImode);
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx);
+
+ /* The value of hint is stored if the operands are unordered.
+ temp_un holds HINT - 1 because the final select below adds 1 to
+ temp1 whenever the UNGT condition holds: that gives HINT for
+ unordered operands (temp1 == HINT - 1) and 1 for greater-than
+ (temp1 == 0). Otherwise the result is temp0, which is -1 for
+ less-than and 0 for equality. */
+ rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode);
+ if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode))
+ temp_un = force_reg (SImode, temp_un);
+
+ emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, const0_rtx));
+ emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un,
+ temp_un, const0_rtx));
+ emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt,
+ gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0));
+ }
+}
+
/* Target-specific selftests. */
#if CHECKING_P
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 031e621..c678f7a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -707,11 +707,12 @@
)
(define_expand "cbranch<mode>4"
- [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
- [(match_operand:GPF 1 "register_operand")
- (match_operand:GPF 2 "aarch64_fp_compare_operand")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:GPF_F16 1 "register_operand")
+ (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
""
"
operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
@@ -4337,26 +4338,28 @@
(define_insn "fcmp<mode>"
[(set (reg:CCFP CC_REGNUM)
- (compare:CCFP (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFP
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmp\t%<s>0, #0.0
[ w , w ] fcmp\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
(define_insn "fcmpe<mode>"
[(set (reg:CCFPE CC_REGNUM)
- (compare:CCFPE (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFPE
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmpe\t%<s>0, #0.0
[ w , w ] fcmpe\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
(define_insn "*cmp_swp_<shift>_reg<mode>"
@@ -4392,6 +4395,49 @@
[(set_attr "type" "alus_ext")]
)
+;; <=> operator pattern (integer)
+;; (a == b) ? 0 : (a < b) ? -1 : 1.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+ // 1 indicates unsigned comparison, -1 indicates signed.
+ gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx);
+
+ rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]);
+ RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT;
+ RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT;
+
+ rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx);
+
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt,
+ const1_rtx, const0_rtx)));
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, temp)));
+ DONE;
+ }
+)
+
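+;; <=> operator pattern (floating-point)
+;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED,
+;; where the value used for UNORDERED is derived from operand 3 by
+;; aarch64_expand_fp_spaceship.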
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")
+ (match_operand:GPF 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_FLOAT"
+ {
+ aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+ }
+)
+
;; -------------------------------------------------------------------
;; Store-flag and conditional select insns
;; -------------------------------------------------------------------
@@ -4424,8 +4470,8 @@
(define_expand "cstore<mode>4"
[(set (match_operand:SI 0 "register_operand")
(match_operator:SI 1 "aarch64_comparison_operator_mode"
- [(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")]))]
+ [(match_operand:GPF_F16 2 "register_operand")
+ (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))]
""
"
operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index ba470d9..14e7da5 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
tree mem = MEM_EXPR (expr);
if (mem != NULL_TREE)
- switch (TREE_CODE (mem))
- {
- case MEM_REF:
- tree_offset = mem_ref_offset (mem).force_shwi ();
- tree_align = get_object_alignment (get_base_address (mem));
- break;
+ {
+ HOST_WIDE_INT comp_offset = 0;
- case COMPONENT_REF:
+ for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0))
{
tree byte_offset = component_ref_field_offset (mem);
tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
@@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
|| !poly_int_tree_p (byte_offset, &offset)
|| !tree_fits_shwi_p (bit_offset))
break;
- tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+ comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
}
- tree_align = get_object_alignment (get_base_address (mem));
- break;
- default:
- break;
- }
+ if (TREE_CODE (mem) == MEM_REF)
+ {
+ tree_offset = comp_offset + mem_ref_offset (mem).force_shwi ();
+ tree_align = get_object_alignment (get_base_address (mem));
+ }
+ }
if (reg_align > mem_align)
{
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index e7da250..50bad27 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -444,11 +444,9 @@ struct GTY(()) machine_function
#define TARG_VEC_PERMUTE_COST 1
#endif
-/* ttype entries (the only interesting data references used) are
- sb-relative got-indirect (aka .ehtype). */
+/* .ehtype ttype entries are sb-relative. */
#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \
- (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \
- : DW_EH_PE_absptr)
+ (((code) == 0 && (data) == 1) ? DW_EH_PE_datarel : DW_EH_PE_absptr)
/* This should be the same as the definition in elfos.h, plus the call
to output special unwinding directives. */
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 8c164fd..9b9a3fe 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -504,6 +504,7 @@ extern GTY(()) int darwin_ms_struct;
%{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\
%{static|static-libgcc|static-libquadmath:%:replace-outfile(-lquadmath libquadmath.a%s)}\
%{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos libgphobos.a%s)}\
+ %{static|static-libgcc|static-libgcobol:%:replace-outfile(-lgcobol libgcobol.a%s)}\
%{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\
%{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2pim libm2pim.a%s)}\
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index d59e87b..91ce801 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -6587,8 +6587,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
if (avgpr % vgpr_block_size)
avgpr += vgpr_block_size - (avgpr % vgpr_block_size);
- fputs ("\t.rodata\n"
- "\t.p2align\t6\n"
+ switch_to_section (readonly_data_section);
+ fputs ("\t.p2align\t6\n"
"\t.amdhsa_kernel\t", file);
assemble_name (file, name);
fputs ("\n", file);
@@ -6707,7 +6707,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
fputs (" .end_amdgpu_metadata\n", file);
#endif
- fputs ("\t.text\n", file);
+ switch_to_section (current_function_section ());
fputs ("\t.align\t256\n", file);
fputs ("\t.type\t", file);
assemble_name (file, name);
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 695656f..e0fb735 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1018,7 +1018,9 @@
[(const_int 0)]
""
{
- sorry ("exception handling not supported");
+ if (!fake_exceptions)
+ sorry ("exception handling not supported");
+ DONE;
})
;; }}}
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 142b439..99d6aeb 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -101,3 +101,11 @@ Enum(gcn_preferred_vectorization_factor) String(32) Value(32)
EnumValue
Enum(gcn_preferred_vectorization_factor) String(64) Value(64)
+
+mfake-exceptions
+Target Var(fake_exceptions) Init(0) Undocumented
+; With '-mfake-exceptions' enabled, the user-visible behavior in presence of
+; exception handling constructs changes such that the compile-time
+; 'sorry, unimplemented: exception handling not supported' is skipped, code
+; generation proceeds, and instead, exception handling constructs 'abort' at
+; run time. (..., or don't, if they're in dead code.)
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index f5b89c9..b284ff4 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -1160,6 +1160,9 @@ main (int argc, char **argv)
obstack_ptr_grow (&cc_argv_obstack, "-xlto");
if (fopenmp)
obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
+ /* The host code may contain exception handling constructs.
+ Handle these as well as we can. */
+ obstack_ptr_grow (&cc_argv_obstack, "-mfake-exceptions");
for (int ix = 1; ix != argc; ix++)
{
diff --git a/gcc/config/h8300/jumpcall.md b/gcc/config/h8300/jumpcall.md
index b596399..4e63408 100644
--- a/gcc/config/h8300/jumpcall.md
+++ b/gcc/config/h8300/jumpcall.md
@@ -146,9 +146,9 @@
(define_insn_and_split ""
[(set (pc)
(if_then_else (match_operator 3 "eqne_operator"
- [(zero_extract:QHSI (match_operand:QHSI 1 "register_operand" "r")
- (const_int 1)
- (match_operand 2 "const_int_operand" "n"))
+ [(zero_extract:HSI (match_operand:HSI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n"))
(const_int 0)])
(label_ref (match_operand 0 "" ""))
(pc)))]
@@ -156,7 +156,7 @@
"#"
"&& reload_completed"
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:QHSI (match_dup 1) (const_int 1) (match_dup 2))
+ (eq (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..a314800 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4138,6 +4138,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
return false;
mode = GET_MODE (dest);
+ if (immediate_operand (if_false, mode))
+ if_false = force_reg (mode, if_false);
+ if (immediate_operand (if_true, mode))
+ if_true = force_reg (mode, if_true);
/* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
but MODE may be a vector mode and thus not appropriate. */
@@ -4687,6 +4691,8 @@ ix86_expand_fp_movcc (rtx operands[])
compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
}
+ operands[2] = force_reg (mode, operands[2]);
+ operands[3] = force_reg (mode, operands[3]);
emit_insn (gen_rtx_SET (operands[0],
gen_rtx_IF_THEN_ELSE (mode, compare_op,
operands[2], operands[3])));
@@ -19256,8 +19262,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19274,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
/* Output code to perform a Newton-Rhapson approximation of a
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index a9fac01..964449f 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2828,8 +2828,8 @@ ix86_option_override_internal (bool main_args_p,
if (flag_nop_mcount)
error ("%<-mnop-mcount%> is not compatible with this target");
#endif
- if (flag_nop_mcount && flag_pic)
- error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>");
+ if (flag_nop_mcount && flag_pic && !flag_plt)
+ error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>");
/* Accept -msseregparm only if at least SSE support is enabled. */
if (TARGET_SSEREGPARM_P (opts->x_target_flags)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4f8380c4..78df3d9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"
+#include "gimple-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -458,6 +459,9 @@ int ix86_arch_specified;
indirect thunk pushes the return address onto stack, destroying
red-zone.
+ NB: Don't use red-zone for functions with no_caller_saved_registers
+ and 32 GPRs since 128-byte red-zone is too small for 31 GPRs.
+
TODO: If we can reserve the first 2 WORDs, for PUSH and, another
for CALL, in red-zone, we can allow local indirect jumps with
indirect thunk. */
@@ -467,6 +471,9 @@ ix86_using_red_zone (void)
{
return (TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
+ && (!TARGET_APX_EGPR
+ || (cfun->machine->call_saved_registers
+ != TYPE_NO_CALLER_SAVED_REGISTERS))
&& (!cfun->machine->has_local_indirect_jump
|| cfun->machine->indirect_branch_type == indirect_branch_keep));
}
@@ -21810,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
return insn_cost + pattern_cost (PATTERN (insn), speed);
}
+/* Return the cost of an SSE/AVX FP->FP conversion (extension or
+   truncation) on a SIZE-bit operand, looked up in the cost table COST.  */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+  /* 512 bits and wider.  */
+  if (size >= 512)
+    return cost->vcvtps2pd512;
+
+  /* 256 bits up to, but excluding, 512 bits.  */
+  if (size >= 256)
+    return cost->vcvtps2pd256;
+
+  /* With split SSE registers a 128-bit (or wider) operation is scaled by
+     SIZE / 64; every remaining case costs a single scalar conversion.  */
+  if (size >= 128 && TARGET_SSE_SPLIT_REGS)
+    return cost->cvtss2sd * size / 64;
+
+  return cost->cvtss2sd;
+}
+
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -22473,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case FLOAT_EXTEND:
+ /* x87 represents all values extended to 80bit. */
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = 0;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case FLOAT_TRUNCATE:
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = cost->fadd;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case ABS:
@@ -23158,6 +23185,12 @@ x86_print_call_or_nop (FILE *file, const char *target)
if (flag_nop_mcount || !strcmp (target, "nop"))
/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ else if (!TARGET_PECOFF && flag_pic)
+ {
+ gcc_assert (flag_plt);
+
+ fprintf (file, "1:\tcall\t%s@PLT\n", target);
+ }
else
fprintf (file, "1:\tcall\t%s\n", target);
}
@@ -23321,7 +23354,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
break;
case CM_SMALL_PIC:
case CM_MEDIUM_PIC:
- if (!ix86_direct_extern_access)
+ if (!flag_plt)
{
if (ASSEMBLER_DIALECT == ASM_INTEL)
fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
@@ -23352,7 +23385,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
"\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- if (ASSEMBLER_DIALECT == ASM_INTEL)
+ if (flag_plt)
+ x86_print_call_or_nop (file, mcount_name);
+ else if (ASSEMBLER_DIALECT == ASM_INTEL)
fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
else
fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
@@ -24669,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
switch (type_of_cost)
{
case scalar_stmt:
- return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load:
/* load/store costs are relative to register move which is 2. Recompute
@@ -24740,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
+ return ix86_vec_cost (mode, ix86_cost->sse_op);
+
case vec_promote_demote:
+ /* FP promotions/demotions are costed as FP conversions.
+ vec_fp_conversion_cost expects the operand width in bits, so pass
+ GET_MODE_BITSIZE (mode) rather than the machine mode itself. */
+ if (fp)
+ return vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
return ix86_vec_cost (mode, ix86_cost->sse_op);
case vec_construct:
@@ -25261,7 +25300,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else if (X87_FLOAT_MODE_P (mode))
stmt_cost = ix86_cost->fadd;
else
- stmt_cost = ix86_cost->add;
+ stmt_cost = ix86_cost->add;
}
else
stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
@@ -25316,7 +25355,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(subcode == RSHIFT_EXPR
&& !TYPE_UNSIGNED (TREE_TYPE (op1)))
? ASHIFTRT : LSHIFTRT, mode,
- TREE_CODE (op2) == INTEGER_CST,
+ TREE_CODE (op2) == INTEGER_CST,
cst_and_fits_in_hwi (op2)
? int_cst_value (op2) : -1,
false, false, NULL, NULL);
@@ -25325,27 +25364,152 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case NOP_EXPR:
/* Only sign-conversions are free. */
if (tree_nop_conversion_p
- (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
+ else if (fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ break;
+
+ case COND_EXPR:
+ {
+ /* SSE2 conditional move sequence is:
+ pcmpgtd %xmm5, %xmm0 (accounted separately)
+ pand %xmm0, %xmm2
+ pandn %xmm1, %xmm0
+ por %xmm2, %xmm0
+ while SSE4 uses cmp + blend
+ and AVX512 masked moves.
+
+ The condition is accounted separately since we usually have
+ p = a < b
+ c = p ? x : y
+ and we will account first statement as setcc. Exception is when
+ p is loaded from memory as bool and then we will not account
+ the compare, but there is no way to check for this. */
+
+ int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+ /* If one of parameters is 0 or -1 the sequence will be simplified:
+ (if_true & mask) | (if_false & ~mask) -> if_true & mask */
+ if (ninsns > 1
+ && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || zerop (gimple_assign_rhs3 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs3 (stmt_info->stmt))))
+ ninsns = 1;
+
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ninsns * ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* x87 requires conditional branch. We don't have cost for
+ that. */
+ ;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
+ else
+ /* compare (accounted separately) + cmov. */
+ stmt_cost = ix86_cost->add;
+ }
break;
- case BIT_IOR_EXPR:
- case ABS_EXPR:
- case ABSU_EXPR:
case MIN_EXPR:
case MAX_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode))
+ /* x87 requires conditional branch. We don't have cost for
+ that. */
+ ;
+ else
+ /* minss */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vpmin was introduced in SSE3.
+ SSE2 needs pcmpgtd + pand + pandn + pxor.
+ If one of parameters is 0 or -1 the sequence is simplified
+ to pcmpgtd + pand. */
+ if (!TARGET_SSSE3)
+ {
+ if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt)))
+ stmt_cost *= 2;
+ else
+ stmt_cost *= 4;
+ }
+ }
+ else
+ /* cmp + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode))
+ /* fabs. */
+ stmt_cost = ix86_cost->fabs;
+ else
+ /* andss of sign bit. */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vabs was introduced in SSSE3.
+ Without SSSE3 we need psrat + pxor + psub. */
+ if (!TARGET_SSSE3)
+ stmt_cost *= 3;
+ }
+ else
+ /* neg + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case BIT_NOT_EXPR:
- if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
- stmt_cost = ix86_cost->sse_op;
- else if (VECTOR_MODE_P (mode))
+ gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
+ && !X87_FLOAT_MODE_P (mode));
+ if (VECTOR_MODE_P (mode))
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
else
stmt_cost = ix86_cost->add;
break;
+
default:
+ if (truth_value_p (subcode))
+ {
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* CMPccS? instructions are cheap, so use sse_op. While they
+ produce a mask which may need to be turned to 0/1 by and,
+ expect that this will be optimized away in a common case. */
+ stmt_cost = ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* fcmp + setcc. */
+ stmt_cost = ix86_cost->fadd + ix86_cost->add;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ else
+ /* setcc. */
+ stmt_cost = ix86_cost->add;
+ break;
+ }
break;
}
}
@@ -25369,6 +25533,29 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
+ /* FP -> FP promotions and demotions: cost them as FP conversions of the
+ vector mode, scaled when several inputs must be converted and packed. */
+ if (kind == vec_promote_demote
+ && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ /* Assign the function-level stmt_cost. Declaring a fresh local here
+ would shadow it and the conversion cost would be discarded at the
+ closing brace. */
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+ up doing two conversions and packing them. */
+ if (inner_size > outer_size)
+ {
+ int n = inner_size / outer_size;
+ stmt_cost = stmt_cost * n
+ + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ }
+
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
(AGU and load ports). Try to account for this by scaling the
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 13da3d8..18aa42d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -207,6 +207,12 @@ struct processor_costs {
const int divsd; /* cost of DIVSD instructions. */
const int sqrtss; /* cost of SQRTSS instructions. */
const int sqrtsd; /* cost of SQRTSD instructions. */
+ const int cvtss2sd; /* cost SSE FP conversions,
+ such as CVTSS2SD. */
+ const int vcvtps2pd256; /* cost 256bit packed FP conversions,
+ such as VCVTPD2PS with larger reg in ymm. */
+ const int vcvtps2pd512; /* cost 512bit packed FP conversions,
+ such as VCVTPD2PS with larger reg in zmm. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
@@ -2449,11 +2455,11 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA
| PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD
| PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK
| PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI
- | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1
- | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8
- | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
- | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE
- | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
+ | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256
+ | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16
+ | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4
+ | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32
+ | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6b2f29..e170da3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -26592,8 +26592,8 @@
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF
(match_operand 1 "comparison_operator")
- (match_operand:X87MODEF 2 "register_operand")
- (match_operand:X87MODEF 3 "register_operand")))]
+ (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand")
+ (match_operand:X87MODEF 3 "nonimm_or_0_operand")))]
"(TARGET_80387 && TARGET_CMOVE)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3d3848c..4b23e18 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1267,6 +1267,14 @@
(match_operand 0 "vector_memory_operand")
(match_code "const_vector")))
+; Return true when OP is register_operand, vector_memory_operand,
+; const_vector zero or const_vector all ones.
+(define_predicate "vector_or_0_or_1s_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "vector_memory_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
(define_predicate "bcst_mem_operand"
(and (match_code "vec_duplicate")
(and (match_test "TARGET_AVX512F")
@@ -1333,6 +1341,12 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_operand 0 "const0_operand")))
+; Return true when OP is a nonimmediate or zero or all ones.
+(define_predicate "nonimm_or_0_or_1s_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
;; Return true for RTX codes that force SImode address.
(define_predicate "SImode_address_operand"
(match_code "subreg,zero_extend,and"))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b280676..2ed348c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -527,6 +527,16 @@
(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+;; Four-element vector modes (V4SF, V4DF, V4SI, V4DI), each enabled only
+;; with TARGET_AVX512VL.
+(define_mode_iterator V48_AVX512VL_4
+ [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+;; Integer-only four-element vector modes (V4SI, V4DI), each enabled only
+;; with TARGET_AVX512VL.
+(define_mode_iterator VI48_AVX512VL_4
+ [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+;; Two-element vector modes (V2DF, V2DI), each enabled only with
+;; TARGET_AVX512VL.
+(define_mode_iterator V8_AVX512VL_2
+ [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
(define_mode_iterator VFH_AVX10_2
[(V32HF "TARGET_AVX10_2") V16HF V8HF
(V16SF "TARGET_AVX10_2") V8SF V4SF
@@ -4410,7 +4420,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
@@ -4428,7 +4438,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ (match_operand:SI 3 "<cmp_imm_predicate>")]
UNSPEC_PCMP)))
(set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
(unspec:<V48H_AVX512VL:avx512fmaskmode>
@@ -4469,7 +4479,8 @@
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8
+ && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4480,6 +4491,70 @@
UNSPEC_PCMP))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+;; Mask compare (UNSPEC_PCMP) of two V48_AVX512VL_4 vectors whose QImode
+;; result is ANDed with 15. The AND is not emitted separately: the output
+;; template is the bare compare instruction.
+;; NOTE(review): presumably valid because a four-element compare can only
+;; set the low four mask bits -- confirm.
+(define_insn "*<avx512>_cmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Unsigned mask compare (UNSPEC_UNSIGNED_PCMP) of two VI48_AVX512VL_4
+;; vectors whose QImode result is ANDed with 15. Only the vpcmpu
+;; instruction is output; the AND is folded into the pattern.
+;; NOTE(review): presumably valid because a four-element compare can only
+;; set the low four mask bits -- confirm.
+(define_insn "*<avx512>_ucmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Mask compare (UNSPEC_PCMP) of two V8_AVX512VL_2 vectors whose QImode
+;; result is ANDed with 3. The AND is not emitted separately: the output
+;; template is the bare compare instruction.
+;; NOTE(review): presumably valid because a two-element compare can only
+;; set the low two mask bits -- confirm.
+(define_insn "*<avx512>_cmp<mode>3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v")
+ (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Unsigned mask compare (UNSPEC_UNSIGNED_PCMP) of two V2DI vectors whose
+;; QImode result is ANDed with 3. Only vpcmpuq is output; the AND is
+;; folded into the pattern.
+;; This pattern names V2DI directly instead of going through an iterator
+;; that carries a per-mode TARGET_AVX512VL condition, so the insn condition
+;; itself must require AVX512VL for the 128-bit operands.
+(define_insn "*avx512vl_ucmpv2di3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V2DI 1 "nonimmediate_operand" "v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512VL"
+ "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -4762,7 +4837,8 @@
(match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_7_operand")]
UNSPEC_UNSIGNED_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()
+ && GET_MODE_NUNITS (<MODE>mode) >= 8"
"#"
"&& 1"
[(set (match_dup 0)
@@ -5142,7 +5218,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_256_AVX2 0 "register_operand")
(vec_merge:VI_256_AVX2
- (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
+ (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
(match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5155,7 +5231,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_128 0 "register_operand")
(vec_merge:VI_128
- (match_operand:VI_128 1 "vector_operand")
+ (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
(match_operand:VI_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE2"
@@ -5168,7 +5244,7 @@
(define_expand "vcond_mask_v1tiv1ti"
[(set (match_operand:V1TI 0 "register_operand")
(vec_merge:V1TI
- (match_operand:V1TI 1 "vector_operand")
+ (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
(match_operand:V1TI 2 "nonimm_or_0_operand")
(match_operand:V1TI 3 "register_operand")))]
"TARGET_SSE2"
@@ -5181,7 +5257,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_256 0 "register_operand")
(vec_merge:VF_256
- (match_operand:VF_256 1 "nonimmediate_operand")
+ (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
(match_operand:VF_256 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5194,7 +5270,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
- (match_operand:VF_128 1 "vector_operand")
+ (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
(match_operand:VF_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE"
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb73..cddcf61 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
- COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_BYTES (2), /* cost of MULSS instruction. */
- COSTS_N_BYTES (2), /* cost of MULSD instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */
+ COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_BYTES (4), /* cost of MULSS instruction. */
+ COSTS_N_BYTES (4), /* cost of MULSD instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SS instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SD instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSS instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSD instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
@@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -467,6 +476,9 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -800,6 +818,9 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -913,6 +934,9 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ /* Real latency is 4, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests
@@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ /* Real latency is 6, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to with of 6.
@@ -2120,7 +2170,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
/* ADDSS has throughput 2 and latency 2
(in some cases when source is another addition). */
- COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
/* MULSS has throughput 2 and latency 3. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
@@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
 /* SQRTSD has throughput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
@@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4051,6 +4149,9 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index 685a83c..15d3d91 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -81,6 +81,14 @@ ix86_issue_rate (void)
case PROCESSOR_YONGFENG:
case PROCESSOR_SHIJIDADAO:
case PROCESSOR_GENERIC:
+ /* For znver5 decoder can handle 4 or 8 instructions per cycle,
+ op cache 12 instruction/cycle, dispatch 8 instructions
+ integer rename 8 instructions and Fp 6 instructions.
+
+ The scheduler, without understanding the out-of-order nature of the CPU,
+ is not going to be able to use more than 4 instructions since that
+ is the limit of the decoders. */
+ case PROCESSOR_ZNVER5:
return 4;
case PROCESSOR_ICELAKE_CLIENT:
@@ -91,13 +99,6 @@ ix86_issue_rate (void)
return 5;
case PROCESSOR_SAPPHIRERAPIDS:
- /* For znver5 decoder can handle 4 or 8 instructions per cycle,
- op cache 12 instruction/cycle, dispatch 8 instructions
- integer rename 8 instructions and Fp 6 instructions.
-
- The scheduler, without understanding out of order nature of the CPU
- is unlikely going to be able to fill all of these. */
- case PROCESSOR_ZNVER5:
return 6;
default:
diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk
index 142b658..507063b 100644
--- a/gcc/config/loongarch/genopts/gen-evolution.awk
+++ b/gcc/config/loongarch/genopts/gen-evolution.awk
@@ -101,10 +101,18 @@ function gen_cpucfg_useful_idx()
idx_list[j++] = i+0
delete idx_bucket
- asort (idx_list)
+ for (i = 1; i < j; i++) {
+ t = i
+ for (k = i + 1; k < j; k++)
+ t = idx_list[k] < idx_list[t] ? k : t
+
+ k = idx_list[t]
+ idx_list[t] = idx_list[i]
+ idx_list[i] = k
+ }
print "static constexpr int cpucfg_useful_idx[] = {"
- for (i in idx_list)
+ for (i = 1; i < j; i++)
printf(" %d,\n", idx_list[i])
print "};"
diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh
index 16c2edd..97517da 100755
--- a/gcc/config/loongarch/genopts/genstr.sh
+++ b/gcc/config/loongarch/genopts/genstr.sh
@@ -51,18 +51,18 @@ along with GCC; see the file COPYING3. If not see
#define LOONGARCH_STR_H
EOF
- sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \
- -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@#define \1 "\2"@' \
- loongarch-strings
+ awk '/^#.*$/ { next } /^$/ { print; next }
+ { printf ("#define %s \"%s\"\n", $1, $2) }' \
+ loongarch-strings
echo
- # Generate the strings from isa-evolution.in.
- awk '{
- a=$3
- gsub(/-/, "_", a)
- print("#define OPTSTR_"toupper(a)"\t\""$3"\"")
- }' isa-evolution.in
+ # Generate the strings from isa-evolution.in.
+ awk '{
+ a=$3
+ gsub(/-/, "_", a)
+ print("#define OPTSTR_"toupper(a)"\t\""$3"\"")
+ }' isa-evolution.in
echo
echo "#endif /* LOONGARCH_STR_H */"
@@ -73,18 +73,8 @@ EOF
# according to the key-value pairs defined in loongarch-strings.
gen_options() {
-
- sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \
- -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@\1="\2"@' \
- loongarch-strings | { \
-
- # read the definitions
- while read -r line; do
- eval "$line"
- done
-
- # print a header
- cat << EOF
+ # print a header
+ cat << EOF
; Generated by "genstr" from the template "loongarch.opt.in"
; and definitions from "loongarch-strings" and "isa-evolution.in".
;
@@ -95,12 +85,25 @@ gen_options() {
;
EOF
- # make the substitutions
- sed -e 's@"@\\"@g' -e 's/@@\([^@]\+\)@@/${\1}/g' loongarch.opt.in | \
- while read -r line; do
- eval "echo \"$line\""
- done
- }
+ # Generate loongarch.opt.
+ awk 'BEGIN {
+ delete strtab
+ while (getline < "loongarch-strings" > 0) {
+ if ($0 ~ /^#.*$/ || $0 ~ /^$/) continue
+ strtab[$1] = $2
+ }
+ }
+ {
+ n = split($0, tmp, "@@")
+ for (i = 2; i <= n; i += 2)
+ tmp[i] = strtab[tmp[i]]
+
+ for (i = 1; i <= n; i++)
+ printf("%s", tmp[i])
+ printf ("\n")
+
+ }' loongarch.opt.in
+
# Generate the strings from isa-evolution.in.
awk '{
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 24a28dc..0d3d026 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -20678,6 +20678,9 @@ mips_option_override (void)
"-mcompact-branches=never");
}
+ if (is_micromips && TARGET_MSA)
+ error ("unsupported combination: %s", "-mmicromips -mmsa");
+
/* Require explicit relocs for MIPS R6 onwards. This enables simplification
of the compact branch and jump support through the backend. */
if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index bdfe7f5..e7ec0ef 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -778,6 +778,9 @@ main (int argc, char **argv)
}
if (fopenmp)
obstack_ptr_grow (&argv_obstack, "-mgomp");
+ /* The host code may contain exception handling constructs.
+ Handle these as well as we can. */
+ obstack_ptr_grow (&argv_obstack, "-mfake-exceptions");
for (int ix = 1; ix != argc; ix++)
{
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index de0ce5d..f893971 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -2359,7 +2359,25 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
{
gcc_checking_assert (!init_frag.active);
/* Just use the default machinery; it's not getting used, anyway. */
- return default_assemble_integer (x, size, aligned_p);
+ bool ok = default_assemble_integer (x, size, aligned_p);
+ /* ..., but a few cases need special handling. */
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ /* The default machinery won't work: we don't define the necessary
+ operations; don't use them outside of this. */
+ gcc_checking_assert (!ok);
+ {
+ /* Just emit something; it's not getting used, anyway. */
+ const char *op = "\t.symbol_ref\t";
+ ok = (assemble_integer_with_op (op, x), true);
+ }
+ break;
+
+ default:
+ break;
+ }
+ return ok;
}
gcc_checking_assert (init_frag.active);
@@ -7766,9 +7784,23 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name,
return;
}
+#ifdef ACCEL_COMPILER
emit_ptx_alias:
+#endif
cgraph_node *cnode = cgraph_node::get (name);
+#ifdef ACCEL_COMPILER
+ /* For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106]
+
+ For some reason (yet to be analyzed), they're not 'cnode->referred_to_p ()'.
+ (..., or that's not the right approach at all;
+ <https://inbox.sourceware.org/87v7rx8lbx.fsf@euler.schwinge.ddns.net>
+ "Re: [committed][nvptx] Use .alias directive for mptx >= 6.3"). */
+ if (DECL_CXX_CONSTRUCTOR_P (name)
+ || DECL_CXX_DESTRUCTOR_P (name))
+ ;
+ else
+#endif
if (!cnode->referred_to_p ())
/* Prevent "Internal error: reference to deleted section". */
return;
@@ -7873,8 +7905,6 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name,
#define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
-#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
-#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 3201247..7c3bd69 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1644,7 +1644,9 @@
[(const_int 0)]
""
{
- sorry ("exception handling not supported");
+ if (!fake_exceptions)
+ sorry ("exception handling not supported");
+ DONE;
})
(define_expand "nonlocal_goto"
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index 02d36b3..ce9fbc7 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -168,17 +168,25 @@ Target Var(nvptx_alias) Init(0) Undocumented
mexperimental
Target Var(nvptx_experimental) Init(0) Undocumented
+mfake-exceptions
+Target Var(fake_exceptions) Init(0) Undocumented
+; With '-mfake-exceptions' enabled, the user-visible behavior in presence of
+; exception handling constructs changes such that the compile-time
+; 'sorry, unimplemented: exception handling not supported' is skipped, code
+; generation proceeds, and instead, exception handling constructs 'abort' at
+; run time. (..., or don't, if they're in dead code.)
+
mfake-ptx-alloca
Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented
; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only
; for configurations where PTX 'alloca' is not available. Rather than a
; compile-time 'sorry, unimplemented: dynamic stack allocation not supported'
-; in presence of dynamic stack allocation, compilation and assembly then
-; succeeds. However, attempting to link in such '*.o' files then fails due
-; to unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'.
+; in presence of dynamic stack allocation, with '-mfake-ptx-alloca' enabled,
+; compilation, assembly, and linking succeed, as does execution, as long as
+; 'alloca' is not attempted (if only used in error code paths, for example);
+; a run-time failure occurs only if 'alloca' is actually attempted.
;
; This is meant to be used in scenarios where large volumes of code are
; compiled, a small fraction of which runs into dynamic stack allocation, but
; these parts are not important for specific use cases, and we'd thus like the
-; build to succeed, and error out just upon actual, very rare use of the
-; offending '*.o' files.
+; build to succeed, and error out just upon actual, very rare use of 'alloca'.
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 5ed5e18..d0919ec 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -908,6 +908,24 @@
"bext\t%0,%1,%2"
[(set_attr "type" "bitmanip")])
+;; We do not define SHIFT_COUNT_TRUNCATED, so we have to have variants
+;; that mask/extend the count if we want to eliminate those ops
+;;
+;; We could (in theory) use GPR for the various modes, but I haven't
+;; seen those cases appear in practice. Without a testcase I've
+;; elected to keep the modes X which is easy to reason about.
+(define_insn "*bext<mode>_mask_pos"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (zero_extract:X (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X
+ (match_operand:X 2 "register_operand" "r")
+ (match_operand 3 "const_int_operand"))))]
+ "(TARGET_ZBS
+ && INTVAL (operands[3]) + 1 == GET_MODE_BITSIZE (<MODE>mode))"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
;; This is a bext followed by a seqz. Normally this would be a 3->2 split
;; But the and-not pattern with a constant operand is a define_insn_and_split,
;; so this looks like a 2->2 split, which combine rejects. So implement it
@@ -1245,3 +1263,41 @@
expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands);
DONE;
})
+
+;; If we have an XOR/IOR with a constant operand (C) and we can
+;; synthesize ~C more efficiently than C, then synthesize ~C and use
+;; xnor/orn instead.
+;;
+;; The same can be done for AND, but mvconst_internal's issues get in
+;; the way. That's future work.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (any_or:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZBB
+ && (riscv_const_insns (operands[2], true)
+ > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))"
+ [(const_int 0)]
+{
+ /* Get the inverted constant into the temporary register. */
+ riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2])));
+
+ /* For xnor, the NOT operation is in a different position. So
+ we have to customize the split code we generate a bit.
+
+ It is expected that AND will be handled like IOR in the future. */
+ if (<CODE> == XOR)
+ {
+ rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]);
+ x = gen_rtx_NOT (<X:MODE>mode, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]);
+ x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ DONE;
+})
diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h
index 2dc7055..217e0ac 100644
--- a/gcc/config/riscv/freebsd.h
+++ b/gcc/config/riscv/freebsd.h
@@ -42,7 +42,7 @@ along with GCC; see the file COPYING3. If not see
#define LINK_SPEC " \
-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv \
%{p:%nconsider using `-pg' instead of `-p' with gprof (1)} \
- " FBSD_LINK_PG_NOTES " \
+ " FBSD_LINK_PG_NOTE " \
%{v:-V} \
%{assert*} %{R*} %{rpath*} %{defsym*} \
-X \
diff --git a/gcc/config/riscv/gnu.h b/gcc/config/riscv/gnu.h
new file mode 100644
index 0000000..047399b
--- /dev/null
+++ b/gcc/config/riscv/gnu.h
@@ -0,0 +1,59 @@
+/* Definitions for RISC-V GNU/Hurd systems with ELF format.
+ Copyright (C) 1998-2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ } while (0)
+
+#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1"
+
+#define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+
+#define CPP_SPEC "%{pthread:-D_REENTRANT}"
+
+#define LD_EMUL_SUFFIX \
+ "%{mabi=lp64d:}" \
+ "%{mabi=lp64f:_lp64f}" \
+ "%{mabi=lp64:_lp64}" \
+ "%{mabi=ilp32d:}" \
+ "%{mabi=ilp32f:_ilp32f}" \
+ "%{mabi=ilp32:_ilp32}"
+
+#define LINK_SPEC "\
+-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \
+%{mno-relax:--no-relax} \
+-X \
+%{mbig-endian:-EB} \
+%{mlittle-endian:-EL} \
+%{shared} \
+ %{!shared: \
+ %{!static: \
+ %{!static-pie: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \
+ %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}}"
+
+#define STARTFILE_PREFIX_SPEC \
+ "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+ "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+ "/lib/ " \
+ "/usr/lib/ "
+
+#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_abi
diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator
index 4828016..6ad1cf0 100755
--- a/gcc/config/riscv/multilib-generator
+++ b/gcc/config/riscv/multilib-generator
@@ -159,8 +159,8 @@ for cmodel in cmodels:
"e.g. rv32imafd-ilp32--" % cfg)
sys.exit(1)
- # Compact code model only support rv64.
- if cmodel == "compact" and arch.startswith("rv32"):
+ # Large code model only supports rv64.
+ if cmodel == "large" and arch.startswith("rv32"):
continue
arch = arch_canonicalize (arch, args.misa_spec)
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 2918496..e31afc3 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -41,6 +41,12 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info)
RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info)
RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info)
RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
RISCV_TUNE("size", generic, optimize_size_tune_info)
@@ -93,6 +99,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"thead-c906")
+RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c908")
+RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "zvfh_sstc_svinval_svnapot_svpbmt_xtheadba_"
+ "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_"
+ "xtheadfmemidx_xtheadmac_xtheadmemidx_"
+ "xtheadmempair_xtheadsync_xtheadvdot",
+ "xt-c908")
+RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910")
+RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910v2")
+RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync_"
+ "xtheadvector",
+ "xt-c910")
+RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_"
+ "svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_"
+ "xtheadsync_xtheadvdot",
+ "xt-c920v2")
+
RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_"
"ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_"
"zifencei_zihintntl_zihintpause_zimop_za64rs_"
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index 1d96865..8ad3025 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -257,11 +257,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
{
std::string local_arch = m_subset_list->to_string (true);
const char* local_arch_str = local_arch.c_str ();
- struct cl_target_option *default_opts
- = TREE_TARGET_OPTION (target_option_default_node);
- if (opts->x_riscv_arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) opts->x_riscv_arch_string));
- opts->x_riscv_arch_string = xstrdup (local_arch_str);
+ opts->x_riscv_arch_string = ggc_strdup (local_arch_str);
riscv_set_arch_by_subset_list (m_subset_list, opts);
}
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index d2fe849..61dcdab 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4724,7 +4724,11 @@ bool
verify_type_context (location_t loc, type_context_kind context, const_tree type,
bool silent_p)
{
- if (!sizeless_type_p (type))
+ const_tree tmp = type;
+ if (omp_type_context (context) && POINTER_TYPE_P (type))
+ tmp = strip_pointer_types (tmp);
+
+ if (!sizeless_type_p (tmp))
return true;
switch (context)
@@ -4796,6 +4800,34 @@ verify_type_context (location_t loc, type_context_kind context, const_tree type,
error_at (loc, "capture by copy of RVV type %qT", type);
return false;
+
+ case TCTX_OMP_MAP:
+ if (!silent_p)
+ error_at (loc, "RVV type %qT not allowed in %<map%> clause", type);
+ return false;
+
+ case TCTX_OMP_MAP_IMP_REF:
+ if (!silent_p)
+ error ("cannot reference %qT object types in %<target%> region", type);
+ return false;
+
+ case TCTX_OMP_PRIVATE:
+ if (!silent_p)
+ error_at (loc, "RVV type %qT not allowed in"
+ " %<target%> %<private%> clause", type);
+ return false;
+
+ case TCTX_OMP_FIRSTPRIVATE:
+ if (!silent_p)
+ error_at (loc, "RVV type %qT not allowed in"
+ " %<target%> %<firstprivate%> clause", type);
+ return false;
+
+ case TCTX_OMP_DEVICE_ADDR:
+ if (!silent_p)
+ error_at (loc, "RVV type %qT not allowed in"
+ " %<target%> device clauses", type);
+ return false;
}
gcc_unreachable ();
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 0ac2538..a8c9256 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -685,7 +685,7 @@ invalid_opt_bb_p (basic_block cfg_bb)
/* We only do LCM optimizations on blocks that are post dominated by
EXIT block, that is, we don't do LCM optimizations on infinite loop. */
FOR_EACH_EDGE (e, ei, cfg_bb->succs)
- if (e->flags & EDGE_FAKE)
+ if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL))
return true;
return false;
@@ -2698,6 +2698,7 @@ pre_vsetvl::compute_lcm_local_properties ()
m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
+ bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
@@ -2749,6 +2750,10 @@ pre_vsetvl::compute_lcm_local_properties ()
if (invalid_opt_bb_p (bb->cfg_bb ()))
{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n --- skipping bb %u due to weird edge",
+ bb->index ());
+
bitmap_clear (m_antloc[bb_index]);
bitmap_clear (m_transp[bb_index]);
}
@@ -3022,6 +3027,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
continue;
}
+ /* We cannot lift a vsetvl into the source block if the block is
+ not transparent with respect to it.
+ This is too restrictive for blocks where a register's use only
+ feeds into vsetvls and no regular insns. One example is the
+ test rvv/vsetvl/avl_single-68.c which is currently XFAILed for
+ that reason.
+ In order to support this case we'd need to check the vsetvl's
+ AVL operand's uses in the source block and make sure they are
+ only used in other vsetvls. */
+ if (!bitmap_bit_p (m_transp[eg->src->index], expr_index))
+ continue;
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file,
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 38f3ae7..bad59e2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10382,7 +10382,7 @@ riscv_file_end ()
fprintf (asm_out_file, "1:\n");
/* pr_type. */
- fprintf (asm_out_file, "\t.p2align\t3\n");
+ fprintf (asm_out_file, "\t.p2align\t%u\n", p2align);
fprintf (asm_out_file, "2:\n");
fprintf (asm_out_file, "\t.long\t0xc0000000\n");
/* pr_datasz. */
@@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl,
DECL_SOURCE_LOCATION (decl));
gcc_assert (parse_res);
- if (arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) arch_string));
-
cl_target_option_restore (&global_options, &global_options_set,
&cur_target);
}
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 2bcabd0..2759a4c 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -888,7 +888,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
(PTR) = riscv_asm_output_opcode(STREAM, PTR)
-#define JUMP_TABLES_IN_TEXT_SECTION 0
+#define JUMP_TABLES_IN_TEXT_SECTION (riscv_cmodel == CM_LARGE)
#define CASE_VECTOR_MODE SImode
#define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 26a247c..eec9687 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -789,7 +789,7 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]);
+ emit_insn (gen_addsi3_extended (t6, operands[1], operands[2]));
if (GET_CODE (operands[1]) != CONST_INT)
emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
else
@@ -799,7 +799,10 @@
else
t5 = operands[2];
emit_insn (gen_adddi3 (t3, t4, t5));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ rtx t7 = gen_lowpart (SImode, t6);
+ SUBREG_PROMOTED_VAR_P (t7) = 1;
+ SUBREG_PROMOTED_SET (t7, SRP_SIGNED);
+ emit_move_insn (operands[0], t7);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -835,8 +838,11 @@
emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
else
t3 = operands[1];
- riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]);
- emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+ emit_insn (gen_addsi3_extended (t4, operands[1], operands[2]));
+ rtx t5 = gen_lowpart (SImode, t4);
+ SUBREG_PROMOTED_VAR_P (t5) = 1;
+ SUBREG_PROMOTED_SET (t5, SRP_SIGNED);
+ emit_move_insn (operands[0], t5);
riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
}
@@ -966,7 +972,7 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]);
+ emit_insn (gen_subsi3_extended (t6, operands[1], operands[2]));
if (GET_CODE (operands[1]) != CONST_INT)
emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
else
@@ -976,7 +982,10 @@
else
t5 = operands[2];
emit_insn (gen_subdi3 (t3, t4, t5));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ rtx t7 = gen_lowpart (SImode, t6);
+ SUBREG_PROMOTED_VAR_P (t7) = 1;
+ SUBREG_PROMOTED_SET (t7, SRP_SIGNED);
+ emit_move_insn (operands[0], t7);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -1015,8 +1024,11 @@
emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
else
t3 = operands[1];
- riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]);
- emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+ emit_insn (gen_subsi3_extended (t4, operands[1], operands[2]));
+ rtx t5 = gen_lowpart (SImode, t4);
+ SUBREG_PROMOTED_VAR_P (t5) = 1;
+ SUBREG_PROMOTED_SET (t5, SRP_SIGNED);
+ emit_move_insn (operands[0], t5);
riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
}
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8ee43cf..3ab4d76 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2136,18 +2136,34 @@
(match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLS
- (match_operand:<VEL> 3 "direct_broadcast_operand"))
+ ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+ (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
/* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
has better chances to do vsetvl fusion in vsetvl pass. */
+ bool wrap_vec_dup = true;
+ rtx vec_cst = NULL_RTX;
if (riscv_vector::splat_to_scalar_move_p (operands))
{
operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
operands[3] = force_reg (<VEL>mode, operands[3]);
}
+ else if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
+ {
+ operands[3] = vec_cst;
+ wrap_vec_dup = false;
+ }
/* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
else if (satisfies_constraint_Wdm (operands[3]))
{
@@ -2191,6 +2207,8 @@
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
+ if (wrap_vec_dup)
+ operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
(define_insn_and_split "*pred_broadcast<mode>"
@@ -3939,7 +3957,7 @@
(any_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr, vr"))
(match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))]
- "TARGET_VECTOR"
+ "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
"v<sz>ext.vf2\t%0,%3%p1"
[(set_attr "type" "vext")
(set_attr "mode" "<MODE>")])
@@ -3959,7 +3977,7 @@
(any_extend:VQEXTI
(match_operand:<V_QUAD_TRUNC> 3 "register_operand" " vr, vr"))
(match_operand:VQEXTI 2 "vector_merge_operand" " vu, 0")))]
- "TARGET_VECTOR"
+ "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
"v<sz>ext.vf4\t%0,%3%p1"
[(set_attr "type" "vext")
(set_attr "mode" "<MODE>")])
@@ -3979,7 +3997,7 @@
(any_extend:VOEXTI
(match_operand:<V_OCT_TRUNC> 3 "register_operand" " vr, vr"))
(match_operand:VOEXTI 2 "vector_merge_operand" " vu, 0")))]
- "TARGET_VECTOR"
+ "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
"v<sz>ext.vf8\t%0,%3%p1"
[(set_attr "type" "vext")
(set_attr "mode" "<MODE>")])
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 737c3d6..12dbde2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -25765,10 +25765,13 @@ rs6000_can_inline_p (tree caller, tree callee)
}
}
- /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
- purposes. */
- callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
- explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
+ /* Ignore -mpower8-fusion, -mpower10-fusion and -msave-toc-indirect options
+ for inlining purposes. */
+ HOST_WIDE_INT ignored_isas = (OPTION_MASK_P8_FUSION
+ | OPTION_MASK_P10_FUSION
+ | OPTION_MASK_SAVE_TOC_INDIRECT);
+ callee_isa &= ~ignored_isas;
+ explicit_isa &= ~ignored_isas;
/* The callee's options must be a subset of the caller's options, i.e.
a vsx function may inline an altivec function, but a no-vsx function
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index edb2c96..a3d966e 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -2541,10 +2541,17 @@
(unspec_volatile:SI [(match_operand:BLK 1 "memory_operand") ;; String1
(match_operand:BLK 2 "memory_operand")] ;; String2
UNSPEC_CMPSTRN))
- (use (match_operand:SI 3 "register_operand")) ;; Max Length
+ (use (match_operand:SI 3 "nonmemory_operand")) ;; Max Length
(match_operand:SI 4 "immediate_operand")] ;; Known Align
"rx_allow_string_insns"
{
+ bool const_len = CONST_INT_P (operands[3]);
+ if (const_len && operands[3] == CONST0_RTX (SImode))
+ {
+ emit_move_insn (operands[0], CONST0_RTX (SImode));
+ DONE;
+ }
+
rtx str1 = gen_rtx_REG (SImode, 1);
rtx str2 = gen_rtx_REG (SImode, 2);
rtx len = gen_rtx_REG (SImode, 3);
@@ -2553,6 +2560,13 @@
emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX));
emit_move_insn (len, operands[3]);
+ /* Set the C and Z flags in case len is zero. */
+ if (!const_len)
+ {
+ emit_insn (gen_setpsw (GEN_INT ('C')));
+ emit_insn (gen_setpsw (GEN_INT ('Z')));
+ }
+
emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
DONE;
}
@@ -2590,9 +2604,7 @@
(clobber (reg:SI 3))
(clobber (reg:CC CC_REG))]
"rx_allow_string_insns"
- "setpsw z ; Set flags in case len is zero
- setpsw c
- scmpu ; Perform the string comparison
+ "scmpu ; Perform the string comparison
mov #-1, %0 ; Set up -1 result (which cannot be created
; by the SC insn)
bnc ?+ ; If Carry is not set skip over
diff --git a/gcc/config/s390/9175.md b/gcc/config/s390/9175.md
new file mode 100644
index 0000000..d0ac0e1
--- /dev/null
+++ b/gcc/config/s390/9175.md
@@ -0,0 +1,316 @@
+;; Scheduling description for z17.
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_attr "z17_unit_fpd" ""
+ (cond [(eq_attr "mnemonic" "ddb,ddbr,deb,debr,dxbr,sqdb,sqdbr,sqeb,sqebr,\
+sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,vfddb,vfdsb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,\
+vrlq,vrq,wfddb,wfdsb,wfdxb,wfsqdb,wfsqxb")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_fxa" ""
+ (cond [(eq_attr "mnemonic" "a,afi,ag,agf,agfi,agfr,agh,aghi,aghik,agr,agrk,ah,\
+ahi,ahik,ahy,al,alc,alcg,alcgr,alcr,alfi,alg,algf,algfi,algfr,alghsik,algr,\
+algrk,alhsik,alr,alrk,aly,ar,ark,ay,bdepg,bextg,clzg,ctzg,etnd,flogr,ic,icm,\
+icmh,icmy,icy,iihf,iilf,ipm,la,larl,lay,lb,lbr,lcgfr,lcgr,lcr,lgb,lgbr,lgf,\
+lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,llcr,llgcr,llgfr,\
+llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,llxab,llxaf,llxag,llxah,\
+llxaq,lngfr,lngr,lnr,loc,locg,locghi,locgr,lochi,locr,lpgfr,lpgr,lpr,lr,lrv,\
+lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,lxab,lxaf,lxag,lxah,lxaq,m,mfy,\
+mg,mgh,mghi,mgrk,mh,mhi,mhy,ml,mlg,mlgr,mlr,mr,ms,msc,msfi,msg,msgc,msgf,msgfi,\
+msgfr,msgr,msgrkc,msr,msrkc,msy,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,\
+nilh,nill,nngrk,nnrk,nogrk,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,\
+oihf,oihh,oihl,oilf,oilh,oill,or,ork,oy,pfpo,popcnt,risbg,risbgn,rll,rllg,\
+rnsbg,rosbg,rxsbg,s,selgr,selr,sg,sgf,sgfr,sgh,sgr,sgrk,sh,shy,sl,slb,slbg,\
+slbgr,slbr,sldl,slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,\
+sly,sr,sra,srag,srak,srda,srdl,srk,srl,srlg,srlk,sy,x,xg,xgr,xgrk,xihf,xilf,xr,\
+xrk,xy")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_fxb" ""
+ (cond [(eq_attr "mnemonic" "agsi,algsi,alsi,asi,b,bc,bcr,bi,br,c,cfi,cg,cgf,\
+cgfi,cgfr,cgfrl,cgh,cghi,cghrl,cghsi,cgit,cgr,cgrl,cgrt,ch,chi,chrl,chsi,chy,\
+cit,cl,clfhsi,clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,\
+clgrl,clgrt,clgt,clhhsi,clhrl,cli,cliy,clm,clmy,clr,clrl,clrt,clt,cly,cr,crl,\
+crt,cy,laa,laag,lan,lang,lao,laog,lat,lax,laxg,lcdfr,ldgr,ldr,lgat,lgdr,lndfr,\
+lpdfr,lxr,lzdr,lzer,lzxr,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,nop,nopr,ntstg,oi,\
+oiy,ppa,st,stc,stcy,std,stdy,ste,stey,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,\
+strv,strvg,strvh,sty,tend,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlgvb,vlgvf,vlgvg,\
+vlgvh,vlr,vlvgb,vlvgf,vlvgg,vlvgh,vlvgp,vscef,vsceg,vst,vstbrf,vstbrg,vstbrh,\
+vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,vstrlr,xi,\
+xiy")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_fxd" ""
+ (cond [(eq_attr "mnemonic" "dlgr,dlr,dr,dsgfr,dsgr")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_lsu" ""
+ (cond [(eq_attr "mnemonic" "clc,ear,l,lam,lcbb,ld,lde,ldy,lg,lgrl,llc,llgc,\
+llgf,llgfrl,llgh,llghrl,llgt,llh,llhrl,lm,lmg,lmy,lpq,lrl,ly,mvcrl,sar,sfpc,\
+tabort,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,vlbrrepf,vlbrrepg,vlbrreph,vlerf,vlerg,\
+vlerh,vll,vllebrzf,vllebrzg,vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,\
+vlrepb,vlrepf,vlrepg,vlreph,vlrl,vlrlr")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_vfu" ""
+ (cond [(eq_attr "mnemonic" "adb,adbr,adtr,aeb,aebr,axbr,axtr,brcl,cdb,cdbr,\
+cdtr,ceb,cebr,cpsdr,cxbr,cxtr,ddtr,dxtr,fidbr,fidbra,fidtr,fiebr,fiebra,fixbr,\
+fixbra,fixtr,j,jg,kdb,kdbr,kdtr,keb,kebr,kxbr,kxtr,lcdbr,lcebr,lcxbr,ldeb,\
+ldebr,ldetr,le,ledbr,ledtr,ler,ley,lndbr,lnebr,lnxbr,lpdbr,lpebr,lpxbr,ltdbr,\
+ltdtr,ltebr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,madb,madbr,maeb,maebr,mdb,\
+mdbr,mdtr,meeb,meebr,msdb,msdbr,mseb,msebr,mxbr,mxtr,sdb,sdbr,sdtr,seb,sebr,\
+sxbr,sxtr,tcdb,tceb,tcxb,tdcdt,tdcet,tdcxt,vab,vaccb,vacccq,vaccf,vaccg,vacch,\
+vaccq,vacq,vaf,vag,vah,vaq,vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,\
+vavglq,vavgq,vblendb,vblendf,vblendg,vblendh,vblendq,vbperm,vcdgb,vcdlgb,vcefb,\
+vcelfb,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,vceqh,vceqhs,vceqq,vceqqs,vcfeb,\
+vcfn,vcgdb,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,vchlb,vchlbs,vchlf,\
+vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,vcksm,vclfeb,vclfnh,\
+vclfnl,vclgdb,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\
+vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\
+verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\
+vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\
+vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfadb,vfasb,vfcedb,vfcedbs,vfcesb,\
+vfcesbs,vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfeeb,\
+vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,vfenezb,vfenezf,\
+vfenezh,vfidb,vfisb,vfkedb,vfkesb,vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,\
+vflndb,vflnsb,vflpdb,vflpsb,vfmadb,vfmasb,vfmaxdb,vfmaxsb,vfmdb,vfmindb,\
+vfminsb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vfsdb,vfssb,\
+vftcidb,vftcisb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgfmab,vgfmaf,vgfmag,vgfmah,\
+vgfmb,vgfmf,vgfmg,vgfmh,vgm,vistrb,vistrbs,vistrf,vistrfs,vistrh,vistrhs,vlcb,\
+vlcf,vlcg,vlch,vldeb,vleb,vlebrf,vlebrg,vlebrh,vledb,vlef,vleg,vleh,vleib,\
+vleif,vleig,vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,\
+vmahg,vmahh,vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,\
+vmalhg,vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,\
+vmaog,vmaoh,vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,\
+vmleg,vmleh,vmlf,vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,\
+vmloh,vmlq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,vmnlq,vmnq,vmob,vmof,\
+vmog,vmoh,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmslg,vmxb,vmxf,vmxg,\
+vmxh,vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,\
+vpdi,vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,\
+vpksfs,vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,\
+vrepf,vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,\
+vscbif,vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,\
+vsldb,vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,\
+vtm,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,vuplhb,vuplhf,vuplhg,vuplhh,\
+vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,\
+wcgdb,wclfeb,wclgdb,wfadb,wfasb,wfaxb,wfcdb,wfcedb,wfcesb,wfcexb,wfcexbs,\
+wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,wfchxbs,wfcsb,wfcxb,\
+wfidb,wfisb,wfixb,wfkdb,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,\
+wfkhsb,wfkhxb,wfksb,wfkxb,wflcdb,wflcsb,wflcxb,wflld,wflndb,wflnsb,wflnxb,\
+wflpdb,wflpsb,wflpxb,wflrx,wfmadb,wfmasb,wfmaxb,wfmaxxb,wfmdb,wfminxb,wfmsb,\
+wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsdb,wfssb,wfsxb,wftcixb,wldeb,\
+wledb")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_cracked" ""
+ (cond [(eq_attr "mnemonic" "bas,basr,bras,brasl,cdfbr,cdftr,cdgbr,cdgtr,\
+cdlfbr,cdlftr,cdlgbr,cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,\
+cgdtr,cgebr,cgxbr,cgxtr,chhsi,clfdbr,clfdtr,clfebr,clfxbr,clfxtr,clgdbr,clgdtr,\
+clgebr,clgxbr,clgxtr,cs,csg,csy,efpc,ex,exrl,lcgfr,lngfr,lpgfr,lpq,lxr,lzxr,\
+mvc,nc,oc,rnsbg,rosbg,rxsbg,stpq,vgef,vgeg,vscef,vsceg,vsteb,vstebrh,vsteh,xc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_expanded" ""
+ (cond [(eq_attr "mnemonic" "cds,cdsg,cdsy,cxfbr,cxftr,cxgbr,cxgtr,cxlfbr,\
+cxlftr,cxlgbr,cxlgtr,d,dl,dlg,dsg,dsgf,lam,lm,lmg,lmy,sldl,srda,srdl,stam,stm,\
+stmg,stmy,tbegin,tbeginc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_groupalone" ""
+ (cond [(eq_attr "mnemonic" "alc,alcg,alcgr,alcr,axbr,axtr,clc,cxbr,cxfbr,\
+cxftr,cxgbr,cxgtr,cxlfbr,cxlftr,cxlgbr,cxlgtr,cxtr,d,dl,dlg,dlgr,dlr,dr,dsg,\
+dsgf,dsgfr,dsgr,dxbr,dxtr,ex,exrl,fixbr,fixbra,fixtr,flogr,kxbr,kxtr,lcxbr,\
+lnxbr,lpxbr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,m,madb,maeb,maebr,mfy,mg,\
+mgrk,ml,mlg,mlgr,mlr,mr,msdb,mseb,msebr,mvc,mvcrl,mxbr,mxtr,nc,oc,ppa,sfpc,slb,\
+slbg,slbgr,slbr,sqxbr,sxbr,sxtr,tabort,tbegin,tbeginc,tcxb,tdcxt,tend,xc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_endgroup" ""
+ (cond [(eq_attr "mnemonic" "bas,basr,bcr,br,bras,brasl,cdsg,clfebr,cs,csg,csy,\
+efpc,ex,exrl,ipm,lam,lpq,lxr,nopr,sldl,srda,srdl,stam,stm,stmg,stmy,tbegin,\
+tbeginc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_groupoftwo" ""
+ (cond [(eq_attr "mnemonic" "cdfbr,cdftr,cdgbr,cdgtr,cdlfbr,cdlftr,cdlgbr,\
+cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,cgdtr,cgebr,cgxbr,\
+cgxtr,chhsi,clfdbr,clfdtr,clfxbr,clfxtr,clgdbr,clgdtr,clgebr,clgxbr,clgxtr,\
+lcgfr,lngfr,lpgfr,lzxr,vacccq,vacq,vblendb,vblendf,vblendg,vblendh,vblendq,\
+veval,vfmadb,vfmasb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vgef,vgeg,\
+vgfmab,vgfmaf,vgfmag,vgfmah,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\
+vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\
+vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\
+vmslg,vperm,vsbcbiq,vsbiq,vscef,vsceg,vsel,vsteb,vstebrh,vsteh,wfmadb,wfmasb,\
+wfmaxb,wfmsdb,wfmssb,wfmsxb,wfnmaxb,wfnmsxb")
+ (const_int 1)] (const_int 0)))
+
+(define_insn_reservation "z17_0" 0
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "a,afi,ag,agfi,aghi,aghik,agr,agrk,ahi,ahik,al,alfi,alg,\
+algf,algfi,algfr,alghsik,algr,algrk,alhsik,alr,alrk,aly,ar,ark,ay,b,bc,bcr,bi,\
+br,brcl,c,cfi,cg,cgfi,cghi,cghsi,cgit,cgr,cgrl,cgrt,chi,chsi,cit,cl,clfhsi,\
+clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,clgrl,clgrt,\
+clgt,clhhsi,clhrl,cli,cliy,clr,clrl,clrt,clt,cly,cr,crl,crt,cy,etnd,ic,icm,\
+icmh,icmy,icy,iihf,iilf,j,jg,la,larl,lat,lay,lb,lbr,lcdfr,lcgr,lcr,ldgr,ldr,\
+lgat,lgb,lgbr,lgf,lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,\
+llcr,llgcr,llgfr,llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,lndfr,\
+lngr,lnr,lpdfr,lpgr,lpr,lr,lrv,lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,\
+lzdr,lzer,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,nilh,nill,nngrk,nnrk,\
+nogrk,nop,nopr,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,oihf,oihh,\
+oihl,oilf,oilh,oill,or,ork,oy,pfpo,risbg,risbgn,rll,rllg,s,sg,sgr,sgrk,sl,sldl,\
+slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,sly,sr,sra,srag,\
+srak,srda,srdl,srk,srl,srlg,srlk,sy,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlr,\
+vlvgb,vlvgf,vlvgg,vlvgh,x,xg,xgr,xgrk,xihf,xilf,xr,xrk,xy")) "nothing")
+
+(define_insn_reservation "z17_1" 1
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "agf,agfr,agh,agsi,ah,ahy,algsi,alsi,asi,cgf,cgfr,cgfrl,\
+cgh,cghrl,ch,chrl,chy,clm,clmy,cpsdr,laa,laag,lan,lang,lao,laog,lax,laxg,le,\
+ler,ley,llxab,llxaf,llxag,llxah,llxaq,loc,locg,locghi,locgr,lochi,locr,lxab,\
+lxaf,lxag,lxah,lxaq,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,ntstg,oi,oiy,selgr,selr,\
+sgf,sgfr,sgh,sh,shy,st,stc,stcy,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,strv,\
+strvg,strvh,sty,vab,vaccb,vacccq,vaccf,vaccg,vacch,vaccq,vacq,vaf,vag,vah,vaq,\
+vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,vavglq,vavgq,vblendb,\
+vblendf,vblendg,vblendh,vblendq,vbperm,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,\
+vceqh,vceqhs,vceqq,vceqqs,vcfn,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,\
+vchlb,vchlbs,vchlf,vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,\
+vclfnh,vclfnl,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\
+vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\
+verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\
+vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\
+vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfcedb,vfcedbs,vfcesb,vfcesbs,\
+vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfkedb,vfkesb,\
+vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,vflndb,vflnsb,vflpdb,vflpsb,\
+vfmaxdb,vfmaxsb,vfmindb,vfminsb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgm,vlcb,\
+vlcf,vlcg,vlch,vleb,vlebrf,vlebrg,vlebrh,vlef,vleg,vleh,vleib,vleif,vleig,\
+vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,\
+vmnlq,vmnq,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmxb,vmxf,vmxg,vmxh,\
+vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,vpdi,\
+vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,vpksfs,\
+vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,vrepf,\
+vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,vscbif,\
+vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,vsldb,\
+vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,\
+vuplhb,vuplhf,vuplhg,vuplhh,vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wfcedb,\
+wfcesb,wfcexb,wfcexbs,wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,\
+wfchxbs,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,wfkhsb,wfkhxb,\
+wflcdb,wflcsb,wflcxb,wflndb,wflnsb,wflnxb,wflpdb,wflpsb,wflpxb,wfmaxxb,wfminxb,\
+xi,xiy")) "nothing")
+
+(define_insn_reservation "z17_2" 2
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdb,cdbr,ceb,cebr,clzg,ctzg,ear,ipm,kdb,kdbr,keb,kebr,l,\
+lcbb,lcdbr,lcebr,ld,lde,ldy,lg,lgdr,lgrl,llc,llgc,llgf,llgfrl,llgh,llghrl,llgt,\
+llh,llhrl,lm,lmg,lmy,lndbr,lnebr,lpdbr,lpebr,lrl,ltdbr,ltebr,ly,popcnt,sar,\
+tcdb,tceb,vfeeb,vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,\
+vfenezb,vfenezf,vfenezh,vftcidb,vftcisb,vistrb,vistrbs,vistrf,vistrfs,vistrh,\
+vistrhs,vlbrrepf,vlbrrepg,vlbrreph,vlgvb,vlgvf,vlgvg,vlgvh,vllebrzf,vllebrzg,\
+vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,vlrepb,vlrepf,vlrepg,vlreph,vlrl,\
+vlvgp,wfcdb,wfcsb,wfcxb,wfkdb,wfksb,wfkxb,wftcixb")) "nothing")
+
+(define_insn_reservation "z17_3" 3
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "bdepg,bextg,cds,cdsy,mgh,mghi,mh,mhi,mhy,ms,msc,msfi,msg,\
+msgc,msgf,msgfi,msgfr,msgr,msgrkc,msr,msrkc,msy,std,stdy,ste,stey,vcksm,vgfmab,\
+vgfmaf,vgfmag,vgfmah,vgfmb,vgfmf,vgfmg,vgfmh,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,\
+vlerf,vlerg,vlerh,vll,vlrlr,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\
+vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\
+vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\
+vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,vmleg,vmleh,vmlf,\
+vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,vmloh,vmlq,vmob,\
+vmof,vmog,vmoh,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,vtm")) "nothing")
+
+(define_insn_reservation "z17_4" 4
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "bas,basr,bras,brasl,chhsi,clc,ex,exrl,lam,lcgfr,lngfr,\
+lpgfr,lxr,lzxr,mvcrl,ppa,rnsbg,rosbg,rxsbg,tabort,tend,vst,vstbrf,vstbrg,\
+vstbrh,vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,\
+vstrlr")) "nothing")
+
+(define_insn_reservation "z17_5" 5
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "adb,adbr,aeb,aebr,alc,alcg,alcgr,alcr,cs,csg,csy,fidbr,\
+fidbra,fiebr,fiebra,ldeb,ldebr,ledbr,madbr,mdb,mdbr,meeb,meebr,msdbr,sdb,sdbr,\
+seb,sebr,slb,slbg,slbgr,slbr,stm,stmg,stmy,vcdgb,vcdlgb,vcefb,vcelfb,vcfeb,\
+vcgdb,vclfeb,vclgdb,vldeb,vledb,vmslg,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,wcgdb,\
+wclfeb,wclgdb,wflld,wflrx,wldeb,wledb")) "nothing")
+
+(define_insn_reservation "z17_6" 6
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "sfpc")) "nothing")
+
+(define_insn_reservation "z17_7" 7
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "adtr,cdtr,fidtr,kdtr,ldetr,ltdtr,sdtr,tdcdt,tdcet,vfadb,\
+vfasb,vfidb,vfisb,vfsdb,vfssb,vgef,vgeg,wfadb,wfasb,wfaxb,wfidb,wfisb,wfixb,\
+wfsdb,wfssb,wfsxb")) "nothing")
+
+(define_insn_reservation "z17_8" 8
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdgtr,cdlgtr,cdsg,cxgtr,cxlgtr,flogr,lpq,m,mfy,mg,mgrk,\
+ml,mlg,mlgr,mlr,mr,stpq,vsteb,vstebrh,vsteh")) "nothing")
+
+(define_insn_reservation "z17_9" 9
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdfbr,cdgbr,cdlfbr,cdlgbr,cefbr,cegbr,celfbr,celgbr,madb,\
+maeb,maebr,msdb,mseb,msebr,stam")) "nothing")
+
+(define_insn_reservation "z17_10" 10
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cgdtr,cgxtr,clfdtr,clfxtr,clgdtr,clgxtr,d,dl,dlg,dsg,\
+dsgf,efpc,lxdb,lxdbr,lxeb,lxebr,vscef,vsceg")) "nothing")
+
+(define_insn_reservation "z17_11" 11
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cfdbr,cfebr,cgdbr,cgebr,clfdbr,clfebr,clgdbr,clgebr")) "nothing")
+
+(define_insn_reservation "z17_12" 12
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cxbr,cxtr,kxbr,kxtr,tbegin,tbeginc,tcxb,tdcxt")) "nothing")
+
+(define_insn_reservation "z17_13" 13
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "axbr,axtr,cxfbr,cxgbr,cxlfbr,cxlgbr,fixbr,fixbra,fixtr,\
+lcxbr,lnxbr,lpxbr,ltxbr,ltxtr,lxdtr,sxbr,sxtr")) "nothing")
+
+(define_insn_reservation "z17_14" 14
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cfxbr,cgxbr,clfxbr,clgxbr,ledtr")) "nothing")
+
+(define_insn_reservation "z17_15" 15
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "nc,oc")) "nothing")
+
+(define_insn_reservation "z17_16" 16
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdftr,cdlftr,cxftr,cxlftr")) "nothing")
+
+(define_insn_reservation "z17_18" 18
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "xc")) "nothing")
+
+(define_insn_reservation "z17_20" 20
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "ddb,ddbr,ddtr,deb,debr,dlgr,dlr,dr,dsgfr,dsgr,dxbr,dxtr,\
+mdtr,mvc,mxbr,mxtr,sqdb,sqdbr,sqeb,sqebr,sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,\
+vfddb,vfdsb,vfmadb,vfmasb,vfmdb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,\
+vfnmssb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,vrlq,vrq,wfddb,wfdsb,wfdxb,wfmadb,\
+wfmasb,wfmaxb,wfmdb,wfmsb,wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsqdb,\
+wfsqxb")) "nothing")
+
diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc
index 49e8fa0..7a7ceea 100644
--- a/gcc/config/s390/driver-native.cc
+++ b/gcc/config/s390/driver-native.cc
@@ -127,6 +127,10 @@ s390_host_detect_local_cpu (int argc, const char **argv)
case 0x3932:
cpu = "arch14";
break;
+ case 0x9175:
+ case 0x9176:
+ cpu = "arch15";
+ break;
default:
cpu = "arch15";
break;
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index d9af9b1..cee2326 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -300,8 +300,8 @@
#define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */
#define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */
#define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. */
-#define B_VXE3 (1 << 7) /* Builtins requiring the arch15 vector extensions. */
-#define B_ARCH15 (1 << 8) /* Builtins requiring arch15. */
+#define B_VXE3 (1 << 7) /* Builtins requiring the z17 vector extensions. */
+#define B_Z17 (1 << 8) /* Builtins requiring z17. */
/* B_DEF defines a standard (not overloaded) builtin
B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>)
@@ -3318,8 +3318,8 @@ B_DEF (s390_vcnf, vcnf_v8hi, 0,
/* arch 15 builtins */
-B_DEF (s390_bdepg, bdepg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG)
-B_DEF (s390_bextg, bextg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG)
+B_DEF (s390_bdepg, bdepg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG)
+B_DEF (s390_bextg, bextg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG)
OB_DEF (s390_vec_blend, s390_vec_blend_s8, s390_vec_blend_dbl, B_VXE3, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_blend_s8, s390_vblendb, 0, 0, BT_OV_V16QI_V16QI_V16QI_V16QI)
diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc
index 311d74a..a01c44c 100644
--- a/gcc/config/s390/s390-c.cc
+++ b/gcc/config/s390/s390-c.cc
@@ -962,7 +962,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl,
if (!TARGET_VXE3 && (ob_flags & B_VXE3))
{
- error_at (loc, "%qF requires arch15 or higher", ob_fndecl);
+ error_at (loc, "%qF requires z17 or higher", ob_fndecl);
return error_mark_node;
}
@@ -1056,7 +1056,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl,
if (!TARGET_VXE3
&& bflags_overloaded_builtin_var[last_match_index] & B_VXE3)
{
- error_at (loc, "%qs matching variant requires arch15 or higher",
+ error_at (loc, "%qs matching variant requires z17 or higher",
IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)));
return error_mark_node;
}
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index 437d3b9..9cacb2c 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -39,7 +39,7 @@ enum processor_type
PROCESSOR_3906_Z14,
PROCESSOR_8561_Z15,
PROCESSOR_3931_Z16,
- PROCESSOR_ARCH15,
+ PROCESSOR_9175_Z17,
PROCESSOR_NATIVE,
PROCESSOR_max
};
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 0ff3fd5..e3edf85 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -342,7 +342,7 @@ const struct s390_processor processor_table[] =
{ "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
{ "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
{ "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 },
- { "arch15", "arch15", PROCESSOR_ARCH15, &zEC12_cost, 15 },
+ { "z17", "arch15", PROCESSOR_9175_Z17, &zEC12_cost, 15 },
{ "native", "", PROCESSOR_NATIVE, NULL, 0 }
};
@@ -916,7 +916,7 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if ((bflags & B_VXE3) && !TARGET_VXE3)
{
- error ("Builtin %qF requires arch15 or higher", fndecl);
+ error ("Builtin %qF requires z17 or higher", fndecl);
return const0_rtx;
}
}
@@ -9204,7 +9204,7 @@ s390_issue_rate (void)
case PROCESSOR_3906_Z14:
case PROCESSOR_8561_Z15:
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
+ case PROCESSOR_9175_Z17:
default:
return 1;
}
@@ -14496,7 +14496,21 @@ s390_call_saved_register_used (tree call_expr)
for (reg = 0; reg < nregs; reg++)
if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
- return true;
+ {
+ rtx parm;
+ /* Allow passing through unmodified value from caller,
+ see PR119873. */
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && REG_P (parm)
+ && REGNO (parm) == REGNO (parm_rtx)
+ && REG_NREGS (parm) == REG_NREGS (parm_rtx))
+ break;
+ return true;
+ }
}
else if (GET_CODE (parm_rtx) == PARALLEL)
{
@@ -14510,7 +14524,17 @@ s390_call_saved_register_used (tree call_expr)
gcc_assert (REG_NREGS (r) == 1);
if (!call_used_or_fixed_reg_p (REGNO (r)))
- return true;
+ {
+ rtx parm;
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && rtx_equal_p (parm_rtx, parm))
+ break;
+ return true;
+ }
}
}
}
@@ -14543,8 +14567,9 @@ s390_function_ok_for_sibcall (tree decl, tree exp)
return false;
/* Register 6 on s390 is available as an argument register but unfortunately
- "caller saved". This makes functions needing this register for arguments
- not suitable for sibcalls. */
+ "call saved". This makes functions needing this register for arguments
+ not suitable for sibcalls, unless the same value is passed from the
+ caller. */
return !s390_call_saved_register_used (exp);
}
@@ -15632,7 +15657,6 @@ s390_get_sched_attrmask (rtx_insn *insn)
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
if (get_attr_z16_cracked (insn))
mask |= S390_SCHED_ATTR_MASK_CRACKED;
if (get_attr_z16_expanded (insn))
@@ -15644,6 +15668,18 @@ s390_get_sched_attrmask (rtx_insn *insn)
if (get_attr_z16_groupoftwo (insn))
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
+ case PROCESSOR_9175_Z17:
+ if (get_attr_z17_cracked (insn))
+ mask |= S390_SCHED_ATTR_MASK_CRACKED;
+ if (get_attr_z17_expanded (insn))
+ mask |= S390_SCHED_ATTR_MASK_EXPANDED;
+ if (get_attr_z17_endgroup (insn))
+ mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
+ if (get_attr_z17_groupalone (insn))
+ mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
+ if (get_attr_z17_groupoftwo (insn))
+ mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
+ break;
default:
gcc_unreachable ();
}
@@ -15691,7 +15727,6 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
mask |= 1 << 3;
break;
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
*units = 4;
if (get_attr_z16_unit_lsu (insn))
mask |= 1 << 0;
@@ -15702,6 +15737,17 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
if (get_attr_z16_unit_vfu (insn))
mask |= 1 << 3;
break;
+ case PROCESSOR_9175_Z17:
+ *units = 4;
+ if (get_attr_z17_unit_lsu (insn))
+ mask |= 1 << 0;
+ if (get_attr_z17_unit_fxa (insn))
+ mask |= 1 << 1;
+ if (get_attr_z17_unit_fxb (insn))
+ mask |= 1 << 2;
+ if (get_attr_z17_unit_vfu (insn))
+ mask |= 1 << 3;
+ break;
default:
gcc_unreachable ();
}
@@ -15715,7 +15761,8 @@ s390_is_fpd (rtx_insn *insn)
return false;
return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
- || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn);
+ || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn)
+ || get_attr_z17_unit_fpd (insn);
}
static bool
@@ -15725,7 +15772,8 @@ s390_is_fxd (rtx_insn *insn)
return false;
return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
- || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn);
+ || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn)
+ || get_attr_z17_unit_fxd (insn);
}
/* Returns TRUE if INSN is a long-running instruction. */
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 6f7195d..8b04bc9 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -45,12 +45,12 @@ enum processor_flags
PF_NNPA = 32768,
PF_Z16 = 65536,
PF_VXE3 = 131072,
- PF_ARCH15 = 262144
+ PF_Z17 = 262144
};
/* This is necessary to avoid a warning about comparing different enum
types. */
-#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_3931_Z16 ? PROCESSOR_3931_Z16 : s390_tune ))
+#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_9175_Z17 ? PROCESSOR_9175_Z17 : s390_tune ))
/* These flags indicate that the generated code should run on a cpu
providing the respective hardware facility regardless of the
@@ -124,10 +124,10 @@ enum processor_flags
(s390_arch_flags & PF_VXE3)
#define TARGET_CPU_VXE3_P(opts) \
(opts->x_s390_arch_flags & PF_VXE3)
-#define TARGET_CPU_ARCH15 \
- (s390_arch_flags & PF_ARCH15)
-#define TARGET_CPU_ARCH15_P(opts) \
- (opts->x_s390_arch_flags & PF_ARCH15)
+#define TARGET_CPU_Z17 \
+ (s390_arch_flags & PF_Z17)
+#define TARGET_CPU_Z17_P(opts) \
+ (opts->x_s390_arch_flags & PF_Z17)
#define TARGET_HARD_FLOAT_P(opts) (!TARGET_SOFT_FLOAT_P(opts))
@@ -198,9 +198,9 @@ enum processor_flags
(TARGET_VX && TARGET_CPU_VXE3)
#define TARGET_VXE3_P(opts) \
(TARGET_VX_P (opts) && TARGET_CPU_VXE3_P (opts))
-#define TARGET_ARCH15 (TARGET_ZARCH && TARGET_CPU_ARCH15)
-#define TARGET_ARCH15_P(opts) \
- (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH15_P (opts))
+#define TARGET_Z17 (TARGET_ZARCH && TARGET_CPU_Z17)
+#define TARGET_Z17_P(opts) \
+ (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_Z17_P (opts))
#if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13)
#define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 9d49580..05b9da6 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -599,11 +599,11 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in s390.h.
-(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16"
+(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16,z17"
(const (symbol_ref "s390_tune_attr")))
(define_attr "cpu_facility"
- "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,arch15"
+ "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,z17"
(const_string "standard"))
(define_attr "enabled" ""
@@ -681,8 +681,8 @@
(match_test "TARGET_VXE3"))
(const_int 1)
- (and (eq_attr "cpu_facility" "arch15")
- (match_test "TARGET_ARCH15"))
+ (and (eq_attr "cpu_facility" "z17")
+ (match_test "TARGET_Z17"))
(const_int 1)
]
(const_int 0)))
@@ -725,6 +725,9 @@
;; Pipeline description for z16
(include "3931.md")
+;; Pipeline description for z17
+(include "9175.md")
+
;; Predicates
(include "predicates.md")
@@ -2056,7 +2059,7 @@
[(set (match_operand:DI 0 "register_operand" "=d")
(ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a"))
(const_int LXAMODEITER)))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"lxa<lxamode>\t%0,0(%1,0)"
[(set_attr "op_type" "RXY")])
@@ -2066,7 +2069,7 @@
(ashift:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "const_int_operand")))
(const_int LXAMODEITER)))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"lxa<lxamode>\t%0,%2(%1,0)"
[(set_attr "op_type" "RXY")])
@@ -2076,7 +2079,7 @@
(plus:DI (ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a"))
(const_int LXAMODEITER))
(match_operand:DI 2 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"lxa<lxamode>\t%0,0(%1,%2)"
[(set_attr "op_type" "RXY")])
@@ -2087,7 +2090,7 @@
(match_operand:SI 2 "const_int_operand")))
(const_int LXAMODEITER))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"lxa<lxamode>\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -2096,7 +2099,7 @@
(plus:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "const_int_operand")))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"lxab\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -2113,7 +2116,7 @@
0)
(const_int LXAMODEITER))
(const_int <LLXAMASK>)))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"llxa<lxamode>\t%0,%2(%1,0)"
[(set_attr "op_type" "RXY")])
@@ -2124,7 +2127,7 @@
(const_int LXAMODEITER))
(const_int <LLXAMASK>))
(match_operand:DI 2 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"llxa<lxamode>\t%0,0(%1,%2)"
[(set_attr "op_type" "RXY")])
@@ -2137,7 +2140,7 @@
(const_int LXAMODEITER))
(const_int <LLXAMASK>))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"llxa<lxamode>\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -2146,7 +2149,7 @@
(plus:DI (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "const_int_operand")))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"llxab\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -3594,7 +3597,7 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(set (match_dup 0) (match_dup 1))
@@ -3606,7 +3609,7 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(unspec [(match_dup 2) (match_dup 3)
@@ -3620,14 +3623,14 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))]
+ (clobber (match_scratch 3))]
"TARGET_Z10 && reload_completed"
[(parallel
[(unspec [(match_dup 2) (const_int 0)
- (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
(set (match_dup 0) (match_dup 1))
(use (const_int 1))])]
- "operands[3] = gen_label_rtx ();")
+ "operands[4] = gen_label_rtx ();")
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -3849,7 +3852,7 @@
(const_int 0))
(use (match_operand 1 "const_int_operand" ""))
(use (match_operand 2 "immediate_operand" ""))
- (clobber (scratch))
+ (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
"reload_completed"
[(parallel
@@ -3863,7 +3866,7 @@
(const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (match_operand 2 "memory_operand" ""))
- (clobber (scratch))
+ (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
"reload_completed"
[(parallel
@@ -3879,7 +3882,7 @@
(const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))
+ (clobber (match_scratch 2))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10 && reload_completed"
[(parallel
@@ -4044,7 +4047,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
@@ -4057,7 +4060,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(unspec [(match_dup 2) (match_dup 3)
@@ -4072,7 +4075,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))]
+ (clobber (match_scratch 3))]
"TARGET_Z10 && reload_completed"
[(parallel
[(unspec [(match_dup 2) (const_int 0)
@@ -4940,7 +4943,7 @@
(unspec:DI [(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_BDEPG))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"bdepg\t%0,%1,%2"
[(set_attr "op_type" "RRF")])
@@ -4953,7 +4956,7 @@
(unspec:DI [(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_BEXTG))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"bextg\t%0,%1,%2"
[(set_attr "op_type" "RRF")])
@@ -9580,7 +9583,7 @@
(clz:DI (match_operand:DI 1 "register_operand" "d")))]
"TARGET_EXTIMM && TARGET_ZARCH"
{
- if (!(TARGET_ARCH15 && TARGET_64BIT))
+ if (!(TARGET_Z17 && TARGET_64BIT))
{
rtx_insn *insn;
rtx clz_equal;
@@ -9601,7 +9604,7 @@
(define_insn "*clzg"
[(set (match_operand:DI 0 "register_operand" "=d")
(clz:DI (match_operand:DI 1 "register_operand" "d")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"clzg\t%0,%1"
[(set_attr "op_type" "RRE")])
@@ -9630,7 +9633,7 @@
(define_insn "ctzdi2"
[(set (match_operand:DI 0 "register_operand" "=d")
(ctz:DI (match_operand:DI 1 "register_operand" "d")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"ctzg\t%0,%1"
[(set_attr "op_type" "RRE")])
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index f064597..6753a93 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -122,7 +122,10 @@ EnumValue
Enum(processor_type) String(z16) Value(PROCESSOR_3931_Z16)
EnumValue
-Enum(processor_type) String(arch15) Value(PROCESSOR_ARCH15)
+Enum(processor_type) String(arch15) Value(PROCESSOR_9175_Z17)
+
+EnumValue
+Enum(processor_type) String(z17) Value(PROCESSOR_9175_Z17)
EnumValue
Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly
diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def
index 80650b4..e31ae69 100644
--- a/gcc/config/sh/sh-modes.def
+++ b/gcc/config/sh/sh-modes.def
@@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* SH has the same reversed quiet bit as MIPS. */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+/* TFmode: IEEE quad floating point (software). */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
/* Vector modes. */
VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODES (INT, 4); /* V4QI V2HI */