author     Jerry DeLisle <jvdelisle@gcc.gnu.org>   2025-09-02 15:58:26 -0700
committer  Jerry DeLisle <jvdelisle@gcc.gnu.org>   2025-09-02 15:58:26 -0700
commit     071b4126c613881f4cb25b4e5c39032964827f88 (patch)
tree       7ed805786566918630d1d617b1ed8f7310f5fd8e /gcc/config
parent     845d23f3ea08ba873197c275a8857eee7edad996 (diff)
parent     caa1c2f42691d68af4d894a5c3e700ecd2dba080 (diff)
Merge branch 'master' into gfortran-test (devel/gfortran-test)
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-cc-fusion.cc | 297
-rw-r--r--  gcc/config/aarch64/aarch64-cores.def | 2
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def | 12
-rw-r--r--  gcc/config/aarch64/aarch64-passes.def | 1
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 9
-rw-r--r--  gcc/config/aarch64/aarch64-sme.md | 15
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc | 45
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-functions.h | 8
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sme.def | 3
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 4
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc | 30
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md | 985
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md | 104
-rw-r--r--  gcc/config/aarch64/aarch64.cc | 421
-rw-r--r--  gcc/config/aarch64/aarch64.h | 5
-rw-r--r--  gcc/config/aarch64/aarch64.md | 221
-rw-r--r--  gcc/config/aarch64/constraints.md | 10
-rw-r--r--  gcc/config/aarch64/iterators.md | 35
-rw-r--r--  gcc/config/aarch64/predicates.md | 29
-rw-r--r--  gcc/config/aarch64/t-aarch64 | 6
-rw-r--r--  gcc/config/aarch64/tuning_models/generic_armv9_a.h | 2
-rw-r--r--  gcc/config/aarch64/tuning_models/olympus.h | 210
-rw-r--r--  gcc/config/arc/arc.md | 6
-rw-r--r--  gcc/config/arm/arm.cc | 5
-rw-r--r--  gcc/config/arm/arm.md | 17
-rw-r--r--  gcc/config/arm/thumb1.md | 9
-rw-r--r--  gcc/config/arm/thumb2.md | 21
-rw-r--r--  gcc/config/avr/avr-dimode.md | 87
-rw-r--r--  gcc/config/avr/avr-fixed.md | 129
-rw-r--r--  gcc/config/avr/avr-log.cc | 1
-rw-r--r--  gcc/config/avr/avr-passes.cc | 139
-rw-r--r--  gcc/config/avr/avr-passes.def | 8
-rw-r--r--  gcc/config/avr/avr-protos.h | 5
-rw-r--r--  gcc/config/avr/avr.cc | 183
-rw-r--r--  gcc/config/avr/avr.h | 18
-rw-r--r--  gcc/config/avr/avr.md | 1253
-rw-r--r--  gcc/config/avr/avr.opt | 8
-rw-r--r--  gcc/config/avr/avr.opt.urls | 5
-rw-r--r--  gcc/config/avr/specs.h | 2
-rw-r--r--  gcc/config/cris/cris.h | 2
-rw-r--r--  gcc/config/darwin-sections.def | 7
-rw-r--r--  gcc/config/darwin.cc | 67
-rw-r--r--  gcc/config/darwin.h | 30
-rw-r--r--  gcc/config/gcn/gcn-opts.h | 2
-rw-r--r--  gcc/config/gcn/gcn-valu.md | 4
-rw-r--r--  gcc/config/gcn/gcn.cc | 103
-rw-r--r--  gcc/config/gcn/gcn.md | 40
-rw-r--r--  gcc/config/h8300/addsub.md | 2
-rw-r--r--  gcc/config/h8300/jumpcall.md | 12
-rw-r--r--  gcc/config/h8300/testcompare.md | 26
-rw-r--r--  gcc/config/i386/i386-expand.cc | 129
-rw-r--r--  gcc/config/i386/i386-features.cc | 1130
-rw-r--r--  gcc/config/i386/i386-modes.def | 2
-rw-r--r--  gcc/config/i386/i386-options.cc | 45
-rw-r--r--  gcc/config/i386/i386-passes.def | 2
-rw-r--r--  gcc/config/i386/i386-protos.h | 5
-rw-r--r--  gcc/config/i386/i386.cc | 305
-rw-r--r--  gcc/config/i386/i386.h | 59
-rw-r--r--  gcc/config/i386/i386.md | 449
-rw-r--r--  gcc/config/i386/i386.opt | 4
-rw-r--r--  gcc/config/i386/predicates.md | 17
-rw-r--r--  gcc/config/i386/sse.md | 135
-rw-r--r--  gcc/config/i386/x86-tune-costs.h | 192
-rw-r--r--  gcc/config/loongarch/genopts/isa-evolution.in | 1
-rw-r--r--  gcc/config/loongarch/loongarch-def.cc | 4
-rw-r--r--  gcc/config/loongarch/loongarch-def.h | 10
-rw-r--r--  gcc/config/loongarch/loongarch-evolution.cc | 4
-rw-r--r--  gcc/config/loongarch/loongarch-evolution.h | 8
-rw-r--r--  gcc/config/loongarch/loongarch-str.h | 1
-rw-r--r--  gcc/config/loongarch/loongarch.cc | 81
-rw-r--r--  gcc/config/loongarch/loongarch.h | 6
-rw-r--r--  gcc/config/loongarch/loongarch.opt | 4
-rw-r--r--  gcc/config/loongarch/loongarch.opt.urls | 3
-rw-r--r--  gcc/config/loongarch/simd.md | 2
-rw-r--r--  gcc/config/loongarch/sync.md | 641
-rw-r--r--  gcc/config/mips/mips.h | 10
-rw-r--r--  gcc/config/mips/mips.opt | 4
-rw-r--r--  gcc/config/nvptx/nvptx.opt | 45
-rw-r--r--  gcc/config/pru/pru.cc | 11
-rw-r--r--  gcc/config/pru/pru.h | 3
-rw-r--r--  gcc/config/pru/pru.md | 28
-rw-r--r--  gcc/config/pru/pru.opt | 8
-rw-r--r--  gcc/config/pru/pru.opt.urls | 6
-rw-r--r--  gcc/config/pru/t-multilib | 29
-rwxr-xr-x  gcc/config/riscv/arch-canonicalize | 583
-rw-r--r--  gcc/config/riscv/autovec-opt.md | 182
-rw-r--r--  gcc/config/riscv/constraints.md | 4
-rw-r--r--  gcc/config/riscv/gen-riscv-ext-opt.cc | 44
-rw-r--r--  gcc/config/riscv/gen-riscv-mcpu-texi.cc | 43
-rw-r--r--  gcc/config/riscv/gen-riscv-mtune-texi.cc | 41
-rw-r--r--  gcc/config/riscv/predicates.md | 20
-rw-r--r--  gcc/config/riscv/riscv-avlprop.cc | 13
-rw-r--r--  gcc/config/riscv/riscv-cores.def | 8
-rw-r--r--  gcc/config/riscv/riscv-ext-mips.def | 13
-rw-r--r--  gcc/config/riscv/riscv-ext.opt | 2
-rw-r--r--  gcc/config/riscv/riscv-protos.h | 8
-rw-r--r--  gcc/config/riscv/riscv-subset.h | 13
-rw-r--r--  gcc/config/riscv/riscv-target-attr.cc | 102
-rw-r--r--  gcc/config/riscv/riscv-v.cc | 165
-rw-r--r--  gcc/config/riscv/riscv-vector-costs.cc | 28
-rw-r--r--  gcc/config/riscv/riscv.cc | 399
-rw-r--r--  gcc/config/riscv/riscv.h | 27
-rw-r--r--  gcc/config/riscv/riscv.md | 71
-rw-r--r--  gcc/config/riscv/sifive-p400.md | 20
-rw-r--r--  gcc/config/riscv/sifive-p600.md | 17
-rw-r--r--  gcc/config/riscv/sync.md | 8
-rw-r--r--  gcc/config/riscv/t-riscv | 37
-rw-r--r--  gcc/config/riscv/t-rtems | 9
-rw-r--r--  gcc/config/riscv/vector.md | 265
-rw-r--r--  gcc/config/riscv/xiangshan.md | 4
-rw-r--r--  gcc/config/rl78/rl78.opt.urls | 2
-rw-r--r--  gcc/config/rs6000/rs6000.cc | 41
-rw-r--r--  gcc/config/rs6000/rs6000.md | 42
-rw-r--r--  gcc/config/rx/rx.cc | 49
-rw-r--r--  gcc/config/s390/s390-protos.h | 2
-rw-r--r--  gcc/config/s390/s390.cc | 198
-rw-r--r--  gcc/config/s390/s390.md | 67
-rw-r--r--  gcc/config/s390/vector.md | 84
-rw-r--r--  gcc/config/xtensa/constraints.md | 2
-rw-r--r--  gcc/config/xtensa/predicates.md | 3
-rw-r--r--  gcc/config/xtensa/xtensa-protos.h | 1
-rw-r--r--  gcc/config/xtensa/xtensa.cc | 134
-rw-r--r--  gcc/config/xtensa/xtensa.md | 429
123 files changed, 7549 insertions, 3659 deletions
diff --git a/gcc/config/aarch64/aarch64-cc-fusion.cc b/gcc/config/aarch64/aarch64-cc-fusion.cc
deleted file mode 100644
index cea54de..0000000
--- a/gcc/config/aarch64/aarch64-cc-fusion.cc
+++ /dev/null
@@ -1,297 +0,0 @@
-// Pass to fuse CC operations with other instructions.
-// Copyright (C) 2021-2025 Free Software Foundation, Inc.
-//
-// This file is part of GCC.
-//
-// GCC is free software; you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3, or (at your option) any later
-// version.
-//
-// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-// for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with GCC; see the file COPYING3. If not see
-// <http://www.gnu.org/licenses/>.
-
-// This pass looks for sequences of the form:
-//
-// A: (set (reg R1) X1)
-// B: ...instructions that might change the value of X1...
-// C: (set (reg CC) X2) // X2 uses R1
-//
-// and tries to change them to:
-//
-// C': [(set (reg CC) X2')
-// (set (reg R1) X1)]
-// B: ...instructions that might change the value of X1...
-//
-// where X2' is the result of replacing R1 with X1 in X2.
-//
-// This sequence occurs in SVE code in two important cases:
-//
-// (a) Sometimes, to deal correctly with overflow, we need to increment
-// an IV after a WHILELO rather than before it. In this case:
-// - A is a WHILELO,
-// - B includes an IV increment and
-// - C is a separate PTEST.
-//
-// (b) ACLE code of the form:
-//
-// svbool_t ok = svrdffr ();
-// if (svptest_last (pg, ok))
-// ...
-//
-// must, for performance reasons, be code-generated as:
-//
-// RDFFRS Pok.B, Pg/Z
-// ...branch on flags result...
-//
-// without a separate PTEST of Pok. In this case:
-// - A is an aarch64_rdffr
-// - B includes an aarch64_update_ffrt
-// - C is a separate PTEST
-//
-// Combine can handle this optimization if B doesn't exist and if A and
-// C are in the same BB. This pass instead handles cases where B does
-// exist and cases where A and C are in different BBs of the same EBB.
-
-#define IN_TARGET_CODE 1
-
-#define INCLUDE_ALGORITHM
-#define INCLUDE_FUNCTIONAL
-#define INCLUDE_ARRAY
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "rtl.h"
-#include "df.h"
-#include "rtl-ssa.h"
-#include "tree-pass.h"
-
-using namespace rtl_ssa;
-
-namespace {
-const pass_data pass_data_cc_fusion =
-{
- RTL_PASS, // type
- "cc_fusion", // name
- OPTGROUP_NONE, // optinfo_flags
- TV_NONE, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- TODO_df_finish, // todo_flags_finish
-};
-
-// Class that represents one run of the pass.
-class cc_fusion
-{
-public:
- cc_fusion () : m_parallel () {}
- void execute ();
-
-private:
- rtx optimizable_set (const insn_info *);
- bool parallelize_insns (def_info *, rtx, def_info *, rtx);
- void optimize_cc_setter (def_info *, rtx);
-
- // A spare PARALLEL rtx, or null if none.
- rtx m_parallel;
-};
-
-// See whether INSN is a single_set that we can optimize. Return the
-// set if so, otherwise return null.
-rtx
-cc_fusion::optimizable_set (const insn_info *insn)
-{
- if (!insn->can_be_optimized ()
- || insn->is_asm ()
- || insn->has_volatile_refs ()
- || insn->has_pre_post_modify ())
- return NULL_RTX;
-
- return single_set (insn->rtl ());
-}
-
-// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
-// a single_set that sets (only) OTHER_DEF. CC_SET is known to set the
-// CC register and the instruction that contains CC_SET is known to use
-// OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
-bool
-cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set,
- def_info *other_def, rtx other_set)
-{
- auto attempt = crtl->ssa->new_change_attempt ();
-
- insn_info *cc_insn = cc_def->insn ();
- insn_info *other_insn = other_def->insn ();
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
- other_insn->uid (), cc_insn->uid ());
-
- // Try to substitute OTHER_SET into CC_INSN.
- insn_change_watermark rtl_watermark;
- rtx_insn *cc_rtl = cc_insn->rtl ();
- insn_propagation prop (cc_rtl, SET_DEST (other_set),
- SET_SRC (other_set));
- if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
- || prop.num_replacements == 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
- other_def->regno ());
- return false;
- }
-
- // Restrict the uses to those outside notes.
- use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
- use_array other_set_uses = remove_note_accesses (attempt,
- other_insn->uses ());
-
- // Remove the use of the substituted value.
- access_array_builder uses_builder (attempt);
- uses_builder.reserve (cc_uses.size ());
- for (use_info *use : cc_uses)
- if (use->def () != other_def)
- uses_builder.quick_push (use);
- cc_uses = use_array (uses_builder.finish ());
-
- // Get the list of uses for the new instruction.
- insn_change cc_change (cc_insn);
- cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
- if (!cc_change.new_uses.is_valid ())
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- cannot merge uses\n");
- return false;
- }
-
- // The instruction initially defines just two registers. recog can add
- // extra clobbers if necessary.
- auto_vec<access_info *, 2> new_defs;
- new_defs.quick_push (cc_def);
- new_defs.quick_push (other_def);
- sort_accesses (new_defs);
- cc_change.new_defs = def_array (access_array (new_defs));
-
- // Make sure there is somewhere that the new instruction could live.
- auto other_change = insn_change::delete_insn (other_insn);
- insn_change *changes[] = { &other_change, &cc_change };
- cc_change.move_range = cc_insn->ebb ()->insn_range ();
- if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
- return false;
- }
-
- // Tentatively install the new pattern. By convention, the CC set
- // must be first.
- if (m_parallel)
- {
- XVECEXP (m_parallel, 0, 0) = cc_set;
- XVECEXP (m_parallel, 0, 1) = other_set;
- }
- else
- {
- rtvec vec = gen_rtvec (2, cc_set, other_set);
- m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
- }
- validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
-
- // These routines report failures themselves.
- if (!recog (attempt, cc_change, ignore_changing_insns (changes))
- || !changes_are_worthwhile (changes)
- || !crtl->ssa->verify_insn_changes (changes))
- return false;
-
- remove_reg_equal_equiv_notes (cc_rtl);
- confirm_change_group ();
- crtl->ssa->change_insns (changes);
- m_parallel = NULL_RTX;
- return true;
-}
-
-// Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
-// a definition of the CC register by CC_SET.
-void
-cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set)
-{
- // Search the registers used by the CC setter for an easily-substitutable
- // def-use chain.
- for (use_info *other_use : cc_def->insn ()->uses ())
- if (def_info *other_def = other_use->def ())
- if (other_use->regno () != CC_REGNUM
- && other_def->ebb () == cc_def->ebb ())
- if (rtx other_set = optimizable_set (other_def->insn ()))
- {
- rtx dest = SET_DEST (other_set);
- if (REG_P (dest)
- && REGNO (dest) == other_def->regno ()
- && REG_NREGS (dest) == 1
- && parallelize_insns (cc_def, cc_set, other_def, other_set))
- return;
- }
-}
-
-// Run the pass on the current function.
-void
-cc_fusion::execute ()
-{
- // Initialization.
- calculate_dominance_info (CDI_DOMINATORS);
- df_analyze ();
- crtl->ssa = new rtl_ssa::function_info (cfun);
-
- // Walk through all instructions that set CC. Look for a PTEST instruction
- // that we can optimize.
- //
- // ??? The PTEST test isn't needed for correctness, but it ensures that the
- // pass no effect on non-SVE code.
- for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM))
- if (rtx cc_set = optimizable_set (def->insn ()))
- if (REG_P (SET_DEST (cc_set))
- && REGNO (SET_DEST (cc_set)) == CC_REGNUM
- && GET_CODE (SET_SRC (cc_set)) == UNSPEC
- && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST)
- optimize_cc_setter (def, cc_set);
-
- // Finalization.
- crtl->ssa->perform_pending_updates ();
- free_dominance_info (CDI_DOMINATORS);
-}
-
-class pass_cc_fusion : public rtl_opt_pass
-{
-public:
- pass_cc_fusion (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_cc_fusion, ctxt)
- {}
-
- // opt_pass methods:
- virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; }
- virtual unsigned int execute (function *);
-};
-
-unsigned int
-pass_cc_fusion::execute (function *)
-{
- cc_fusion ().execute ();
- return 0;
-}
-
-} // end namespace
-
-// Create a new CC fusion pass instance.
-
-rtl_opt_pass *
-make_pass_cc_fusion (gcc::context *ctxt)
-{
- return new pass_cc_fusion (ctxt);
-}
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 8040409..6f11cc0 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -224,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG
AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
/* NVIDIA ('N') cores. */
-AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
+AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), olympus, 0x4e, 0x10, -1)
/* Armv9-A big.LITTLE processors. */
AARCH64_CORE("gb10", gb10, cortexa57, V9_2A, (SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, MEMTAG, PROFILE), cortexx925, 0x41, AARCH64_BIG_LITTLE (0xd85, 0xd87), -1)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 1c3e697..db88df0 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -128,7 +128,9 @@ AARCH64_OPT_FMV_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2")
AARCH64_FMV_FEATURE("sha3", SHA3, (SHA3))
-AARCH64_OPT_FMV_EXTENSION("aes", AES, (SIMD), (), (), "aes")
+AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes")
+
+AARCH64_FMV_FEATURE("aes", PMULL, (AES))
/* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them
(such as SHA3 and the SVE2 crypto extensions). */
@@ -171,8 +173,6 @@ AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm")
instructions. */
AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16")
-AARCH64_FMV_FEATURE("rpres", RPRES, ())
-
AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16, FCMA), (), (), "sve")
/* This specifically does not imply +sve. */
@@ -190,7 +190,7 @@ AARCH64_OPT_FMV_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2")
AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes")
-AARCH64_FMV_FEATURE("sve2-aes", SVE_AES, (SVE2_AES))
+AARCH64_FMV_FEATURE("sve2-aes", SVE_PMULL128, (SVE2_AES))
AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (),
"svebitperm")
@@ -245,9 +245,9 @@ AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "sme
AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "smef16f16")
-AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops")
+AARCH64_OPT_FMV_EXTENSION("mops", MOPS, (), (), (), "mops")
-AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
+AARCH64_OPT_FMV_EXTENSION("cssc", CSSC, (), (), (), "cssc")
AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr")
diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def
index 9cf9d3e..6a53ff3 100644
--- a/gcc/config/aarch64/aarch64-passes.def
+++ b/gcc/config/aarch64/aarch64-passes.def
@@ -24,6 +24,5 @@ INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_late_track_speculation);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
-INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
INSERT_PASS_BEFORE (pass_early_remat, 1, pass_ldp_fusion);
INSERT_PASS_BEFORE (pass_peephole2, 1, pass_ldp_fusion);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e946e8d..56efcf2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1020,7 +1020,7 @@ void aarch64_err_no_fpadvsimd (machine_mode);
void aarch64_expand_epilogue (rtx_call_insn *);
rtx aarch64_ptrue_all (unsigned int);
opt_machine_mode aarch64_ptrue_all_mode (rtx);
-rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
+rtx aarch64_convert_sve_data_to_pred (rtx, rtx);
rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
void aarch64_expand_mov_immediate (rtx, rtx);
rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
@@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
rtx aarch64_sve_packed_pred (machine_mode);
rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx);
void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
bool aarch64_expand_maskloadstore (rtx *, machine_mode);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
@@ -1038,6 +1039,7 @@ void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
rtx aarch64_replace_reg_mode (rtx, machine_mode);
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
+void aarch64_emit_sve_pred_vec_duplicate (machine_mode, rtx, rtx);
void aarch64_expand_prologue (void);
void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
void aarch64_expand_vector_init (rtx, rtx);
@@ -1096,6 +1098,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
aarch64_addr_query_type = ADDR_QUERY_M);
machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+rtx aarch64_gen_compare_split_imm24 (rtx, rtx, rtx);
bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
rtx aarch64_load_tp (rtx);
@@ -1234,7 +1237,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_late_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
-rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
rtl_opt_pass *make_pass_ldp_fusion (gcc::context *);
@@ -1279,4 +1281,7 @@ extern bool aarch64_gcs_enabled ();
extern unsigned aarch64_data_alignment (const_tree exp, unsigned align);
extern unsigned aarch64_stack_alignment (const_tree exp, unsigned align);
+extern rtx aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
+ rtx_code_label *label);
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index 6b3f439..0123ea0 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -62,6 +62,10 @@
;; (b) they are sometimes used conditionally, particularly in streaming-
;; compatible code.
;;
+;; To prevent the latter from upsetting the assembler, we emit the literal
+;; encodings of "SMSTART SM" and "SMSTOP SM" when compiling without
+;; TARGET_SME.
+;;
;; =========================================================================
;; -------------------------------------------------------------------------
@@ -161,7 +165,9 @@
(clobber (reg:VNx16BI P14_REGNUM))
(clobber (reg:VNx16BI P15_REGNUM))]
""
- "smstart\tsm"
+ {
+ return TARGET_SME ? "smstart\tsm" : ".inst 0xd503437f // smstart sm";
+ }
)
;; Turn off streaming mode. This clobbers all SVE state.
@@ -196,7 +202,9 @@
(clobber (reg:VNx16BI P14_REGNUM))
(clobber (reg:VNx16BI P15_REGNUM))]
""
- "smstop\tsm"
+ {
+ return TARGET_SME ? "smstop\tsm" : ".inst 0xd503427f // smstop sm";
+ }
)
;; -------------------------------------------------------------------------
@@ -392,7 +400,8 @@
auto label = gen_label_rtx ();
auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
- auto jump = emit_likely_jump_insn (gen_aarch64_cbznedi1 (tpidr2, label));
+ auto pat = aarch64_gen_compare_zero_and_branch (NE, tpidr2, label);
+ auto jump = emit_likely_jump_insn (pat);
JUMP_LABEL (jump) = label;
aarch64_restore_za (operands[0]);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index b439683..ecc0687 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -214,7 +214,8 @@ public:
expand (function_expander &e) const override
{
e.add_ptrue_hint (0, e.gp_mode (0));
- insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
+ insn_code icode = code_for_aarch64_pred_fac_acle (m_unspec,
+ e.vector_mode (0));
return e.use_exact_insn (icode);
}
@@ -497,10 +498,10 @@ public:
{
bool unsigned_p = e.type_suffix (0).unsigned_p;
rtx_code code = get_rtx_code (m_code, unsigned_p);
- return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
+ return e.use_exact_insn (code_for_aarch64_pred_cmp_acle (code, mode));
}
- insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
+ insn_code icode = code_for_aarch64_pred_fcm_acle (m_unspec_for_fp, mode);
return e.use_exact_insn (icode);
}
@@ -542,7 +543,7 @@ public:
/* If the argument is a constant that the unwidened comparisons
can handle directly, use them instead. */
- insn_code icode = code_for_aarch64_pred_cmp (code, mode);
+ insn_code icode = code_for_aarch64_pred_cmp_acle (code, mode);
rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
if (CONSTANT_P (op2)
&& insn_data[icode].operand[4].predicate (op2, DImode))
@@ -581,7 +582,8 @@ public:
expand (function_expander &e) const override
{
e.add_ptrue_hint (0, e.gp_mode (0));
- return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
+ auto mode = e.vector_mode (0);
+ return e.use_exact_insn (code_for_aarch64_pred_fcmuo_acle (mode));
}
};
@@ -1048,6 +1050,23 @@ public:
rtx
expand (function_expander &e) const override
{
+ machine_mode mode = e.vector_mode (0);
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (e.pred == PRED_none);
+
+ rtx src = e.args[0];
+ if (GET_CODE (src) == CONST_INT)
+ return (src == const0_rtx
+ ? CONST0_RTX (VNx16BImode)
+ : aarch64_ptrue_all (e.type_suffix (0).element_bytes));
+
+ rtx dest = e.get_reg_target ();
+ src = force_reg (GET_MODE (src), src);
+ aarch64_emit_sve_pred_vec_duplicate (mode, dest, src);
+ return dest;
+ }
+
if (e.pred == PRED_none || e.pred == PRED_x)
/* There's no benefit to using predicated instructions for _x here. */
return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));
@@ -1056,7 +1075,6 @@ public:
the duplicate of the function argument and the "false" value
is the value of inactive lanes. */
insn_code icode;
- machine_mode mode = e.vector_mode (0);
if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
/* Duplicate the constant to fill a vector. The pattern optimizes
various cases involving constant operands, falling back to SEL
@@ -1197,8 +1215,7 @@ public:
if (mode != e.vector_mode (0))
{
rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
- return aarch64_convert_sve_data_to_pred (e.possible_target,
- e.vector_mode (0), data_dupq);
+ return aarch64_convert_sve_data_to_pred (e.possible_target, data_dupq);
}
return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
@@ -1259,9 +1276,10 @@ public:
index = target;
}
- e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
+ e.args[0] = aarch64_sve_reinterpret (VNx2DImode, e.args[0]);
e.args[1] = index;
- return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
+ rtx res = e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
+ return aarch64_sve_reinterpret (mode, res);
}
};
@@ -2857,7 +2875,10 @@ public:
rtx
expand (function_expander &e) const override
{
- return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
+ auto mode = e.vector_mode (0);
+ return e.use_exact_insn (e.type_suffix (0).bool_p
+ ? code_for_aarch64_sve_rev_acle (mode)
+ : code_for_aarch64_sve_rev (mode));
}
};
@@ -3248,7 +3269,7 @@ public:
unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
insn_code icode;
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- icode = code_for_aarch64_sve_punpk (unpacku, mode);
+ icode = code_for_aarch64_sve_punpk_acle (unpacku);
else
{
int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 6f1c694..c05946d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -630,7 +630,10 @@ public:
rtx
expand (function_expander &e) const override
{
- insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
+ auto mode = e.vector_mode (0);
+ insn_code icode = (e.type_suffix (0).bool_p
+ ? code_for_aarch64_sve_acle (m_unspec, mode)
+ : code_for_aarch64_sve (m_unspec, mode));
return e.use_exact_insn (icode);
}
@@ -838,7 +841,8 @@ public:
machine_mode pred_mode = e.vector_mode (0);
scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
- return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
+ auto icode = code_for_aarch64_sve_while_acle (unspec, reg_mode, pred_mode);
+ return e.use_exact_insn (icode);
}
/* The unspec codes associated with signed and unsigned operations
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 8e6aadc..117b70e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -92,7 +92,8 @@ DEF_SME_FUNCTION (svstr_zt, str_zt, none, none)
DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 && AARCH64_FL_FAMINMAX)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \
+ | AARCH64_FL_FAMINMAX)
DEF_SME_FUNCTION_GS (svamin, binary_opt_single_n, all_float, x24, none)
DEF_SME_FUNCTION_GS (svamax, binary_opt_single_n, all_float, x24, none)
#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 73004a8..95c5ed8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -881,7 +881,9 @@ public:
{
for (unsigned int i = 0; i < 2; ++i)
e.args[i] = e.convert_to_pmode (e.args[i]);
- return e.use_exact_insn (code_for_while (m_unspec, Pmode, e.gp_mode (0)));
+ auto icode = code_for_aarch64_sve_while_acle (m_unspec, Pmode,
+ e.gp_mode (0));
+ return e.use_exact_insn (icode);
}
int m_unspec;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 2b627a9..1764cf8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -4004,7 +4004,8 @@ rtx
function_expander::get_reg_target ()
{
machine_mode target_mode = result_mode ();
- if (!possible_target || GET_MODE (possible_target) != target_mode)
+ if (!possible_target
+ || !register_operand (possible_target, target_mode))
possible_target = gen_reg_rtx (target_mode);
return possible_target;
}
@@ -4589,10 +4590,31 @@ function_expander::expand ()
{
/* The last element of these functions is always an fpm_t that must be
written to FPMR before the call to the instruction itself. */
- gcc_assert (args.last ()->mode == DImode);
- emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), args.last ());
+ rtx fpm = args.last ();
+ gcc_assert (CONST_INT_P (fpm) || GET_MODE (fpm) == DImode);
+ emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), fpm);
}
- return base->expand (*this);
+ rtx result = base->expand (*this);
+ if (function_returns_void_p ())
+ gcc_assert (result == const0_rtx);
+ else
+ {
+ auto expected_mode = result_mode ();
+ if (GET_MODE_CLASS (expected_mode) == MODE_INT)
+ /* Scalar integer constants don't store a mode.
+
+ It's OK for a variable result to have a different mode from the
+ function return type. In particular, some functions that return int
+ expand into instructions that have a DImode result, with all 64 bits
+ of the DImode being well-defined (usually zero). */
+ gcc_assert (CONST_SCALAR_INT_P (result)
+ || GET_MODE_CLASS (GET_MODE (result)) == MODE_INT);
+ else
+ /* In other cases, the return value should have the same mode
+ as the return type. */
+ gcc_assert (GET_MODE (result) == expected_mode);
+ }
+ return result;
}
/* Return a structure type that contains a single field of type FIELD_TYPE.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b252eef..51e2d7d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1542,18 +1542,18 @@
UNSPEC_LD1_GATHER))]
"TARGET_SVE && TARGET_NON_STREAMING"
{@ [cons: =0, 1, 2, 3, 4, 5 ]
- [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
- [?w, Z, 0, Ui1, Ui1, Upl] ^
- [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
- [?w, vgw, 0, Ui1, Ui1, Upl] ^
- [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
- [?w, rk, 0, Z, Ui1, Upl] ^
- [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
- [?w, rk, 0, Ui1, Ui1, Upl] ^
- [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
- [?w, rk, 0, Z, i, Upl] ^
- [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
- [?w, rk, 0, Ui1, i, Upl] ^
+ [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
+ [?w, Z, 0, Ui1, Ui1, Upl] ^
+ [&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
+ [?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
+ [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
+ [?w, rk, 0, Z, Ui1, Upl] ^
+ [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
+ [?w, rk, 0, Ui1, Ui1, Upl] ^
+ [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+ [?w, rk, 0, Z, i, Upl] ^
+ [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
+ [?w, rk, 0, Ui1, i, Upl] ^
}
)
@@ -1572,14 +1572,14 @@
UNSPEC_LD1_GATHER))]
"TARGET_SVE && TARGET_NON_STREAMING"
{@ [cons: =0, 1, 2, 3, 4, 5]
- [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
- [?w, Z, 0, i, Ui1, Upl] ^
- [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
- [?w, vgd, 0, i, Ui1, Upl] ^
- [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
- [?w, rk, 0, i, Ui1, Upl] ^
- [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
- [?w, rk, 0, i, i, Upl] ^
+ [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
+ [?w, Z, 0, i, Ui1, Upl] ^
+ [&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
+ [?w, vg<Vesize>, 0, i, Ui1, Upl] ^
+ [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
+ [?w, rk, 0, i, Ui1, Upl] ^
+ [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
+ [?w, rk, 0, i, i, Upl] ^
}
)
@@ -2488,13 +2488,13 @@
(match_operand:SVE_4 4 "register_operand")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE && TARGET_NON_STREAMING"
- {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
- [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
- [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
- [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
- [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
- [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
- [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
+ {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
+ [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
+ [ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+ [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+ [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+ [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
}
)
@@ -2511,11 +2511,11 @@
(match_operand:SVE_2 4 "register_operand")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE && TARGET_NON_STREAMING"
- {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
- [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
- [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
- [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
- [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
+ {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
+ [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
+ [ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+ [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
+ [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
}
)
@@ -2990,10 +2990,7 @@
(vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]);
DONE;
}
)
@@ -5605,18 +5602,21 @@
;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+ {
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+ }
)
;; Predicated floating-point operations, merging with the first input.
@@ -5644,14 +5644,14 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5687,14 +5687,14 @@
)
(define_insn "*cond_<optab><mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5730,14 +5730,14 @@
)
(define_insn "*cond_<optab><mode>_3_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5794,16 +5794,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5868,16 +5868,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5953,14 +5953,14 @@
)
(define_insn "*cond_add<mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -6015,16 +6015,16 @@
)
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6266,14 +6266,14 @@
)
(define_insn "*cond_sub<mode>_3_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6323,16 +6323,16 @@
)
(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6913,7 +6913,7 @@
;; Predicate AND. We can reuse one of the inputs as the GP.
;; Doubling the second operand is the preferred implementation
;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand")
(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
(match_operand:PRED_ALL 2 "register_operand")))]
@@ -7595,29 +7595,29 @@
;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 4)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 1 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_dup 5)
+ (match_operand:SVE_F_B16B16 1 "register_operand")
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]);
}
)
;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 5 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
@@ -7631,17 +7631,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7649,20 +7649,22 @@
second of the two. */
if (rtx_equal_p (operands[3], operands[5]))
std::swap (operands[2], operands[3]);
+
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
})
;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7678,15 +7680,15 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7700,15 +7702,15 @@
;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7724,15 +7726,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7746,17 +7748,17 @@
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 6)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7792,17 +7794,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -8201,20 +8203,23 @@
;;
;; For unpacked vectors, it doesn't really matter whether SEL uses the
;; the container size or the element size. If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector. Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits. If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these seperate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
;;
;; For the other instructions, using the element size is more natural,
;; so we do that for SEL as well.
+;;
(define_insn "*vcond_mask_<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand")
+ [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
(match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
(match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
@@ -8353,6 +8358,71 @@
}
)
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:VNx16QI_ONLY 3 "register_operand")
+ (match_operand:VNx16QI_ONLY 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 3 "register_operand")
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 3 "register_operand")
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+)
+
;; Predicated integer comparisons in which both the flag and predicate
;; results are interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
@@ -8394,6 +8464,49 @@
}
)
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 2 "register_operand")
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
;; Predicated integer comparisons in which only the flags result is
;; interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
@@ -8459,14 +8572,52 @@
(clobber (reg:CC_NZC CC_REGNUM))])]
)
+(define_insn_and_split "*cmp<cmp_op><mode>_acle_and"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
+ (and:VNx16BI
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:VNx16BI 1 "register_operand" "Upl, Upl"))
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ {
+ operands[1] = gen_lowpart (<VPRED>mode, operands[1]);
+ }
+)
+
;; Predicated integer wide comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
[(set (match_operand:<VPRED> 0 "register_operand")
(unspec:<VPRED>
- [(match_operand:VNx16BI 1 "register_operand")
+ [(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:SVE_FULL_BHSI 3 "register_operand")
+ [(match_operand:VNx16QI_ONLY 3 "register_operand")
(match_operand:VNx2DI 4 "register_operand")]
SVE_COND_INT_CMP_WIDE)]
UNSPEC_PRED_Z))
@@ -8479,16 +8630,61 @@
}
)
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_wide"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 3 "register_operand")
+ (match_operand:VNx2DI 4 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 3 "register_operand")
+ (match_operand:VNx2DI 4 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ]
+ [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
+ [ ?Upl , 0 , , w, w; yes ] ^
+ [ Upa , Upl, , w, w; no ] ^
+ }
+)
+
;; Predicated integer wide comparisons in which both the flag and
;; predicate results are interesting.
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:VNx16BI 6 "register_operand")
+ [(match_operand:<VPRED> 6 "register_operand")
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHSI 2 "register_operand")
@@ -8512,18 +8708,65 @@
[ ?Upl , 0 , w, w, Upl; yes ] ^
[ Upa , Upl, w, w, Upl; no ] ^
}
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 6 "register_operand")
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 2 "register_operand")
+ (match_operand:VNx2DI 3 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
+ [ ?Upl , 0 , w, w, Upl; yes ] ^
+ [ Upa , Upl, w, w, Upl; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; Predicated integer wide comparisons in which only the flags result
;; is interesting.
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:VNx16BI 6 "register_operand")
+ [(match_operand:<VPRED> 6 "register_operand")
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHSI 2 "register_operand")
@@ -8539,6 +8782,11 @@
[ ?Upl , 0 , w, w, Upl; yes ] ^
[ Upa , Upl, w, w, Upl; no ] ^
}
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; -------------------------------------------------------------------------
@@ -8576,6 +8824,58 @@
"while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><VNx16BI_ONLY:mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI_ONLY 0 "register_operand")
+ (unspec:VNx16BI_ONLY
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero")
+ (match_operand:GPI 2 "aarch64_reg_or_zero")]
+ SVE_WHILE))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero")
+ (match_operand:GPI 2 "aarch64_reg_or_zero")]
+ SVE_WHILE)
+ 0)
+ (match_dup 3)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_all (<data_bytes>);
+ }
+)
+
+(define_insn "*aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ SVE_WHILE)
+ 0)
+ (match_operand:PRED_HSD 3 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2"
+)
+
;; The WHILE instructions set the flags in the same way as a PTEST with
;; a PTRUE GP. Handle the case in which both results are useful. The GP
;; operands to the PTEST aren't needed, so we allow them to be anything.
@@ -8607,6 +8907,38 @@
}
)
+(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand 3)
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ SVE_WHILE)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD [(const_int SVE_WHILE_B)
+ (match_dup 1)
+ (match_dup 2)]
+ SVE_WHILE)
+ 0)
+ (match_operand:PRED_HSD 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
+ {
+ operands[3] = CONSTM1_RTX (VNx16BImode);
+ operands[4] = CONSTM1_RTX (<PRED_HSD:MODE>mode);
+ }
+)
+
;; Same, but handle the case in which only the flags result is useful.
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
@@ -8677,6 +9009,43 @@
}
)
+(define_expand "@aarch64_pred_fcm<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ SVE_COND_FP_CMP_I0)
+ 0)
+ (match_dup 5)))]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_fcm<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ SVE_COND_FP_CMP_I0)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 3 , 4 ]
+ [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
+ [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ }
+)
+
;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
@@ -8690,6 +9059,40 @@
"fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
+(define_expand "@aarch64_pred_fcmuo<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
+ UNSPEC_COND_FCMUO)
+ 0)
+ (match_dup 5)))]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_fcmuo<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand" "w")
+ (match_operand:SVE_F 4 "register_operand" "w")]
+ UNSPEC_COND_FCMUO)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
+)
+
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P. This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
@@ -8908,23 +9311,30 @@
;; -------------------------------------------------------------------------
;; Predicated floating-point absolute comparisons.
-(define_expand "@aarch64_pred_fac<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SVE_FULL_F 3 "register_operand")]
- UNSPEC_COND_FABS)
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SVE_FULL_F 4 "register_operand")]
- UNSPEC_COND_FABS)]
- SVE_COND_FP_ABS_CMP))]
+(define_expand "@aarch64_pred_fac<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SVE_FULL_F 4 "register_operand")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP)
+ 0)
+ (match_dup 5)))]
"TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
)
(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
@@ -8973,6 +9383,30 @@
"fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP)
+ 0)
+ (match_operand:<VPRED> 7 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
@@ -9421,7 +9855,30 @@
(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
UNSPEC_REV))]
"TARGET_SVE"
- "rev\t%0.<Vetype>, %1.<Vetype>")
+ "rev\t%0.<Vetype>, %1.<Vetype>"
+)
+
+(define_expand "@aarch64_sve_rev<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_dup:PRED_ALL 2)]
+ UNSPEC_REV_PRED))]
+ "TARGET_SVE"
+ {
+ operands[2] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_rev<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "aarch64_simd_imm_zero")]
+ UNSPEC_REV_PRED))]
+ "TARGET_SVE"
+ "rev\t%0.<Vetype>, %1.<Vetype>"
+)
;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose binary permutes
@@ -9446,18 +9903,39 @@
"<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Special purpose permute used by the predicate generation instructions.
-;; Unlike the normal permute patterns, these instructions operate on VNx16BI
-;; regardless of the element size, so that all input and output bits are
-;; well-defined. Operand 3 then indicates the size of the permute.
-(define_insn "@aarch64_sve_trn1_conv<mode>"
+;; Special-purpose permutes used by the ACLE intrinsics and predicate
+;; generation instructions. Unlike the normal permute patterns, these
+;; instructions operate on VNx16BI regardless of the element size, so that
+;; all input and output bits are well-defined. Operand 3 then indicates
+;; the size of the permute.
+;;
+;; To make generation easier, this pattern embeds the permute type as the
+;; fourth operand to the unspec. On the one hand, this avoids overloading
+;; unspecs like UNSPEC_ZIP1 to represent two different operations. On the
+;; other hand, it avoids having a separate unspec for each variant, and
+;; having to map from one kind of unspec to the other.
+(define_expand "@aarch64_sve_<perm_insn><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand:VNx16BI 2 "register_operand")
+ (match_dup:PRED_ALL 3)
+ (const_int PERMUTE)]
+ UNSPEC_PERMUTE_PRED))]
+ "TARGET_SVE"
+ {
+ operands[3] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_<perm_insn><mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
(match_operand:VNx16BI 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
- UNSPEC_TRN1_CONV))]
+ (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")
+ (const_int PERMUTE)]
+ UNSPEC_PERMUTE_PRED))]
"TARGET_SVE"
- "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+ "<perm_insn>\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
)
;; =========================================================================
@@ -10446,6 +10924,34 @@
"punpk<perm_hilo>\t%0.h, %1.b"
)
+(define_expand "@aarch64_sve_punpk<perm_hilo>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:VNx8BI
+ [(match_operand:VNx16BI 1 "register_operand")]
+ UNPACK_UNSIGNED)
+ 0)
+ (match_dup 2)))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_all (2);
+ }
+)
+
+(define_insn "*aarch64_sve_punpk<perm_hilo>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:VNx8BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")]
+ UNPACK_UNSIGNED)
+ 0)
+ (match_operand:VNx8BI 2 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "punpk<perm_hilo>\t%0.h, %1.b"
+)
+
;; =========================================================================
;; == Vector partitioning
;; =========================================================================
@@ -10670,14 +11176,49 @@
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_pred_op><mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL
- [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ [(set (match_operand:VNx16BI_ONLY 0 "register_operand" "=Upa")
+ (unspec:VNx16BI_ONLY
+ [(match_operand:VNx16BI_ONLY 1 "register_operand" "Upa")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (match_operand:PRED_ALL 3 "register_operand" "0")]
+ (match_operand:VNx16BI_ONLY 3 "register_operand" "0")]
SVE_PITER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
+ "TARGET_SVE"
+ "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
+)
+
+(define_expand "@aarch64_sve_<sve_pred_op><mode>"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_operand:PRED_HSD 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 3 "register_operand")]
+ PNEXT_ONLY)
+ 0)
+ (match_dup 4)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[4] = aarch64_ptrue_all (<data_bytes>);
+ }
+)
+
+(define_insn "*aarch64_sve_<sve_pred_op><mode>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_operand:PRED_HSD 1 "register_operand" "Upa")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 3 "register_operand" "0")]
+ PNEXT_ONLY)
+ 0)
+ (match_operand:PRED_HSD 4 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
"<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)
@@ -10711,6 +11252,38 @@
}
)
+(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 2)
+ (match_operand:SI 3 "aarch64_sve_ptrue_flag")
+ (unspec:PRED_HSD
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 6 "register_operand" "0")]
+ PNEXT_ONLY)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ PNEXT_ONLY)
+ 0)
+ (match_operand:PRED_HSD 7 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
+ "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
+ "&& !rtx_equal_p (operands[2], operands[4])"
+ {
+ operands[4] = operands[2];
+ operands[5] = operands[3];
+ }
+)
+
;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 31bdd85..a3cbbce 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2211,14 +2211,14 @@
;; - FDOT (2-way, indexed) (FP8DOT2)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_dot<mode>"
- [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
- (unspec:SVE_FULL_HSF
- [(match_operand:SVE_FULL_HSF 1 "register_operand")
+ [(set (match_operand:SVE_FULL_HSF_FP8_FDOT 0 "register_operand")
+ (unspec:SVE_FULL_HSF_FP8_FDOT
+ [(match_operand:SVE_FULL_HSF_FP8_FDOT 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
UNSPEC_DOT_FP8))]
- "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] fdot\t%0.<Vetype>, %2.b, %3.b
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b
@@ -2226,15 +2226,15 @@
)
(define_insn "@aarch64_sve_dot_lane<mode>"
- [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
- (unspec:SVE_FULL_HSF
- [(match_operand:SVE_FULL_HSF 1 "register_operand")
+ [(set (match_operand:SVE_FULL_HSF_FP8_FDOT 0 "register_operand")
+ (unspec:SVE_FULL_HSF_FP8_FDOT
+ [(match_operand:SVE_FULL_HSF_FP8_FDOT 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
UNSPEC_DOT_LANE_FP8))]
- "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] fdot\t%0.<Vetype>, %2.b, %3.b[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b[%4]
@@ -4068,8 +4068,8 @@
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:SVE_FULL_BHI 3 "register_operand")
- (match_operand:SVE_FULL_BHI 4 "register_operand")]
+ [(match_operand:VNx16QI_ONLY 3 "register_operand")
+ (match_operand:VNx16QI_ONLY 4 "register_operand")]
SVE2_MATCH)]
UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))]
@@ -4081,6 +4081,51 @@
}
)
+(define_expand "@aarch64_pred_<sve_int_op><mode>"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:VNx8HI_ONLY 3 "register_operand")
+ (match_operand:VNx8HI_ONLY 4 "register_operand")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE2 && TARGET_NON_STREAMING"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:VNx8HI_ONLY 3 "register_operand")
+ (match_operand:VNx8HI_ONLY 4 "register_operand")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE2 && TARGET_NON_STREAMING"
+ {@ [ cons: =0, 1 , 3, 4; attrs: pred_clobber ]
+ [ &Upa , Upl, w, w; yes ] <sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ [ ?Upl , 0 , w, w; yes ] ^
+ [ Upa , Upl, w, w; no ] ^
+ }
+)
+
;; Predicated string matching in which both the flag and predicate results
;; are interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
@@ -4118,6 +4163,45 @@
}
)
+(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:VNx8HI_ONLY 2 "register_operand" "w")
+ (match_operand:VNx8HI_ONLY 3 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE2
+ && TARGET_NON_STREAMING
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
;; Predicated string matching in which only the flags result is interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4d9d83d..ef9c165 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -430,6 +430,7 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
#include "tuning_models/neoversev2.h"
#include "tuning_models/neoversev3.h"
#include "tuning_models/neoversev3ae.h"
+#include "tuning_models/olympus.h"
#include "tuning_models/a64fx.h"
#include "tuning_models/fujitsu_monaka.h"
@@ -974,19 +975,24 @@ aarch64_cb_rhs (rtx_code op_code, rtx rhs)
{
case EQ:
case NE:
- case GT:
- case GTU:
case LT:
case LTU:
+ case GE:
+ case GEU:
+ /* EQ/NE range is 0 .. 63.
+ LT/LTU range is 0 .. 63.
+ GE/GEU range is 1 .. 64 => GT x - 1, but also supports 0 via XZR.
+ So the intersection is 0 .. 63. */
return IN_RANGE (rhs_val, 0, 63);
- case GE: /* CBGE: signed greater than or equal */
- case GEU: /* CBHS: unsigned greater than or equal */
- return IN_RANGE (rhs_val, 1, 64);
-
- case LE: /* CBLE: signed less than or equal */
- case LEU: /* CBLS: unsigned less than or equal */
- return IN_RANGE (rhs_val, -1, 62);
+ case GT:
+ case GTU:
+ case LE:
+ case LEU:
+      /* GT/GTU range is 0 .. 63.
+ LE/LEU range is -1 .. 62 => LT x + 1.
+ So the intersection is 0 .. 62. */
+ return IN_RANGE (rhs_val, 0, 62);
default:
return false;
@@ -2881,10 +2887,47 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
return aarch64_gen_compare_reg (code, x, y);
}
+/* Split IMM into two 12-bit halves, producing an EQ/NE comparison vs X.
+ TMP may be a scratch. This optimizes a sequence from
+ mov x0, #imm1
+ movk x0, #imm2, lsl 16 -- x0 contains CST
+ cmp x1, x0
+ into the shorter:
+ sub tmp, x1, #(CST & 0xfff000)
+ subs tmp, tmp, #(CST & 0x000fff)
+*/
+rtx
+aarch64_gen_compare_split_imm24 (rtx x, rtx imm, rtx tmp)
+{
+ HOST_WIDE_INT lo_imm = UINTVAL (imm) & 0xfff;
+ HOST_WIDE_INT hi_imm = UINTVAL (imm) & 0xfff000;
+ enum machine_mode mode = GET_MODE (x);
+
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (mode);
+
+ emit_insn (gen_add3_insn (tmp, x, GEN_INT (-hi_imm)));
+ /* TODO: We don't need the gpr result of the second insn. */
+ switch (mode)
+ {
+    case E_SImode:
+ tmp = gen_addsi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+ break;
+    case E_DImode:
+ tmp = gen_adddi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+ break;
+ default:
+ abort ();
+ }
+ emit_insn (tmp);
+
+ return gen_rtx_REG (CC_NZmode, CC_REGNUM);
+}
+
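The split relies only on modular arithmetic: subtracting the high and low 12-bit halves in sequence leaves the same value, and therefore the same Z flag, as subtracting the whole constant.  A self-contained check of that identity (illustrative only, with an arbitrarily chosen constant):

    // Illustrative check: for CST split into hi/lo halves on bit 12,
    // (x - hi) - lo wraps to the same value as x - CST, so the final
    // flag-setting subtraction still tests x == CST.
    #include <cstdint>
    #include <cassert>

    int
    main ()
    {
      const uint64_t cst = 0xabcdef;          // an arbitrary 24-bit constant
      const uint64_t hi = cst & 0xfff000;
      const uint64_t lo = cst & 0x000fff;
      const uint64_t tests[] = { 0, 1, cst, cst - 1, ~uint64_t (0) };
      for (uint64_t x : tests)
        {
          assert ((x - hi) - lo == x - cst);
          assert (((x - hi) - lo == 0) == (x == cst));
        }
      return 0;
    }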
/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
Return the jump instruction. */
-static rtx
+rtx
aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
rtx_code_label *label)
{
@@ -3932,18 +3975,53 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
}
+/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE
+ is a partial vector mode, and if exceptions must be suppressed for its
+ undefined elements, convert PRED from a container-level predicate to
+ an element-level predicate and ensure that the undefined elements
+ are inactive. Make no changes otherwise.
+
+ Return the resultant predicate. */
+rtx
+aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+ if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+ {
+ /* Generate an element-level mask. */
+ rtx mask = aarch64_sve_packed_pred (data_mode);
+ machine_mode pmode = GET_MODE (mask);
+
+ /* Apply the existing predicate. */
+ rtx dst = gen_reg_rtx (pmode);
+ emit_insn (gen_and3 (pmode, dst, mask,
+ gen_lowpart (pmode, pred)));
+ return dst;
+ }
+
+ return pred;
+}
+
/* Emit a comparison CMP between OP0 and OP1, both of which have mode
DATA_MODE, and return the result in a predicate of mode PRED_MODE.
- Use TARGET as the target register if nonnull and convenient. */
+ Use TARGET as the target register if nonnull and convenient.
+
+ PRED_MODE can be either VNx16BI or the natural predicate mode for
+ DATA_MODE. */
static rtx
aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
machine_mode data_mode, rtx op1, rtx op2)
{
- insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+ auto src_pred_mode = aarch64_sve_pred_mode (data_mode);
+ insn_code icode;
+ if (known_eq (GET_MODE_NUNITS (pred_mode), GET_MODE_NUNITS (data_mode)))
+ icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+ else
+ icode = code_for_aarch64_pred_cmp_acle (cmp, data_mode);
expand_operand ops[5];
create_output_operand (&ops[0], target, pred_mode);
- create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
+ create_input_operand (&ops[1], CONSTM1_RTX (src_pred_mode), src_pred_mode);
create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
create_input_operand (&ops[3], op1, data_mode);
create_input_operand (&ops[4], op2, data_mode);
@@ -3951,15 +4029,14 @@ aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
return ops[0].value;
}
-/* Use a comparison to convert integer vector SRC into MODE, which is
- the corresponding SVE predicate mode. Use TARGET for the result
- if it's nonnull and convenient. */
+/* Use a comparison to convert integer vector SRC into VNx16BI.
+ Use TARGET for the result if it's nonnull and convenient. */
rtx
-aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
+aarch64_convert_sve_data_to_pred (rtx target, rtx src)
{
machine_mode src_mode = GET_MODE (src);
- return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
+ return aarch64_sve_emit_int_cmp (target, VNx16BImode, NE, src_mode,
src, CONST0_RTX (src_mode));
}
@@ -6041,9 +6118,9 @@ aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
unsigned int vl)
{
rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
- target = aarch64_target_reg (target, mode);
- emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode,
- target, const0_rtx, limit));
+ target = aarch64_target_reg (target, VNx16BImode);
+ emit_insn (gen_aarch64_sve_while_acle (UNSPEC_WHILELO, DImode, mode,
+ target, const0_rtx, limit));
return target;
}
@@ -6189,8 +6266,7 @@ aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
operands but permutes them as though they had mode MODE. */
machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
target = aarch64_target_reg (target, GET_MODE (a));
- rtx type_reg = CONST0_RTX (mode);
- emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
+ emit_insn (gen_aarch64_sve_acle (UNSPEC_TRN1, mode, target, a, b));
return target;
}
@@ -6272,8 +6348,7 @@ aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
int_builder.quick_push (INTVAL (builder.elt (i))
? constm1_rtx : const0_rtx);
- return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
- int_builder.build ());
+ return aarch64_convert_sve_data_to_pred (target, int_builder.build ());
}
/* Set DEST to immediate IMM. */
@@ -6725,6 +6800,27 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
dest, ptrue, src));
}
+/* Set predicate register DEST such that every element has the scalar
+ boolean value in SRC, with any nonzero source counting as "true".
+ MODE is a MODE_VECTOR_BOOL that determines the element size;
+ DEST can have this mode or VNx16BImode. In the latter case,
+ the upper bits of each element are defined to be zero, as for
+ the .H, .S, and .D forms of PTRUE. */
+
+void
+aarch64_emit_sve_pred_vec_duplicate (machine_mode mode, rtx dest, rtx src)
+{
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_ashldi3 (tmp, gen_lowpart (DImode, src),
+ gen_int_mode (63, DImode)));
+ if (GET_MODE (dest) == VNx16BImode)
+ emit_insn (gen_aarch64_sve_while_acle (UNSPEC_WHILELO, DImode, mode,
+ dest, const0_rtx, tmp));
+ else
+ emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode,
+ dest, const0_rtx, tmp));
+}
+
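The shift-then-WHILELO trick works because WHILELO activates lane I exactly when BASE + I is below LIMIT as an unsigned number; moving a 0/1 flag into bit 63 therefore produces a limit of either 0 (no lanes) or 1 << 63 (all lanes).  A small model of that behaviour, assuming SRC holds 0 or 1 (illustrative only):

    // Model of the WHILELO-based broadcast: lane I is active while
    // BASE + I < LIMIT (unsigned), so LIMIT = SRC << 63 gives either no
    // lanes (SRC == 0) or all lanes (SRC == 1).
    #include <cstdint>
    #include <cassert>

    static bool
    whilelo_lane_active (uint64_t base, uint64_t limit, unsigned int lane)
    {
      return base + lane < limit;
    }

    int
    main ()
    {
      for (uint64_t src : { uint64_t (0), uint64_t (1) })
        {
          uint64_t limit = src << 63;
          for (unsigned int lane = 0; lane < 16; ++lane)
            assert (whilelo_lane_active (0, limit, lane) == (src != 0));
        }
      return 0;
    }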
static bool
aarch64_function_ok_for_sibcall (tree, tree exp)
{
@@ -14326,42 +14422,58 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
{
/* Conditional branch. */
- if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+ enum machine_mode cmpmode = GET_MODE (inner);
+ if (GET_MODE_CLASS (cmpmode) == MODE_CC)
return true;
- else
+
+ if (comparator == const0_rtx)
{
- if (cmpcode == NE || cmpcode == EQ)
+ switch (cmpcode)
{
- if (comparator == const0_rtx)
+ case NE:
+ case EQ:
+ if (cmpmode != SImode && cmpmode != DImode)
+ break;
+ if (GET_CODE (inner) == ZERO_EXTRACT)
{
- /* TBZ/TBNZ/CBZ/CBNZ. */
- if (GET_CODE (inner) == ZERO_EXTRACT)
- /* TBZ/TBNZ. */
- *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
- ZERO_EXTRACT, 0, speed);
- else
- /* CBZ/CBNZ. */
- *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
-
+ /* TBZ/TBNZ. */
+ *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
+ ZERO_EXTRACT, 0, speed);
return true;
}
- if (register_operand (inner, VOIDmode)
- && aarch64_imm24 (comparator, VOIDmode))
- {
- /* SUB and SUBS. */
- *cost += COSTS_N_INSNS (2);
- if (speed)
- *cost += extra_cost->alu.arith * 2;
- return true;
- }
- }
- else if (cmpcode == LT || cmpcode == GE)
- {
- /* TBZ/TBNZ. */
- if (comparator == const0_rtx)
- return true;
+ /* FALLTHRU */
+
+ case LT:
+ case GE:
+ /* CBZ/CBNZ/TBZ/TBNZ. */
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ return true;
+
+ default:
+ break;
}
}
+
+ if ((cmpcode == NE || cmpcode == EQ)
+ && (cmpmode == SImode || cmpmode == DImode)
+ && aarch64_split_imm24 (comparator, cmpmode))
+ {
+ /* SUB and SUBS. */
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ *cost += COSTS_N_INSNS (2);
+ if (speed)
+ *cost += extra_cost->alu.arith * 2;
+ return true;
+ }
+
+ if (TARGET_CMPBR)
+ {
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ if ((cmpmode != SImode && cmpmode != DImode)
+ || !aarch64_cb_rhs (cmpcode, comparator))
+ *cost += rtx_cost (comparator, cmpmode, cmpcode, 1, speed);
+ return true;
+ }
}
else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
{
@@ -16945,6 +17057,14 @@ private:
or vector loop. There is one entry for each tuning option of
interest. */
auto_vec<aarch64_vec_op_count, 2> m_ops;
+
+ /* When doing inner-loop vectorization the constraints on the data-refs in the
+  /* When doing inner-loop vectorization, the constraints on the data refs in
+     the outer loop can limit the inner-loop references; e.g. the outer loop
+     can force the inner loop to do a load and splat, which leaves the loop
+     entirely scalar because all lanes work on a duplicate.  We currently don't
+     support unrolling the inner loop independently of the outer loop during
+     outer-loop vectorization, which tends to lead to pipeline bubbles.  */
};
aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
@@ -17165,8 +17285,8 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
&& STMT_VINFO_DATA_REF (stmt_info))
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- if (stmt_info
- && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES)
+ if (node
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -17266,13 +17386,14 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
static bool
aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
- unsigned int vec_flags)
+ slp_tree node, unsigned int vec_flags)
{
gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
if (!assign
+ || !node
|| gimple_assign_rhs_code (assign) != BIT_AND_EXPR
- || !STMT_VINFO_VECTYPE (stmt_info)
- || !VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_info)))
+ || !SLP_TREE_VECTYPE (node)
+ || !VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
return false;
for (int i = 1; i < 3; ++i)
@@ -17307,10 +17428,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
instructions. */
static unsigned int
aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
+ slp_tree node,
stmt_vec_info stmt_info,
const sve_vec_cost *sve_costs)
{
- switch (vect_reduc_type (vinfo, stmt_info))
+ switch (vect_reduc_type (vinfo, node))
{
case EXTRACT_LAST_REDUCTION:
return sve_costs->clast_cost;
@@ -17350,7 +17472,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
- If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the
SVE implementation. */
static unsigned int
-aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
+aarch64_in_loop_reduction_latency (vec_info *vinfo,
+ slp_tree node,
+ stmt_vec_info stmt_info,
unsigned int vec_flags)
{
const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs;
@@ -17363,7 +17487,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
if (sve_costs)
{
unsigned int latency
- = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+ = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+ stmt_info, sve_costs);
if (latency)
return latency;
}
@@ -17437,8 +17562,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
for each element. We therefore need to divide the full-instruction
cost by the number of elements in the vector. */
if (kind == scalar_load
+ && node
&& sve_costs
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
unsigned int nunits = vect_nunits_for_cost (vectype);
/* Test for VNx2 modes, which have 64-bit containers. */
@@ -17450,8 +17576,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
/* Detect cases in which a scalar_store is really storing one element
in a scatter operation. */
if (kind == scalar_store
+ && node
&& sve_costs
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return sve_costs->scatter_store_elt_cost;
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
@@ -17460,7 +17587,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
&& sve_costs)
{
unsigned int latency
- = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+ = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+ stmt_info, sve_costs);
if (latency)
return latency;
}
@@ -17609,7 +17737,7 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
/* For vector boolean ANDs with a compare operand we just need
one insn. */
- if (aarch64_bool_compound_p (vinfo, stmt_info, vec_flags))
+ if (aarch64_bool_compound_p (vinfo, stmt_info, node, vec_flags))
return 0;
}
@@ -17642,13 +17770,12 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
with the single accumulator being read and written multiple times. */
static bool
-aarch64_force_single_cycle (vec_info *vinfo, stmt_vec_info stmt_info)
+aarch64_force_single_cycle (vec_info *vinfo, slp_tree node)
{
- if (!STMT_VINFO_REDUC_DEF (stmt_info))
+ auto reduc_info = info_for_reduction (as_a <loop_vec_info> (vinfo), node);
+ if (!reduc_info)
return false;
-
- auto reduc_info = info_for_reduction (vinfo, stmt_info);
- return STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
+ return VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info);
}
/* COUNT, KIND and STMT_INFO are the same as for vector_costs::add_stmt_cost
@@ -17672,8 +17799,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
&& vect_is_reduction (stmt_info))
{
unsigned int base
- = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags);
- if (aarch64_force_single_cycle (m_vinfo, stmt_info))
+ = aarch64_in_loop_reduction_latency (m_vinfo, node,
+ stmt_info, m_vec_flags);
+ if (m_costing_for_scalar
+ || aarch64_force_single_cycle (m_vinfo, node))
/* ??? Ideally we'd use a tree to reduce the copies down to 1 vector,
and then accumulate that, but at the moment the loop-carried
dependency includes all copies. */
@@ -17690,7 +17819,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
/* Assume that bool AND with compare operands will become a single
operation. */
- if (aarch64_bool_compound_p (m_vinfo, stmt_info, m_vec_flags))
+ if (aarch64_bool_compound_p (m_vinfo, stmt_info, node, m_vec_flags))
return;
}
@@ -17707,7 +17836,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& kind == vec_to_scalar
&& (m_vec_flags & VEC_ADVSIMD)
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
auto dr = STMT_VINFO_DATA_REF (stmt_info);
tree dr_ref = DR_REF (dr);
@@ -17720,7 +17849,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
{
if (gimple_vuse (SSA_NAME_DEF_STMT (offset)))
{
- if (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type)
+ if (SLP_TREE_TYPE (node) == load_vec_info_type)
ops->loads += count - 1;
else
/* Stores want to count both the index to array and data to
@@ -17786,7 +17915,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
have only accounted for one. */
if (stmt_info
&& (kind == vector_stmt || kind == vec_to_scalar)
- && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
+ && vect_reduc_type (m_vinfo, node) == COND_REDUCTION)
ops->general_ops += count;
/* Count the predicate operations needed by an SVE comparison. */
@@ -17822,7 +17951,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& sve_issue
&& (kind == scalar_load || kind == scalar_store)
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
unsigned int pairs = CEIL (count, 2);
ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -17931,6 +18060,17 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
tree vectype, int misalign,
vect_cost_model_location where)
{
+ /* When costing for scalars, vectype will be NULL; so look up the type via
+  /* When costing for scalars, vectype will be NULL, so look up the type via
+ if (m_costing_for_scalar && stmt_info)
+ {
+ gcc_assert (!vectype);
+ /* This won't work for e.g. gconds or other statements without a lhs,
+     but those only work on GPRs anyway and this is the best we can do.  */
+ if (tree lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)))
+ vectype = TREE_TYPE (lhs);
+ }
+
fractional_cost stmt_cost
= aarch64_builtin_vectorization_cost (kind, vectype, misalign);
@@ -17946,6 +18086,28 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
analyze_loop_vinfo (loop_vinfo);
m_analyzed_vinfo = true;
+ if (in_inner_loop_p)
+ m_loop_fully_scalar_dup = true;
+ }
+
+ /* Detect whether the loop is working on fully duplicated lanes. This would
+ only be possible with inner loop vectorization since otherwise we wouldn't
+ try to vectorize. */
+ if (in_inner_loop_p
+ && node
+ && m_loop_fully_scalar_dup
+ && SLP_TREE_LANES (node) == 1
+ && !SLP_TREE_CHILDREN (node).exists ())
+ {
+ /* Check if load is a duplicate. */
+ if (gimple_vuse (stmt_info->stmt)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_INVARIANT)
+ ;
+ else if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (node) == vect_external_def)
+ ;
+ else
+ m_loop_fully_scalar_dup = false;
}
/* Apply the heuristic described above m_stp_sequence_cost. */
@@ -17977,9 +18139,10 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
/* Check if we've seen an SVE gather/scatter operation and which size. */
if (kind == scalar_load
+ && node
&& vectype
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
if (sve_costs)
@@ -18311,8 +18474,19 @@ adjust_body_cost (loop_vec_info loop_vinfo,
if (m_vec_flags & VEC_ANY_SVE)
threshold = CEIL (threshold, aarch64_estimated_sve_vq ());
- if (m_num_vector_iterations >= 1
- && m_num_vector_iterations < threshold)
+  /* Increase the cost of the vector code if it looks like it has limited
+     throughput due to outer-loop vectorization.  */
+ if (m_loop_fully_scalar_dup)
+ {
+ body_cost *= estimated_vf;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Increasing body cost to %d because vector code has"
+			 " low throughput per iteration due to splats\n",
+ body_cost);
+ }
+ else if (m_num_vector_iterations >= 1
+ && m_num_vector_iterations < threshold)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -20481,6 +20655,8 @@ aarch64_compare_version_priority (tree decl1, tree decl2)
unsigned long _size; // Size of the struct, so it can grow.
unsigned long _hwcap;
unsigned long _hwcap2;
+ unsigned long _hwcap3;
+ unsigned long _hwcap4;
}
*/
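With the two new fields, a resolver can detect their presence through the _size field.  The sketch below mirrors the struct in the comment above; the convention of receiving it as the resolver's second argument follows the usual AArch64 ifunc ABI and is an assumption, not something defined by this patch:

    // Illustrative resolver; struct and field names mirror the comment above.
    #include <cstddef>

    struct ifunc_arg_t
    {
      unsigned long _size;
      unsigned long _hwcap;
      unsigned long _hwcap2;
      unsigned long _hwcap3;
      unsigned long _hwcap4;
    };

    extern "C" void *
    resolve_foo (unsigned long hwcap, const ifunc_arg_t *arg)
    {
      unsigned long hwcap3 = 0;
      if (arg
          && arg->_size >= offsetof (ifunc_arg_t, _hwcap3) + sizeof (arg->_hwcap3))
        hwcap3 = arg->_hwcap3;
      (void) hwcap;
      (void) hwcap3;
      // ... select an implementation based on the hwcap words ...
      return nullptr;
    }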
@@ -20497,14 +20673,24 @@ build_ifunc_arg_type ()
tree field3 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
get_identifier ("_hwcap2"),
long_unsigned_type_node);
+ tree field4 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+ get_identifier ("_hwcap3"),
+ long_unsigned_type_node);
+ tree field5 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+ get_identifier ("_hwcap4"),
+ long_unsigned_type_node);
DECL_FIELD_CONTEXT (field1) = ifunc_arg_type;
DECL_FIELD_CONTEXT (field2) = ifunc_arg_type;
DECL_FIELD_CONTEXT (field3) = ifunc_arg_type;
+ DECL_FIELD_CONTEXT (field4) = ifunc_arg_type;
+ DECL_FIELD_CONTEXT (field5) = ifunc_arg_type;
TYPE_FIELDS (ifunc_arg_type) = field1;
DECL_CHAIN (field1) = field2;
DECL_CHAIN (field2) = field3;
+ DECL_CHAIN (field3) = field4;
+ DECL_CHAIN (field4) = field5;
layout_type (ifunc_arg_type);
@@ -25366,20 +25552,41 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
+/* Return true if function declaration FNDECL needs to be marked as
+ having a variant PCS. */
+
+static bool
+aarch64_is_variant_pcs (tree fndecl)
+{
+ /* Check for ABIs that preserve more registers than usual. */
+ arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
+ if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE)
+ return true;
+
+ /* Check for ABIs that allow PSTATE.SM to be 1 on entry. */
+ tree fntype = TREE_TYPE (fndecl);
+ if (aarch64_fntype_pstate_sm (fntype) != AARCH64_ISA_MODE_SM_OFF)
+ return true;
+
+ /* Check for ABIs that require PSTATE.ZA to be 1 on entry, either because
+ of ZA or ZT0. */
+ if (aarch64_fntype_pstate_za (fntype) != 0)
+ return true;
+
+ return false;
+}
+
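For reference, a few illustrative declarations that the new predicate would classify as variant-PCS under the three rules above (assuming SVE/SME support and the usual ACLE keywords; these examples are not taken from the patch):

    // Illustrative declarations only; each should be emitted with a
    // .variant_pcs directive.
    #include <arm_sve.h>

    __attribute__ ((aarch64_vector_pcs)) void simd_fn (void);  // ARM_PCS_SIMD
    svint32_t sve_fn (svint32_t x);                            // ARM_PCS_SVE
    void streaming_fn (void) __arm_streaming;                  // PSTATE.SM may be 1
    void za_fn (void) __arm_inout ("za");                      // PSTATE.ZA is 1 on entry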
/* Output .variant_pcs for aarch64_vector_pcs function symbols. */
static void
aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name)
{
- if (TREE_CODE (decl) == FUNCTION_DECL)
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && aarch64_is_variant_pcs (decl))
{
- arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id ();
- if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE)
- {
- fprintf (stream, "\t.variant_pcs\t");
- assemble_name (stream, name);
- fprintf (stream, "\n");
- }
+ fprintf (stream, "\t.variant_pcs\t");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
}
}
@@ -31718,7 +31925,7 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
/* Expand the spaceship optab for floating-point operands.
- If the result is compared against (-1, 0, 1 , 2), expand into
+ If the result is compared against (-1, 0, 1, -128), expand into
fcmpe + conditional branch insns.
Otherwise (the result is just stored as an integer), expand into
@@ -31757,7 +31964,7 @@ aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
emit_jump (end_label);
emit_label (un_label);
- emit_move_insn (dest, const2_rtx);
+ emit_move_insn (dest, GEN_INT (-128));
emit_jump (end_label);
emit_label (gt_label);
@@ -31963,9 +32170,43 @@ aarch64_test_sysreg_encoding_clashes (void)
static void
aarch64_test_sve_folding ()
{
+ aarch64_target_switcher switcher (AARCH64_FL_SVE);
+
tree res = fold_unary (BIT_NOT_EXPR, ssizetype,
ssize_int (poly_int64 (1, 1)));
ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1))));
+
+ auto build_v16bi = [](bool a, bool b)
+ {
+ rtx_vector_builder builder (VNx16BImode, 2, 1);
+ builder.quick_push (a ? const1_rtx : const0_rtx);
+ builder.quick_push (b ? const1_rtx : const0_rtx);
+ return builder.build ();
+ };
+ rtx v16bi_10 = build_v16bi (1, 0);
+ rtx v16bi_01 = build_v16bi (0, 1);
+
+ for (auto mode : { VNx8BImode, VNx4BImode, VNx2BImode })
+ {
+ rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
+ rtx subreg = lowpart_subreg (VNx16BImode, reg, mode);
+ rtx and1 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, and1, VNx16BImode), reg);
+ rtx and0 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, and0, VNx16BImode), CONST0_RTX (mode));
+
+ rtx ior1 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, ior1, VNx16BImode), CONSTM1_RTX (mode));
+ rtx ior0 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, ior0, VNx16BImode), reg);
+
+ rtx xor1 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_RTX_EQ (lowpart_subreg (mode, xor1, VNx16BImode),
+ lowpart_subreg (mode, gen_rtx_NOT (VNx16BImode, subreg),
+ VNx16BImode));
+ rtx xor0 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, xor0, VNx16BImode), reg);
+ }
}
/* Run all target-specific selftests. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 096c853..2b3610c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -410,8 +410,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
/* CSSC instructions are enabled through +cssc. */
#define TARGET_CSSC AARCH64_HAVE_ISA (CSSC)
-/* CB<cc> instructions are enabled through +cmpbr. */
-#define TARGET_CMPBR AARCH64_HAVE_ISA (CMPBR)
+/* CB<cc> instructions are enabled through +cmpbr,
+ but are incompatible with -mtrack-speculation. */
+#define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && !aarch64_track_speculation)
/* Make sure this is always defined so we don't have to check for ifdefs
but rather use normal ifs. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a4ae685..6e215c4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -280,6 +280,7 @@
UNSPEC_PACIBSP
UNSPEC_PRLG_STK
UNSPEC_REV
+ UNSPEC_REV_PRED
UNSPEC_SADALP
UNSPEC_SCVTF
UNSPEC_SET_LANE
@@ -440,6 +441,16 @@
; must not operate on inactive inputs if doing so could induce a fault.
(SVE_STRICT_GP 1)])
+;; These constants are used as const_ints in MTE instructions.
+(define_constants
+ [; 0xf0ff...
+ ; Tag mask for the 4-bit tag stored in the top 8 bits of a pointer.
+ (MEMTAG_TAG_MASK -1080863910568919041)
+
+ ; 0x00ff...
+   ; Mask for the 56-bit address used by the subp instruction.
+ (MEMTAG_ADDR_MASK 72057594037927935)])
+
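A quick arithmetic check (illustrative only) that the decimal const_int values above encode the intended masks:

    // Illustrative check that the decimal const_int values match the masks
    // described in the comments.
    #include <cstdint>

    static_assert (uint64_t (-1080863910568919041LL) == 0xf0ffffffffffffffULL,
                   "MEMTAG_TAG_MASK");
    static_assert (uint64_t (72057594037927935LL) == 0x00ffffffffffffffULL,
                   "MEMTAG_ADDR_MASK");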
(include "constraints.md")
(include "predicates.md")
(include "iterators.md")
@@ -724,8 +735,8 @@
(BRANCH_LEN_N_32KiB -32768)
;; +/- 1KiB. Used by CBB<cond>, CBH<cond>, CB<cond>.
- (BRANCH_LEN_P_1Kib 1020)
- (BRANCH_LEN_N_1Kib -1024)
+ (BRANCH_LEN_P_1KiB 1020)
+ (BRANCH_LEN_N_1KiB -1024)
]
)
@@ -803,7 +814,7 @@
)
;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
-(define_insn "aarch64_cbz<optab><mode>1"
+(define_insn "*aarch64_cbz<optab><mode>"
[(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1))
@@ -837,27 +848,13 @@
[(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1))
- (pc)))
- (clobber (reg:CC CC_REGNUM))]
+ (pc)))]
"!aarch64_track_speculation"
{
- if (get_attr_length (insn) == 8)
- {
- if (get_attr_far_branch (insn) == FAR_BRANCH_YES)
- return aarch64_gen_far_branch (operands, 1, "Ltb",
- "<inv_tb>\\t%<w>0, <sizem1>, ");
- else
- {
- char buf[64];
- uint64_t val = ((uint64_t) 1)
- << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
- sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
- output_asm_insn (buf, operands);
- return "<bcond>\t%l1";
- }
- }
- else
+ if (get_attr_length (insn) == 4)
return "<tbz>\t%<w>0, <sizem1>, %l1";
+ return aarch64_gen_far_branch (operands, 1, "Ltb",
+ "<inv_tb>\\t%<w>0, <sizem1>, ");
}
[(set_attr "type" "branch")
(set (attr "length")
@@ -869,44 +866,44 @@
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 1) (pc))
- (const_int BRANCH_LEN_N_1MiB))
+ (const_int BRANCH_LEN_N_32KiB))
(lt (minus (match_dup 1) (pc))
- (const_int BRANCH_LEN_P_1MiB)))
+ (const_int BRANCH_LEN_P_32KiB)))
(const_string "no")
(const_string "yes")))]
)
;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction.
;; The immediate range depends on the comparison code.
-;; Comparisons against immediates outside this range fall back to
-;; CMP + B<cond>.
-(define_insn "aarch64_cb<INT_CMP:code><GPI:mode>"
- [(set (pc) (if_then_else (INT_CMP
- (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "nonmemory_operand"
- "r<INT_CMP:cmpbr_imm_constraint>"))
- (label_ref (match_operand 2))
- (pc)))]
- "TARGET_CMPBR && aarch64_cb_rhs (<INT_CMP:CODE>, operands[1])"
+(define_insn "*aarch64_cb<code><mode>"
+ [(set (pc) (if_then_else
+ (INT_CMP
+ (match_operand:GPI 0 "register_operand" "r")
+ (match_operand:GPI 1
+ "aarch64_reg_<cmpbr_imm_constraint>_operand"
+ "r<cmpbr_imm_constraint>"))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "TARGET_CMPBR"
{
- return (get_attr_far_branch (insn) == FAR_BRANCH_NO)
- ? "cb<INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2"
- : aarch64_gen_far_branch (operands, 2, "L",
- "cb<INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, ");
+ if (get_attr_length (insn) == 4)
+ return "cb<cmp_op>\t%<w>0, %<w>1, %l2";
+ return aarch64_gen_far_branch (operands, 2, "L",
+ "cb<inv_cmp_op>\t%<w>0, %<w>1, ");
}
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_string "no")
(const_string "yes")))]
)
@@ -928,16 +925,16 @@
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_string "no")
(const_string "yes")))]
)
@@ -977,37 +974,24 @@
(const_string "yes")))]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; b<ne,eq> .Label
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; b<ne,eq> .Label
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>"
- [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "aarch64_imm24" "n"))
- (label_ref:P (match_operand 2))
- (pc)))]
- "!aarch64_move_imm (INTVAL (operands[1]), <GPI:MODE>mode)
- && !aarch64_plus_operand (operands[1], <GPI:MODE>mode)
- && !reload_completed"
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "aarch64_split_imm24" "n")])
+ (label_ref (match_operand 3))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<GPI:MODE>mode);
- emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
- emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode,
- cc_reg, const0_rtx);
- emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2]));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[1], operands[2],
+ operands[4]);
+ emit_jump_insn (gen_aarch64_bcond (operands[0], cc_reg, operands[3]));
DONE;
}
)
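
The deleted comment block spells out the trick this pattern relies on: an equality test against a 24-bit constant can be done as a SUB of the high 12 bits (shifted) followed by a flag-setting SUBS of the low 12 bits, which aarch64_gen_compare_split_imm24 presumably now emits. A minimal C model of that identity (an illustrative sketch; the function name eq_split_imm24 is invented for the example):

    #include <assert.h>
    #include <stdint.h>

    /* x == cst  <=>  ((x - (cst & 0xfff000)) - (cst & 0x000fff)) == 0,
       i.e. SUB #hi12, lsl #12 followed by SUBS #lo12 setting the NZ flags.  */
    static int eq_split_imm24 (uint64_t x, uint32_t cst)
    {
      uint64_t hi = cst & 0xfff000;
      uint64_t lo = cst & 0x000fff;
      return (x - hi) - lo == 0;
    }

    int main (void)
    {
      assert (eq_split_imm24 (0xabc123, 0xabc123));
      assert (!eq_split_imm24 (0xabc124, 0xabc123));
      assert (!eq_split_imm24 (0, 0xabc123));
      return 0;
    }
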
@@ -1412,16 +1396,16 @@
/* Save GCS with code like
mov x16, 1
chkfeat x16
- tbnz x16, 0, .L_done
+ cbnz x16, .L_done
mrs tmp, gcspr_el0
str tmp, [%0, 8]
.L_done: */
- rtx done_label = gen_label_rtx ();
+ auto done_label = gen_label_rtx ();
rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
emit_move_insn (r16, const1_rtx);
emit_insn (gen_aarch64_chkfeat ());
- emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+ emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label));
rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
rtx gcs = gen_reg_rtx (Pmode);
emit_insn (gen_aarch64_load_gcspr (gcs));
@@ -1444,7 +1428,7 @@
/* Restore GCS with code like
mov x16, 1
chkfeat x16
- tbnz x16, 0, .L_done
+ cbnz x16, .L_done
ldr tmp1, [%1, 8]
mrs tmp2, gcspr_el0
subs tmp2, tmp1, tmp2
@@ -1455,12 +1439,12 @@
b.ne .L_loop
.L_done: */
- rtx loop_label = gen_label_rtx ();
- rtx done_label = gen_label_rtx ();
+ auto loop_label = gen_label_rtx ();
+ auto done_label = gen_label_rtx ();
rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
emit_move_insn (r16, const1_rtx);
emit_insn (gen_aarch64_chkfeat ());
- emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+ emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label));
rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
rtx gcs_old = gen_reg_rtx (Pmode);
emit_move_insn (gcs_old, gcs_slot);
@@ -4523,7 +4507,7 @@
[(set_attr "type" "fcmp<stype>")]
)
-(define_insn "*cmp_swp_<shift>_reg<mode>"
+(define_insn "cmp_swp_<shift>_reg<mode>"
[(set (reg:CC_SWP CC_REGNUM)
(compare:CC_SWP (ASHIFT:GPI
(match_operand:GPI 0 "register_operand" "r")
@@ -4650,39 +4634,24 @@
[(set_attr "type" "csel")]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; cset x2, <ne,eq>
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; cset x2, <ne, eq>.
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*compare_cstore<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (EQL:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "aarch64_imm24" "n")))
- (clobber (reg:CC CC_REGNUM))]
- "!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
- && !aarch64_plus_operand (operands[2], <MODE>mode)
- && !reload_completed"
+ (match_operator:GPI 1 "aarch64_equality_operator"
+ [(match_operand:GPI 2 "register_operand" "r")
+ (match_operand:GPI 3 "aarch64_split_imm24" "n")]))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (-hi_imm)));
- emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
- emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp_rtx, cc_reg));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[2], operands[3],
+ operands[4]);
+ emit_insn (gen_aarch64_cstore<mode> (operands[0], operands[1], cc_reg));
DONE;
}
- [(set_attr "type" "csel")]
)
;; zero_extend version of the above
@@ -4812,15 +4781,21 @@
(match_operand:ALLI 3 "register_operand")))]
""
{
- rtx ccreg;
enum rtx_code code = GET_CODE (operands[1]);
-
if (code == UNEQ || code == LTGT)
FAIL;
- ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
- XEXP (operands[1], 1));
- operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ rtx ccreg = XEXP (operands[1], 0);
+ enum machine_mode ccmode = GET_MODE (ccreg);
+ if (GET_MODE_CLASS (ccmode) == MODE_CC)
+ gcc_assert (XEXP (operands[1], 1) == const0_rtx);
+ else if (ccmode == QImode || ccmode == HImode)
+ FAIL;
+ else
+ {
+ ccreg = aarch64_gen_compare_reg (code, ccreg, XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }
}
)
@@ -7715,6 +7690,22 @@
}
)
+(define_expand "isinf<mode>2"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")]
+ "TARGET_FLOAT"
+{
+ rtx op = force_lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_move_insn (tmp, GEN_INT (HOST_WIDE_INT_M1U << (<mantissa_bits> + 1)));
+ rtx cc_reg = gen_rtx_REG (CC_SWPmode, CC_REGNUM);
+ emit_insn (gen_cmp_swp_lsl_reg<v_int_equiv> (op, GEN_INT (1), tmp));
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ emit_insn (gen_aarch64_cstoresi (operands[0], cmp, cc_reg));
+ DONE;
+}
+)
+
;; -------------------------------------------------------------------
;; Reload support
;; -------------------------------------------------------------------
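
The new isinf<mode>2 expander works on the integer bit pattern: shifting it left by one drops the sign bit, and the result equals ~0 << (mantissa_bits + 1) exactly for +Inf and -Inf, which have a full exponent and a zero mantissa. A stand-alone C model of the same check for double (an assumption-based sketch, not GCC code; isinf_bits is an invented name):

    #include <assert.h>
    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static int isinf_bits (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);              /* reinterpret as integer */
      return (bits << 1) == (~UINT64_C (0) << 53);  /* 52 mantissa bits + 1   */
    }

    int main (void)
    {
      assert (isinf_bits (INFINITY));
      assert (isinf_bits (-INFINITY));
      assert (!isinf_bits (NAN));
      assert (!isinf_bits (1.0));
      return 0;
    }
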
@@ -8565,7 +8556,7 @@
[(set (match_operand:DI 0 "register_operand" "=rk")
(ior:DI
(and:DI (match_operand:DI 1 "register_operand" "rk")
- (const_int -1080863910568919041)) ;; 0xf0ff...
+ (const_int MEMTAG_TAG_MASK))
(ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")]
UNSPEC_GEN_TAG_RND)
(const_int 56))))]
@@ -8608,9 +8599,9 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI
(and:DI (match_operand:DI 1 "register_operand" "rk")
- (const_int 72057594037927935)) ;; 0x00ff...
+ (const_int MEMTAG_ADDR_MASK))
(and:DI (match_operand:DI 2 "register_operand" "rk")
- (const_int 72057594037927935))))] ;; 0x00ff...
+ (const_int MEMTAG_ADDR_MASK))))]
"TARGET_MEMTAG"
"subp\\t%0, %1, %2"
[(set_attr "type" "memtag")]
@@ -8620,7 +8611,7 @@
(define_insn "ldg"
[(set (match_operand:DI 0 "register_operand" "+r")
(ior:DI
- (and:DI (match_dup 0) (const_int -1080863910568919041)) ;; 0xf0ff...
+ (and:DI (match_dup 0) (const_int MEMTAG_TAG_MASK))
(ashift:DI
(mem:QI (unspec:DI
[(and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index dc1925d..7b9e558 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -312,15 +312,9 @@
(define_constraint "Uc1"
"@internal
- A constraint that matches the integers 1...64."
+ A constraint that matches the integers 0...62."
(and (match_code "const_int")
- (match_test "IN_RANGE (ival, 1, 64)")))
-
-(define_constraint "Uc2"
- "@internal
- A constraint that matches the integers -1...62."
- (and (match_code "const_int")
- (match_test "IN_RANGE (ival, -1, 62)")))
+ (match_test "IN_RANGE (ival, 0, 62)")))
(define_constraint "Up3"
"@internal
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8533912..b15e578 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -455,6 +455,7 @@
(define_mode_iterator VCVTFPM [V4HF V8HF V4SF])
;; Iterators for single modes, for "@" patterns.
+(define_mode_iterator VNx16BI_ONLY [VNx16BI])
(define_mode_iterator VNx16QI_ONLY [VNx16QI])
(define_mode_iterator VNx16SI_ONLY [VNx16SI])
(define_mode_iterator VNx8HI_ONLY [VNx8HI])
@@ -542,6 +543,12 @@
;; elements.
(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF])
+;; Like SVE_FULL_HSF, but selectively enables those modes that are valid
+;; for the variant of the SVE2 FP8 FDOT instruction associated with that
+;; mode.
+(define_mode_iterator SVE_FULL_HSF_FP8_FDOT [(VNx4SF "TARGET_SSVE_FP8DOT4")
+ (VNx8HF "TARGET_SSVE_FP8DOT2")])
+
;; Partial SVE floating-point vector modes that have 16-bit or 32-bit
;; elements.
(define_mode_iterator SVE_PARTIAL_HSF [VNx2HF VNx4HF VNx2SF])
@@ -930,7 +937,6 @@
UNSPEC_UZP2Q ; Used in aarch64-sve.md.
UNSPEC_ZIP1Q ; Used in aarch64-sve.md.
UNSPEC_ZIP2Q ; Used in aarch64-sve.md.
- UNSPEC_TRN1_CONV ; Used in aarch64-sve.md.
UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
@@ -1185,6 +1191,9 @@
UNSPEC_LUTI2 ; Used in aarch64-simd.md.
UNSPEC_LUTI4 ; Used in aarch64-simd.md.
+ ;; All used in aarch64-sve.md
+ UNSPEC_PERMUTE_PRED
+
;; All used in aarch64-sve2.md
UNSPEC_ADDQV
UNSPEC_ANDQV
@@ -1331,6 +1340,8 @@
(define_mode_attr half_mask [(HI "255") (SI "65535") (DI "4294967295")])
+(define_mode_attr mantissa_bits [(SF "23") (DF "52")])
+
;; For constraints used in scalar immediate vector moves
(define_mode_attr hq [(HI "h") (QI "q")])
@@ -2977,19 +2988,15 @@
(define_code_iterator INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
+;; Inverse comparisons must have the same constraint so that
+;; branches can be redirected during late compilation.
(define_code_attr cmpbr_imm_constraint [
- (eq "Uc0")
- (ne "Uc0")
- (gt "Uc0")
- (gtu "Uc0")
- (lt "Uc0")
- (ltu "Uc0")
-
- (ge "Uc1")
- (geu "Uc1")
-
- (le "Uc2")
- (leu "Uc2")
+ (eq "Uc0") (ne "Uc0")
+ (lt "Uc0") (ge "Uc0")
+ (ltu "Uc0") (geu "Uc0")
+
+ (gt "Uc1") (le "Uc1")
+ (gtu "Uc1") (leu "Uc1")
])
(define_code_attr fix_trunc_optab [(fix "fix_trunc")
@@ -3877,6 +3884,8 @@
(define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT])
+(define_int_iterator PNEXT_ONLY [UNSPEC_PNEXT])
+
(define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL
UNSPEC_USMATMUL])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 32056da..42304ce 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -286,10 +286,15 @@
(and (match_code "const_int")
(match_test "UINTVAL (op) <= 7")))
-;; An immediate that fits into 24 bits.
-(define_predicate "aarch64_imm24"
- (and (match_code "const_int")
- (match_test "IN_RANGE (UINTVAL (op), 0, 0xffffff)")))
+;; An immediate that fits into 24 bits, but needs splitting.
+(define_predicate "aarch64_split_imm24"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT i = UINTVAL (op);
+ return (IN_RANGE (i, 0, 0xffffff)
+ && !aarch64_move_imm (i, mode)
+ && !aarch64_uimm12_shift (i));
+})
(define_predicate "aarch64_mem_pair_offset"
(and (match_code "const_int")
@@ -1078,3 +1083,19 @@
(define_predicate "aarch64_maskload_else_operand"
(and (match_code "const_vector")
(match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+;; Check for a VNx16BI predicate that is a canonical PTRUE for the given
+;; predicate mode.
+(define_special_predicate "aarch64_ptrue_all_operand"
+ (and (match_code "const_vector")
+ (match_test "aarch64_ptrue_all_mode (op) == mode")))
+
+(define_predicate "aarch64_reg_Uc0_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Uc0 (op)"))))
+
+(define_predicate "aarch64_reg_Uc1_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Uc1 (op)"))))
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index 38a8c06..63ca8e9 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -190,12 +190,6 @@ aarch-bti-insert.o: $(srcdir)/config/arm/aarch-bti-insert.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/arm/aarch-bti-insert.cc
-aarch64-cc-fusion.o: $(srcdir)/config/aarch64/aarch64-cc-fusion.cc \
- $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
- $(RTL_SSA_H) tree-pass.h
- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
- $(srcdir)/config/aarch64/aarch64-cc-fusion.cc
-
aarch64-early-ra.o: $(srcdir)/config/aarch64/aarch64-early-ra.cc \
$(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
$(RTL_SSA_H) tree-pass.h
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index f76a250..9eb1a20 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -26,7 +26,7 @@
static const struct cpu_addrcost_table generic_armv9_a_addrcost_table =
{
{
- 1, /* hi */
+ 0, /* hi */
0, /* si */
0, /* di */
1, /* ti */
diff --git a/gcc/config/aarch64/tuning_models/olympus.h b/gcc/config/aarch64/tuning_models/olympus.h
new file mode 100644
index 0000000..268789d
--- /dev/null
+++ b/gcc/config/aarch64/tuning_models/olympus.h
@@ -0,0 +1,210 @@
+/* Tuning model description for the NVIDIA Olympus core.
+ Copyright The GNU Toolchain Authors.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_H_OLYMPUS
+#define GCC_AARCH64_H_OLYMPUS
+
+#include "generic.h"
+
+static struct cpu_regmove_cost olympus_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Spilling to int<->fp instead of memory is recommended so set
+ realistic costs compared to memmov_cost. */
+ 3, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
+static advsimd_vec_cost olympus_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 2, /* ld2_st2_permute_cost */
+ 2, /* ld3_st3_permute_cost */
+ 3, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 5, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 2, /* reduc_i64_cost */
+ 4, /* reduc_f16_cost */
+ 4, /* reduc_f32_cost */
+ 4, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
+ 8, /* vec_to_scalar_cost */
+ 4, /* scalar_to_vec_cost */
+ 6, /* align_load_cost */
+ 6, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static sve_vec_cost olympus_sve_vector_cost =
+{
+ {
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 2, /* ld2_st2_permute_cost */
+ 3, /* ld3_st3_permute_cost */
+ 3, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 9, /* reduc_i8_cost */
+ 8, /* reduc_i16_cost */
+ 6, /* reduc_i32_cost */
+ 2, /* reduc_i64_cost */
+ 8, /* reduc_f16_cost */
+ 6, /* reduc_f32_cost */
+ 4, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
+ 8, /* vec_to_scalar_cost */
+ 4, /* scalar_to_vec_cost */
+ 4, /* align_load_cost */
+ 6, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+ },
+ 3, /* clast_cost */
+ 10, /* fadda_f16_cost */
+ 6, /* fadda_f32_cost */
+ 4, /* fadda_f64_cost */
+ 14, /* gather_load_x32_cost */
+ 12, /* gather_load_x64_cost */
+ 42, /* gather_load_x32_init_cost */
+ 24, /* gather_load_x64_init_cost */
+ 1 /* scatter_store_elt_cost */
+};
+
+static aarch64_scalar_vec_issue_info olympus_scalar_issue_info =
+{
+ 4, /* loads_stores_per_cycle */
+ 2, /* stores_per_cycle */
+ 8, /* general_ops_per_cycle */
+ 0, /* fp_simd_load_general_ops */
+ 1 /* fp_simd_store_general_ops */
+};
+
+static aarch64_advsimd_vec_issue_info olympus_advsimd_issue_info =
+{
+ {
+ 3, /* loads_stores_per_cycle */
+ 2, /* stores_per_cycle */
+ 6, /* general_ops_per_cycle */
+ 0, /* fp_simd_load_general_ops */
+ 1 /* fp_simd_store_general_ops */
+ },
+ 2, /* ld2_st2_general_ops */
+ 2, /* ld3_st3_general_ops */
+ 3 /* ld4_st4_general_ops */
+};
+
+static aarch64_sve_vec_issue_info olympus_sve_issue_info =
+{
+ {
+ {
+ 3, /* loads_stores_per_cycle */
+ 2, /* stores_per_cycle */
+ 6, /* general_ops_per_cycle */
+ 0, /* fp_simd_load_general_ops */
+ 1 /* fp_simd_store_general_ops */
+ },
+ 2, /* ld2_st2_general_ops */
+ 2, /* ld3_st3_general_ops */
+ 3 /* ld4_st4_general_ops */
+ },
+ 2, /* pred_ops_per_cycle */
+ 1, /* while_pred_ops */
+ 0, /* int_cmp_pred_ops */
+ 0, /* fp_cmp_pred_ops */
+ 1, /* gather_scatter_pair_general_ops */
+ 1 /* gather_scatter_pair_pred_ops */
+};
+
+static aarch64_vec_issue_info olympus_vec_issue_info =
+{
+ &olympus_scalar_issue_info,
+ &olympus_advsimd_issue_info,
+ &olympus_sve_issue_info
+};
+
+/* Olympus costs for vector insn classes. */
+static struct cpu_vector_cost olympus_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 2, /* scalar_fp_stmt_cost */
+ 4, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &olympus_advsimd_vector_cost, /* advsimd */
+ &olympus_sve_vector_cost, /* sve */
+ &olympus_vec_issue_info /* issue_info */
+};
+
+/* Olympus prefetch settings (which disable prefetch). */
+static cpu_prefetch_tune olympus_prefetch_tune =
+{
+ 0, /* num_slots */
+ -1, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ -1, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
+static struct tune_params olympus_tunings =
+{
+ &cortexa76_extra_costs,
+ &generic_armv9_a_addrcost_table,
+ &olympus_regmove_cost,
+ &olympus_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_128, /* sve_width */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 3, /* store_fp. */
+ 5, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
+ 10, /* issue_rate */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
+ "32:16", /* function_align. */
+ "4", /* jump_align. */
+ "32:16", /* loop_align. */
+ 8, /* int_reassoc_width. */
+ 6, /* fp_reassoc_width. */
+ 4, /* fma_reassoc_width. */
+ 6, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_BASE
+ | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ &olympus_prefetch_tune,
+ AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
+ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
+};
+
+#endif /* GCC_AARCH64_H_OLYMPUS. */
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index d119464..8f7e537 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -66,9 +66,9 @@
;; I signed 12-bit immediate (for ARCompact)
;; K unsigned 3-bit immediate (for ARCompact)
;; L unsigned 6-bit immediate (for ARCompact)
-;; M unsinged 5-bit immediate (for ARCompact)
-;; O unsinged 7-bit immediate (for ARCompact)
-;; P unsinged 8-bit immediate (for ARCompact)
+;; M unsigned 5-bit immediate (for ARCompact)
+;; O unsigned 7-bit immediate (for ARCompact)
+;; P unsigned 8-bit immediate (for ARCompact)
;; N constant '1' (for ARCompact)
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 29b45ae..8b951f3 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -18983,7 +18983,8 @@ cmse_nonsecure_call_inline_register_clear (void)
call = SET_SRC (call);
/* Check if it is a cmse_nonsecure_call. */
- unspec = XEXP (call, 0);
+ unspec = XVECEXP (pat, 0, 2);
+
if (GET_CODE (unspec) != UNSPEC
|| XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
continue;
@@ -19010,7 +19011,7 @@ cmse_nonsecure_call_inline_register_clear (void)
/* Make sure the register used to hold the function address is not
cleared. */
- address = RTVEC_ELT (XVEC (unspec, 0), 0);
+ address = XEXP (call, 0);
gcc_assert (MEM_P (address));
gcc_assert (REG_P (XEXP (address, 0)));
address_regnum = REGNO (XEXP (address, 0));
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 5e5e112..422ae54 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -8623,7 +8623,7 @@
if (detect_cmse_nonsecure_call (addr))
{
pat = gen_nonsecure_call_internal (operands[0], operands[1],
- operands[2]);
+ operands[2], const0_rtx);
emit_call_insn (pat);
}
else
@@ -8665,10 +8665,10 @@
(clobber (reg:SI LR_REGNUM))])])
(define_expand "nonsecure_call_internal"
- [(parallel [(call (unspec:SI [(match_operand 0 "memory_operand")]
- UNSPEC_NONSECURE_MEM)
+ [(parallel [(call (match_operand 0 "memory_operand")
(match_operand 1 "general_operand"))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))])]
"use_cmse"
{
@@ -8745,7 +8745,8 @@
if (detect_cmse_nonsecure_call (addr))
{
pat = gen_nonsecure_call_value_internal (operands[0], operands[1],
- operands[2], operands[3]);
+ operands[2], operands[3],
+ const0_rtx);
emit_call_insn (pat);
}
else
@@ -8779,10 +8780,10 @@
(define_expand "nonsecure_call_value_internal"
[(parallel [(set (match_operand 0 "" "")
- (call (unspec:SI [(match_operand 1 "memory_operand")]
- UNSPEC_NONSECURE_MEM)
+ (call (match_operand 1 "memory_operand")
(match_operand 2 "general_operand")))
(use (match_operand 3 "" ""))
+ (unspec:SI [(match_operand 4)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))])]
"use_cmse"
"
@@ -13025,7 +13026,7 @@
"arm_coproc_builtin_available (VUNSPEC_<MCRR>)"
{
arm_const_bounds (operands[0], 0, 16);
- arm_const_bounds (operands[1], 0, 8);
+ arm_const_bounds (operands[1], 0, 16);
arm_const_bounds (operands[3], 0, (1 << 5));
return "<mcrr>\\tp%c0, %1, %Q2, %R2, CR%c3";
}
@@ -13040,7 +13041,7 @@
"arm_coproc_builtin_available (VUNSPEC_<MRRC>)"
{
arm_const_bounds (operands[1], 0, 16);
- arm_const_bounds (operands[2], 0, 8);
+ arm_const_bounds (operands[2], 0, 16);
arm_const_bounds (operands[3], 0, (1 << 5));
return "<mrrc>\\tp%c1, %2, %Q0, %R0, CR%c3";
}
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index f9e89e9..4da0086 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1874,10 +1874,10 @@
)
(define_insn "*nonsecure_call_reg_thumb1_v5"
- [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))]
- UNSPEC_NONSECURE_MEM)
+ [(call (mem:SI (reg:SI R4_REGNUM))
(match_operand 0 "" ""))
(use (match_operand 1 "" ""))
+   (unspec:SI [(match_operand 2)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB1 && use_cmse && !SIBLING_CALL_P (insn)"
"bl\\t__gnu_cmse_nonsecure_call"
@@ -1919,11 +1919,10 @@
(define_insn "*nonsecure_call_value_reg_thumb1_v5"
[(set (match_operand 0 "" "")
- (call (unspec:SI
- [(mem:SI (reg:SI R4_REGNUM))]
- UNSPEC_NONSECURE_MEM)
+ (call (mem:SI (reg:SI R4_REGNUM))
(match_operand 1 "" "")))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB1 && use_cmse"
"bl\\t__gnu_cmse_nonsecure_call"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 019f9d4..2c2026b 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -537,10 +537,10 @@
)
(define_insn "*nonsecure_call_reg_thumb2_fpcxt"
- [(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))]
- UNSPEC_NONSECURE_MEM)
+ [(call (mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))
(match_operand 1 "" ""))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE"
"blxns\\t%0"
@@ -549,10 +549,10 @@
)
(define_insn "*nonsecure_call_reg_thumb2"
- [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))]
- UNSPEC_NONSECURE_MEM)
+ [(call (mem:SI (reg:SI R4_REGNUM))
(match_operand 0 "" ""))
(use (match_operand 1 "" ""))
+ (unspec:SI [(match_operand 2)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE"
"bl\\t__gnu_cmse_nonsecure_call"
@@ -573,11 +573,10 @@
(define_insn "*nonsecure_call_value_reg_thumb2_fpcxt"
[(set (match_operand 0 "" "")
- (call
- (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))]
- UNSPEC_NONSECURE_MEM)
- (match_operand 2 "" "")))
+ (call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
+ (match_operand 2 "" "")))
(use (match_operand 3 "" ""))
+ (unspec:SI [(match_operand 4)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE"
"blxns\\t%1"
@@ -587,10 +586,10 @@
(define_insn "*nonsecure_call_value_reg_thumb2"
[(set (match_operand 0 "" "")
- (call
- (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] UNSPEC_NONSECURE_MEM)
- (match_operand 1 "" "")))
+ (call (mem:SI (reg:SI R4_REGNUM))
+ (match_operand 1 "" "")))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE"
"bl\\t__gnu_cmse_nonsecure_call"
diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 903bfbf..66ba5a9 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -101,10 +101,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (plus:ALL8 (reg:ALL8 ACC_A)
- (reg:ALL8 ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3_insn"
[(set (reg:ALL8 ACC_A)
@@ -122,10 +120,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:DI ACC_A)
- (plus:DI (reg:DI ACC_A)
- (sign_extend:DI (reg:QI REG_X))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*adddi3_const8_insn"
[(set (reg:DI ACC_A)
@@ -146,12 +142,10 @@
(match_operand:ALL8 0 "const_operand" "n Ynn")))]
"avr_have_dimode
&& !s8_operand (operands[0], VOIDmode)"
- "#"
- "&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (plus:ALL8 (reg:ALL8 ACC_A)
- (match_dup 0)))
- (clobber (reg:CC REG_CC))])])
+ "#"
+ "&& reload_completed"
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3_const_insn"
[(set (reg:ALL8 ACC_A)
@@ -211,10 +205,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (minus:ALL8 (reg:ALL8 ACC_A)
- (reg:ALL8 ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3_insn"
[(set (reg:ALL8 ACC_A)
@@ -236,10 +228,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (minus:ALL8 (reg:ALL8 ACC_A)
- (match_dup 0)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3_const_insn"
[(set (reg:ALL8 ACC_A)
@@ -288,10 +278,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8S ACC_A)
- (ss_addsub:ALL8S (reg:ALL8S ACC_A)
- (reg:ALL8S ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_insn"
[(set (reg:ALL8S ACC_A)
@@ -309,10 +297,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8S ACC_A)
- (ss_addsub:ALL8S (reg:ALL8S ACC_A)
- (match_dup 0)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_const_insn"
[(set (reg:ALL8S ACC_A)
@@ -361,10 +347,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8U ACC_A)
- (us_addsub:ALL8U (reg:ALL8U ACC_A)
- (reg:ALL8U ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_insn"
[(set (reg:ALL8U ACC_A)
@@ -382,10 +366,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8U ACC_A)
- (us_addsub:ALL8U (reg:ALL8U ACC_A)
- (match_operand:ALL8U 0 "const_operand" "n Ynn")))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_const_insn"
[(set (reg:ALL8U ACC_A)
@@ -421,9 +403,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:DI ACC_A)
- (neg:DI (reg:DI ACC_A)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negdi2_insn"
[(set (reg:DI ACC_A)
@@ -500,7 +481,7 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
emit_insn (gen_compare_<mode>2 ());
emit_jump_insn (gen_conditional_jump (operands[0], operands[1]));
@@ -529,7 +510,7 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
emit_insn (gen_compare_const8_di2 ());
emit_jump_insn (gen_conditional_jump (operands[0], operands[1]));
@@ -556,7 +537,7 @@
&& !s8_operand (operands[1], VOIDmode)"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
emit_insn (gen_compare_const_<mode>2 (operands[1], operands[3]));
emit_jump_insn (gen_conditional_jump (operands[0], operands[2]));
@@ -629,10 +610,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (di_shifts:ALL8 (reg:ALL8 ACC_A)
- (reg:QI 16)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_insn"
[(set (reg:ALL8 ACC_A)
@@ -674,14 +653,10 @@
(clobber (reg:HI REG_Z))]
"avr_have_dimode
&& AVR_HAVE_MUL"
- "#"
- "&& reload_completed"
- [(parallel [(set (reg:DI ACC_A)
- (mult:DI (any_extend:DI (reg:SI 18))
- (any_extend:DI (reg:SI 22))))
- (clobber (reg:HI REG_X))
- (clobber (reg:HI REG_Z))
- (clobber (reg:CC REG_CC))])])
+ "#"
+ "&& reload_completed"
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>mulsidi3_insn"
[(set (reg:DI ACC_A)
diff --git a/gcc/config/avr/avr-fixed.md b/gcc/config/avr/avr-fixed.md
index ce46beb..22061fc 100644
--- a/gcc/config/avr/avr-fixed.md
+++ b/gcc/config/avr/avr-fixed.md
@@ -62,10 +62,8 @@
"<FIXED_B:MODE>mode != <FIXED_A:MODE>mode"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (fract_convert:FIXED_A
- (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fract<FIXED_B:mode><FIXED_A:mode>2"
[(set (match_operand:FIXED_A 0 "register_operand" "=r")
@@ -86,10 +84,8 @@
"<FIXED_B:MODE>mode != <FIXED_A:MODE>mode"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unsigned_fract_convert:FIXED_A
- (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fractuns<FIXED_B:mode><FIXED_A:mode>2"
[(set (match_operand:FIXED_A 0 "register_operand" "=r")
@@ -124,10 +120,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ss_addsub:ALL124S (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3"
[(set (match_operand:ALL124S 0 "register_operand" "=??d,d")
@@ -149,10 +143,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (us_addsub:ALL124U (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3"
[(set (match_operand:ALL124U 0 "register_operand" "=??r,d")
@@ -189,9 +181,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ss_neg:QQ (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ssnegqq2"
[(set (match_operand:QQ 0 "register_operand" "=r")
@@ -207,9 +198,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ss_abs:QQ (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ssabsqq2"
[(set (match_operand:QQ 0 "register_operand" "=r")
@@ -241,9 +231,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2S 24)
- (ss_abs_neg:ALL2S (reg:ALL2S 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>2"
[(set (reg:ALL2S 24)
@@ -261,9 +250,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4S 22)
- (ss_abs_neg:ALL4S (reg:ALL4S 22)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>2"
[(set (reg:ALL4S 22)
@@ -296,10 +284,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:QQ (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqq3_enh"
[(set (match_operand:QQ 0 "register_operand" "=r")
@@ -317,10 +303,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:UQQ (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*muluqq3_enh"
[(set (match_operand:UQQ 0 "register_operand" "=r")
@@ -377,12 +361,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:QQ 23)
- (mult:QQ (reg:QQ 24)
- (reg:QQ 25)))
- (clobber (reg:QI 22))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqq3.call"
[(set (reg:QQ 23)
@@ -425,11 +405,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2QA 24)
- (mult:ALL2QA (reg:ALL2QA 18)
- (reg:ALL2QA 26)))
- (clobber (reg:HI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mul<mode>3.call"
[(set (reg:ALL2QA 24)
@@ -468,10 +445,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4A 24)
- (mult:ALL4A (reg:ALL4A 16)
- (reg:ALL4A 20)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mul<mode>3.call"
[(set (reg:ALL4A 24)
@@ -514,11 +489,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL1Q 24)
- (usdiv:ALL1Q (reg:ALL1Q 25)
- (reg:ALL1Q 22)))
- (clobber (reg:QI 25))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code><mode>3.call"
[(set (reg:ALL1Q 24)
@@ -560,12 +532,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2QA 24)
- (usdiv:ALL2QA (reg:ALL2QA 26)
- (reg:ALL2QA 22)))
- (clobber (reg:HI 26))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code><mode>3.call"
[(set (reg:ALL2QA 24)
@@ -608,12 +576,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4A 22)
- (usdiv:ALL4A (reg:ALL4A 24)
- (reg:ALL4A 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code><mode>3.call"
[(set (reg:ALL4A 22)
@@ -684,12 +648,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:ALL124QA [(match_dup 1)
- (match_dup 2)
- (const_int 0)]
- UNSPEC_ROUND))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3_const"
[(set (match_operand:ALL124QA 0 "register_operand" "=d")
@@ -714,11 +674,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL1Q 24)
- (unspec:ALL1Q [(reg:ALL1Q 22)
- (reg:QI 24)] UNSPEC_ROUND))
- (clobber (reg:ALL1Q 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3.libgcc"
[(set (reg:ALL1Q 24)
@@ -740,11 +697,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2QA 24)
- (unspec:ALL2QA [(reg:ALL2QA 22)
- (reg:QI 24)] UNSPEC_ROUND))
- (clobber (reg:ALL2QA 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3.libgcc"
[(set (reg:ALL2QA 24)
@@ -766,11 +720,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4QA 22)
- (unspec:ALL4QA [(reg:ALL4QA 18)
- (reg:QI 24)] UNSPEC_ROUND))
- (clobber (reg:ALL4QA 18))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3.libgcc"
[(set (reg:ALL4QA 22)
diff --git a/gcc/config/avr/avr-log.cc b/gcc/config/avr/avr-log.cc
index fadb3ca..972ba6b 100644
--- a/gcc/config/avr/avr-log.cc
+++ b/gcc/config/avr/avr-log.cc
@@ -373,7 +373,6 @@ avr_log_set_avr_log (void)
SET_DUMP_DETAIL (insn_addresses);
SET_DUMP_DETAIL (legitimate_address_p);
SET_DUMP_DETAIL (legitimize_address);
- SET_DUMP_DETAIL (legitimize_reload_address);
SET_DUMP_DETAIL (progmem);
SET_DUMP_DETAIL (rtx_costs);
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 6a88a27..69df6d2 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4843,6 +4843,137 @@ avr_pass_fuse_add::execute1 (function *func)
//////////////////////////////////////////////////////////////////////////////
+// Fuse 2 move insns after combine.
+
+static const pass_data avr_pass_data_2moves =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_2moves : public rtl_opt_pass
+{
+public:
+ avr_pass_2moves (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_2moves, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *func) final override
+ {
+ if (optimize && avropt_fuse_move2)
+ {
+ bool changed = false;
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, func)
+ {
+ changed |= optimize_2moves_bb (bb);
+ }
+
+ if (changed)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+ }
+ }
+
+ return 0;
+ }
+
+ bool optimize_2moves (rtx_insn *, rtx_insn *);
+ bool optimize_2moves_bb (basic_block);
+}; // avr_pass_2moves
+
+bool
+avr_pass_2moves::optimize_2moves_bb (basic_block bb)
+{
+ bool changed = false;
+ rtx_insn *insn1 = nullptr;
+ rtx_insn *insn2 = nullptr;
+ rtx_insn *curr;
+
+ FOR_BB_INSNS (bb, curr)
+ {
+ if (insn1 && INSN_P (insn1)
+ && insn2 && INSN_P (insn2))
+ changed |= optimize_2moves (insn1, insn2);
+
+ insn1 = insn2;
+ insn2 = curr;
+ }
+
+ return changed;
+}
+
+bool
+avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2)
+{
+ bool good = false;
+ bool bad = false;
+ rtx set1, dest1, src1;
+ rtx set2, dest2, src2;
+
+ if ((set1 = single_set (insn1))
+ && (set2 = single_set (insn2))
+ && (src1 = SET_SRC (set1))
+ && REG_P (src2 = SET_SRC (set2))
+ && REG_P (dest1 = SET_DEST (set1))
+ && REG_P (dest2 = SET_DEST (set2))
+ && rtx_equal_p (dest1, src2)
+ // Now we have:
+ // insn1: dest1 = src1
+ // insn2: dest2 = dest1
+ && REGNO (dest1) >= FIRST_PSEUDO_REGISTER
+ // Paranoia.
+ && GET_CODE (PATTERN (insn1)) != PARALLEL
+ && GET_CODE (PATTERN (insn2)) != PARALLEL
+ && (rtx_equal_p (dest2, src1)
+ || !reg_overlap_mentioned_p (dest2, src1)))
+ {
+ avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2);
+ avr_dump (";; reg %d: insn uses uids:", REGNO (dest1));
+
+ // Go check that dest1 is used exactly once, namely by insn2.
+
+ df_ref use = DF_REG_USE_CHAIN (REGNO (dest1));
+ for (; use; use = DF_REF_NEXT_REG (use))
+ {
+ rtx_insn *user = DF_REF_INSN (use);
+ avr_dump (" %d", INSN_UID (user));
+ good |= INSN_UID (user) == INSN_UID (insn2);
+ bad |= INSN_UID (user) != INSN_UID (insn2);
+ }
+ avr_dump (".\n");
+
+ if (good && !bad
+ // Propagate src1 to insn2:
+ // insn1: # Deleted
+ // insn2: dest2 = src1
+ && validate_change (insn2, &SET_SRC (set2), src1, false))
+ {
+ SET_INSN_DELETED (insn1);
+ return true;
+ }
+ }
+
+ if (good && !bad)
+ avr_dump (";; Failed\n");
+
+ return false;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
// Split insns with nonzero_bits() after combine.
static const pass_data avr_pass_data_split_nzb =
@@ -5704,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt)
return new avr_pass_casesi (ctxt, "avr-casesi");
}
+// Optimize 2 consecutive moves after combine.
+
+rtl_opt_pass *
+make_avr_pass_2moves (gcc::context *ctxt)
+{
+ return new avr_pass_2moves (ctxt, "avr-2moves");
+}
+
rtl_opt_pass *
make_avr_pass_split_nzb (gcc::context *ctxt)
{
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index eb60a93..d668c7f 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
+/* Insn combine may come up with superfluous reg-reg moves, where the combine
+ people say that these are no problem since reg-alloc is supposed to optimize
+ them. The issue is that the lower-subreg pass sitting between combine and
+ reg-alloc may split such moves, coming up with a zoo of subregs which are
+ only handled poorly by the register allocator. */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves);
+
/* Some combine insns have nonzero_bits() in their condition, though insns
should not use such stuff in their condition. Therefore, we split such
insn into something without nonzero_bits() in their condition right after
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ca30136..8ba1945 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -146,7 +146,6 @@ extern void out_shift_with_cnt (const char *templ, rtx_insn *insn,
extern enum reg_class avr_mode_code_base_reg_class (machine_mode, addr_space_t, rtx_code, rtx_code);
extern bool avr_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, rtx_code, rtx_code);
extern rtx avr_incoming_return_addr_rtx (void);
-extern rtx avr_legitimize_reload_address (rtx*, machine_mode, int, int, int, int, rtx (*)(rtx,int));
extern bool avr_adiw_reg_p (rtx);
extern bool avr_mem_flash_p (rtx);
extern bool avr_mem_flashx_p (rtx);
@@ -168,6 +167,8 @@ regmask (machine_mode mode, unsigned regno)
extern void avr_fix_inputs (rtx*, unsigned, unsigned);
extern bool avr_emit3_fix_outputs (rtx (*)(rtx,rtx,rtx), rtx*, unsigned, unsigned);
+extern rtx avr_add_ccclobber (rtx_insn *);
+#define DONE_ADD_CCC emit (avr_add_ccclobber (curr_insn)); DONE;
extern rtx lpm_reg_rtx;
extern rtx lpm_addr_reg_rtx;
@@ -208,6 +209,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
@@ -238,7 +240,6 @@ typedef struct
unsigned insn_addresses :1;
unsigned legitimate_address_p :1;
unsigned legitimize_address :1;
- unsigned legitimize_reload_address :1;
unsigned progmem :1;
unsigned rtx_costs :1;
} avr_log_t;
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c469297..ae49d4d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -411,6 +411,29 @@ avr_to_int_mode (rtx x)
}
+/* Return the pattern of INSN, but with added (clobber (reg:CC REG_CC)).
+ The pattern of INSN must be a PARALLEL or a SET. INSN is unchanged. */
+
+rtx
+avr_add_ccclobber (rtx_insn *insn)
+{
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == SET || GET_CODE (pat) == PARALLEL);
+
+ int newlen = GET_CODE (pat) == SET ? 2 : 1 + XVECLEN (pat, 0);
+ rtx newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (newlen));
+ rtx elt0 = GET_CODE (pat) == SET ? pat : XVECEXP (pat, 0, 0);
+
+ XVECEXP (newpat, 0, 0) = copy_rtx (elt0);
+ XVECEXP (newpat, 0, newlen - 1) = gen_rtx_CLOBBER (VOIDmode, cc_reg_rtx);
+
+ for (int i = 1; i < newlen - 1; ++i)
+ XVECEXP (newpat, 0, i) = copy_rtx (XVECEXP (pat, 0, i));
+
+ return newpat;
+}
+
+
/* Return true if hard register REG supports the ADIW and SBIW instructions. */
bool
@@ -430,13 +453,6 @@ avr_ld_regno_p (int regno)
}
-static bool
-ra_in_progress ()
-{
- return avropt_lra_p ? lra_in_progress : reload_in_progress;
-}
-
-
/* Set `avr_arch' as specified by `-mmcu='.
Return true on success. */
@@ -2324,8 +2340,8 @@ avr_legitimate_address_p (machine_mode mode, rtx x, bool strict)
if (avr_log.legitimate_address_p)
{
avr_edump ("\n%?: ret=%d, mode=%m strict=%d "
- "reload_completed=%d ra_in_progress=%d %s:",
- ok, mode, strict, reload_completed, ra_in_progress (),
+ "reload_completed=%d lra_in_progress=%d %s:",
+ ok, mode, strict, reload_completed, lra_in_progress,
reg_renumber ? "(reg_renumber)" : "");
if (GET_CODE (x) == PLUS
@@ -2395,88 +2411,6 @@ avr_legitimize_address (rtx x, rtx oldx, machine_mode mode)
}
-/* Implement `LEGITIMIZE_RELOAD_ADDRESS'. */
-/* This will allow register R26/27 to be used where it is no worse than normal
- base pointers R28/29 or R30/31. For example, if base offset is greater
- than 63 bytes or for R++ or --R addressing. */
-
-rtx
-avr_legitimize_reload_address (rtx *px, machine_mode mode, int opnum,
- int type, int addr_type, int /*ind_levels*/,
- rtx (*mk_memloc)(rtx,int))
-{
- rtx x = *px;
-
- if (avr_log.legitimize_reload_address)
- avr_edump ("\n%?:%m %r\n", mode, x);
-
- if (1 && (GET_CODE (x) == POST_INC
- || GET_CODE (x) == PRE_DEC))
- {
- push_reload (XEXP (x, 0), XEXP (x, 0), &XEXP (x, 0), &XEXP (x, 0),
- POINTER_REGS, GET_MODE (x), GET_MODE (x), 0, 0,
- opnum, RELOAD_OTHER);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.1 = %R\n IN = %r\n OUT = %r\n",
- POINTER_REGS, XEXP (x, 0), XEXP (x, 0));
-
- return x;
- }
-
- if (GET_CODE (x) == PLUS
- && REG_P (XEXP (x, 0))
- && reg_equiv_constant (REGNO (XEXP (x, 0))) == 0
- && CONST_INT_P (XEXP (x, 1))
- && INTVAL (XEXP (x, 1)) >= 1)
- {
- bool fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode);
-
- if (fit)
- {
- if (reg_equiv_address (REGNO (XEXP (x, 0))) != 0)
- {
- int regno = REGNO (XEXP (x, 0));
- rtx mem = mk_memloc (x, regno);
-
- push_reload (XEXP (mem, 0), NULL_RTX, &XEXP (mem, 0), NULL,
- POINTER_REGS, Pmode, VOIDmode, 0, 0,
- 1, (enum reload_type) addr_type);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n",
- POINTER_REGS, XEXP (mem, 0), NULL_RTX);
-
- push_reload (mem, NULL_RTX, &XEXP (x, 0), NULL,
- BASE_POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0,
- opnum, (enum reload_type) type);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n",
- BASE_POINTER_REGS, mem, NULL_RTX);
-
- return x;
- }
- }
- else if (! (frame_pointer_needed
- && XEXP (x, 0) == frame_pointer_rtx))
- {
- push_reload (x, NULL_RTX, px, NULL,
- POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0,
- opnum, (enum reload_type) type);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.3 = %R\n IN = %r\n OUT = %r\n",
- POINTER_REGS, x, NULL_RTX);
-
- return x;
- }
- }
-
- return NULL_RTX;
-}
-
-
/* Helper function to print assembler resp. track instruction
sequence lengths. Always return "".
@@ -12824,6 +12758,16 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
return true;
case SIGN_EXTEND:
+ if (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ // "*sext.ashift<QIPSI:mode><HISI:mode>2_split"
+ int m0 = GET_MODE_SIZE (GET_MODE (XEXP (x, 0)));
+ int m1 = GET_MODE_SIZE (mode);
+ *total = COSTS_N_INSNS (m0 * INTVAL (XEXP (XEXP (x, 0), 1))
+ + m1 - m0);
+ return true;
+ }
*total = COSTS_N_INSNS (n_bytes + 2
- GET_MODE_SIZE (GET_MODE (XEXP (x, 0))));
*total += avr_operand_rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
@@ -13936,8 +13880,8 @@ extra_constraint_Q (rtx x)
|| xx == arg_pointer_rtx);
if (avr_log.constraints)
- avr_edump ("\n%?=%d reload_completed=%d ra_in_progress=%d\n %r\n",
- ok, reload_completed, ra_in_progress (), x);
+ avr_edump ("\n%?=%d reload_completed=%d lra_in_progress=%d\n %r\n",
+ ok, reload_completed, lra_in_progress, x);
}
return ok;
@@ -14142,17 +14086,6 @@ avr_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
if (GET_MODE_SIZE (mode) == 1)
return true;
- /* FIXME: Ideally, the following test is not needed.
- However, it turned out that it can reduce the number
- of spill fails. AVR and it's poor endowment with
- address registers is extreme stress test for reload. */
-
- if (GET_MODE_SIZE (mode) >= 4
- && regno + GET_MODE_SIZE (mode) >= REG_30
- // This problem only concerned the old reload.
- && ! avropt_lra_p)
- return false;
-
/* All modes larger than 8 bits should start in an even register. */
return !(regno & 1);
@@ -14418,6 +14351,13 @@ avr_output_addr_vec (rtx_insn *labl, rtx table)
// Output the label that precedes the table.
ASM_OUTPUT_ALIGN (stream, 1);
+
+ char s_labl[40];
+ targetm.asm_out.generate_internal_label (s_labl, "L",
+ CODE_LABEL_NUMBER (labl));
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, s_labl,
+ AVR_HAVE_JMP_CALL ? "object" : "function");
+
targetm.asm_out.internal_label (stream, "L", CODE_LABEL_NUMBER (labl));
// Output the table's content.
@@ -14907,8 +14847,8 @@ avr_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
if (avr_log.legitimate_address_p)
{
avr_edump ("\n%?: ret=%b, mode=%m strict=%d "
- "reload_completed=%d ra_in_progress=%d %s:",
- ok, mode, strict, reload_completed, ra_in_progress (),
+ "reload_completed=%d lra_in_progress=%d %s:",
+ ok, mode, strict, reload_completed, lra_in_progress,
reg_renumber ? "(reg_renumber)" : "");
if (GET_CODE (x) == PLUS
@@ -14984,10 +14924,11 @@ avr_addr_space_convert (rtx src, tree type_old, tree type_new)
/* Linearize memory: RAM has bit 23 set. When as_new = __flashx then
this is basically UB since __flashx mistreats RAM addresses, but there
- is no way to bail out. (Though -Waddr-space-convert will tell.) */
+ is no way to bail out. (Though -Waddr-space-convert will tell.)
+ ...but PR121277 is confusing, in particular when NULL is coming in. */
int msb = ADDR_SPACE_GENERIC_P (as_old)
- ? 0x80
+ ? as_new == ADDR_SPACE_MEMX ? 0x80 : 0x00
: avr_addrspace[as_old].segment;
src = force_reg (Pmode, src);
@@ -15085,10 +15026,16 @@ avr_convert_to_type (tree type, tree expr)
const char *name_old = avr_addrspace[as_old].name;
const char *name_new = avr_addrspace[as_new].name;
- warning (OPT_Waddr_space_convert,
- "conversion from address space %qs to address space %qs",
- ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
- ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
+ // Be relaxed when NULL is used, and when 0x0 stands for
+ // address 0x0.
+ bool nowarn = (expr == null_pointer_node
+ && (as_new == ADDR_SPACE_FLASHX
+ || as_new == ADDR_SPACE_FLASH));
+ if (!nowarn)
+ warning (OPT_Waddr_space_convert,
+ "conversion from address space %qs to address space %qs",
+ ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
+ ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr);
}
@@ -16679,15 +16626,6 @@ avr_unwind_word_mode ()
return Pmode;
}
-
-/* Implement `TARGET_LRA_P'. */
-
-static bool
-avr_use_lra_p ()
-{
- return avropt_lra_p;
-}
-
/* Initialize the GCC target structure. */
@@ -16829,9 +16767,6 @@ avr_use_lra_p ()
#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE avr_convert_to_type
-#undef TARGET_LRA_P
-#define TARGET_LRA_P avr_use_lra_p
-
#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P avr_addr_space_subset_p
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index cb818c3..335f9fa5 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -309,12 +309,6 @@ enum reg_class {
#define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
-#define RELOAD_ELIMINABLE_REGS { \
- { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
- { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
- { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
- { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
-
#define ELIMINABLE_REGS \
{ \
{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
@@ -358,18 +352,6 @@ typedef struct avr_args
#define MAX_REGS_PER_ADDRESS 1
-#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \
- do { \
- rtx new_x = avr_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \
- ADDR_TYPE (TYPE), \
- IND_L, make_memloc); \
- if (new_x) \
- { \
- X = new_x; \
- goto WIN; \
- } \
- } while (0)
-
/* We increase branch costs after reload in order to keep basic-block
reordering from introducing out-of-line jumps and to prefer fall-through
edges instead. The default branch costs are 0, mainly because otherwise
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index f8bbdc7..60b1f60 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -574,9 +574,8 @@
&& REG_Z == REGNO (XEXP (operands[0], 0))"
"#"
"&& reload_completed"
- [(parallel [(set (reg:MOVMODE 22)
- (match_dup 0))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*load_<mode>_libgcc"
[(set (reg:MOVMODE 22)
@@ -716,14 +715,8 @@
|| avr_load_libgcc_insn_p (insn, ADDR_SPACE_FLASHX, true)"
"#"
"&& reload_completed"
- [(parallel [(set (reg:MOVMODE REG_22)
- (match_dup 0))
- (clobber (reg:QI REG_21))
- (clobber (reg:HI REG_Z))
- (clobber (reg:CC REG_CC))])]
- {
- operands[0] = SET_SRC (single_set (curr_insn));
- })
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fxload_<mode>_libgcc"
[(set (reg:MOVMODE REG_22)
@@ -853,9 +846,8 @@
|| reg_or_0_operand (operands[1], <MODE>mode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "movqi_insn"
;; "movqq_insn" "movuqq_insn"
@@ -964,9 +956,8 @@
|| reg_or_0_operand (operands[1], <MODE>mode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mov<mode>"
[(set (match_operand:ALL2 0 "nonimmediate_operand" "=r,r ,r,m ,d,*r,q,r")
@@ -1137,9 +1128,8 @@
|| const0_rtx == operands[1]"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*movpsi"
[(set (match_operand:PSI 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r")
@@ -1197,9 +1187,8 @@
|| reg_or_0_operand (operands[1], <MODE>mode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mov<mode>"
[(set (match_operand:ALL4 0 "nonimmediate_operand" "=r,r ,r ,Qm ,!d,r")
@@ -1245,9 +1234,8 @@
|| reg_or_0_operand (operands[1], SFmode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*movsf"
[(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r")
@@ -1326,16 +1314,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (reg:HI REG_X))
- (mem:BLK (reg:HI REG_Z)))
- (unspec [(match_dup 0)]
- UNSPEC_CPYMEM)
- (use (match_dup 1))
- (clobber (reg:HI REG_X))
- (clobber (reg:HI REG_Z))
- (clobber (reg:QI LPM_REGNO))
- (clobber (match_dup 2))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*cpymem_<mode>"
[(set (mem:BLK (reg:HI REG_X))
@@ -1382,22 +1362,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (reg:HI REG_X))
- (match_dup 2))
- (unspec [(match_dup 0)]
- UNSPEC_CPYMEM)
- (use (reg:QIHI 24))
- (clobber (reg:HI REG_X))
- (clobber (reg:HI REG_Z))
- (clobber (reg:QI LPM_REGNO))
- (clobber (reg:HI 24))
- (clobber (reg:QI 23))
- (clobber (mem:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])]
- {
- rtx xset = XVECEXP (PATTERN (curr_insn), 0, 0);
- operands[2] = SET_SRC (xset);
- })
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*cpymemx_<mode>"
[(set (mem:BLK (reg:HI REG_X))
@@ -1461,13 +1427,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (match_dup 0))
- (const_int 0))
- (use (match_dup 1))
- (use (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*clrmemqi"
[(set (mem:BLK (match_operand:HI 0 "register_operand" "e"))
@@ -1492,14 +1453,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (match_dup 0))
- (const_int 0))
- (use (match_dup 1))
- (use (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "adiw,*")])
@@ -1550,13 +1505,8 @@
""
"#"
"&& reload_completed"
- [(parallel
- [(set (match_dup 0)
- (unspec:HI [(mem:BLK (match_dup 1))
- (const_int 0)
- (match_dup 2)]
- UNSPEC_STRLEN))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*strlenhi"
[(set (match_operand:HI 0 "register_operand" "=e")
@@ -1581,10 +1531,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,d ,r ,r ,r ,r")
@@ -1640,10 +1588,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (zero_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend"
[(set (match_operand:HI 0 "register_operand" "=r,*?r")
@@ -1663,10 +1609,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (match_dup 1)
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend1"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1684,10 +1628,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (zero_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend.const"
[(set (match_operand:HI 0 "register_operand" "=d")
@@ -1723,11 +1665,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (ashift:HI (zero_extend:HI (match_dup 1))
- (const_int 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend.ashift1"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1752,11 +1691,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
-
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*usum_widenqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1774,10 +1710,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udiff_widenqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1797,7 +1731,7 @@
return avr_out_addto_sp (operands, NULL);
}
""
- [(const_int 0)]
+ [(scratch)]
{
// Do not attempt to split this pattern. This FAIL is necessary
// to prevent the splitter from matching *add<ALL2>3_split, splitting
@@ -1909,11 +1843,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "*addhi3_clobber"
;; "*addhq3_clobber" "*adduhq3_clobber"
@@ -1943,11 +1874,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3"
[(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r")
@@ -1979,10 +1907,8 @@
&& (<HISI:SIZE> > 2 || <CODE> == SIGN_EXTEND)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HISI (any_extend:HISI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "*addhi3.sign_extend.qi"
;; "*addpsi3.zero_extend.qi" "*addpsi3.sign_extend.qi"
@@ -2019,10 +1945,8 @@
"<HISI:SIZE> > <QIPSI:SIZE>"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HISI (match_dup 1)
- (any_extend:HISI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "*subhi3.zero_extend.qi" "*subhi3.sign_extend.qi"
;; "*subpsi3.zero_extend.qi" "*subpsi3.sign_extend.qi"
@@ -2053,11 +1977,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3 ))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,d ,d,r")
@@ -2079,10 +2000,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*subpsi3"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -2106,10 +2025,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=??r,d ,r ,r ,r ,r")
@@ -2137,11 +2054,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3"
[(set (match_operand:ALL2 0 "register_operand" "=??r,d ,*r")
@@ -2167,11 +2081,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3"
[(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r")
@@ -2209,10 +2120,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqi3_enh"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2243,10 +2152,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24)
- (mult:QI (reg:QI 24) (reg:QI 22)))
- (clobber (reg:QI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqi3_call"
[(set (reg:QI 24)
@@ -2269,12 +2176,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (truncate:QI
- (lshiftrt:HI (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2)))
- (const_int 8))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_su>mulqi3_highpart"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2361,21 +2264,21 @@
(const_int 0))))
(clobber (reg:CC REG_CC))])])
-;; *subqi3.lt0 *subqi3.ge0
-;; *subhi3.lt0 *subhi3.ge0
-;; *subpsi3.lt0 *subpsi3.ge0
-;; *subsi3.lt0 *subsi3.ge0
-(define_insn "*sub<QISI:mode>3.<code>0"
- [(set (match_operand:QISI 0 "register_operand" "=r")
- (minus:QISI (match_operand:QISI 1 "register_operand" "0")
- (gelt:QISI (match_operand:QISI2 2 "register_operand" "r")
- (const_int 0))))
- (clobber (reg:CC REG_CC))]
- "reload_completed"
- {
- return avr_out_add_msb (insn, operands, <CODE>, nullptr);
- }
- [(set_attr "adjust_len" "add_<code>0")])
+;; *addqi3.lt0_split *addqi3.ge0_split
+;; *addhi3.lt0_split *addhi3.ge0_split
+;; *addpsi3.lt0_split *addpsi3.ge0_split
+;; *addsi3.lt0_split *addsi3.ge0_split
+(define_insn_and_split "*add<QISI:mode>3.<code>0_split"
+ [(set (match_operand:QISI 0 "register_operand" "=r")
+ (plus:QISI (gelt:QISI (match_operand:QISI2 1 "register_operand" "r")
+ (const_int 0))
+ (match_operand:QISI 2 "register_operand" "0")))]
+ ""
+ "#"
+ "&& reload_completed"
+ ; *add<QISI:mode>3.<code>0
+ [(scratch)]
+ { DONE_ADD_CCC })
;; *addqi3.lt0 *addqi3.ge0
;; *addhi3.lt0 *addhi3.ge0
@@ -2393,25 +2296,6 @@
}
[(set_attr "adjust_len" "add_<code>0")])
-;; *addqi3.lt0_split *addqi3.ge0_split
-;; *addhi3.lt0_split *addhi3.ge0_split
-;; *addpsi3.lt0_split *addpsi3.ge0_split
-;; *addsi3.lt0_split *addsi3.ge0_split
-(define_insn_and_split "*add<QISI:mode>3.<code>0_split"
- [(set (match_operand:QISI 0 "register_operand" "=r")
- (plus:QISI (gelt:QISI (match_operand:QISI2 1 "register_operand" "r")
- (const_int 0))
- (match_operand:QISI 2 "register_operand" "0")))]
- ""
- "#"
- "&& reload_completed"
- [; *add<QISI:mode>3.<code>0
- (parallel [(set (match_dup 0)
- (plus:QISI (gelt:QISI (match_dup 1)
- (const_int 0))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
-
;; *subqi3.lt0_split *subqi3.ge0_split
;; *subhi3.lt0_split *subhi3.ge0_split
;; *subpsi3.lt0_split *subpsi3.ge0_split
@@ -2424,13 +2308,25 @@
""
"#"
"&& reload_completed"
- [; *sub<QISI:mode>3.<code>0
- (parallel [(set (match_dup 0)
- (minus:QISI (match_dup 1)
- (gelt:QISI (match_dup 2)
- (const_int 0))))
- (clobber (reg:CC REG_CC))])])
+ ; *sub<QISI:mode>3.<code>0
+ [(scratch)]
+ { DONE_ADD_CCC })
+;; *subqi3.lt0 *subqi3.ge0
+;; *subhi3.lt0 *subhi3.ge0
+;; *subpsi3.lt0 *subpsi3.ge0
+;; *subsi3.lt0 *subsi3.ge0
+(define_insn "*sub<QISI:mode>3.<code>0"
+ [(set (match_operand:QISI 0 "register_operand" "=r")
+ (minus:QISI (match_operand:QISI 1 "register_operand" "0")
+ (gelt:QISI (match_operand:QISI2 2 "register_operand" "r")
+ (const_int 0))))
+ (clobber (reg:CC REG_CC))]
+ "reload_completed"
+ {
+ return avr_out_add_msb (insn, operands, <CODE>, nullptr);
+ }
+ [(set_attr "adjust_len" "add_<code>0")])
(define_insn_and_split "*umulqihi3.call_split"
[(set (reg:HI 24)
@@ -2441,12 +2337,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (mult:HI (zero_extend:HI (reg:QI 22))
- (zero_extend:HI (reg:QI 24))))
- (clobber (reg:QI 21))
- (clobber (reg:HI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*umulqihi3.call"
[(set (reg:HI 24)
@@ -2469,10 +2361,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "<extend_u>mulqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2492,10 +2382,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (zero_extend:HI (match_dup 1))
- (sign_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*usmulqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2517,10 +2405,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (sign_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sumulqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2542,10 +2428,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 1))))
- (sign_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*osmulqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -2566,10 +2450,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 1))))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*oumulqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -2596,11 +2478,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:QI (mult:QI (match_dup 1)
- (match_dup 2))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*maddqi4"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2622,11 +2501,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:QI (match_dup 3)
- (mult:QI (match_dup 1)
- (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*msubqi4"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2705,11 +2581,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2)))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>maddqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2734,11 +2607,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HI (match_dup 3)
- (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2)))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>msubqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2765,11 +2635,8 @@
&& <any_extend:CODE> != <any_extend2:CODE>"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (mult:HI (any_extend:HI (match_dup 1))
- (any_extend2:HI (match_dup 2)))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<any_extend:extend_su><any_extend2:extend_su>msubqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2800,11 +2667,8 @@
&& <any_extend:CODE> != <any_extend2:CODE>"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HI (match_dup 3)
- (mult:HI (any_extend:HI (match_dup 1))
- (any_extend2:HI (match_dup 2)))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<any_extend:extend_su><any_extend2:extend_su>msubqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -3072,16 +2936,14 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:HI (sign_extend:HI (match_dup 1))
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ashiftqihi2.signx.1"
[(set (match_operand:HI 0 "register_operand" "=r,*r")
(ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0,r"))
(const_int 1)))
- (clobber (reg:CC REG_CC)) ]
+ (clobber (reg:CC REG_CC))]
"reload_completed"
"@
lsl %A0\;sbc %B0,%B0
@@ -3142,6 +3004,41 @@
operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode);
})
+(define_insn_and_split "*sext.ashift<QIPSI:mode><HISI:mode>2_split"
+ [(set (match_operand:HISI 0 "register_operand" "=r")
+ (sign_extend:HISI (ashift:QIPSI (match_operand:QIPSI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "PKC03"))))]
+ "<HISI:SIZE> > <QIPSI:SIZE>
+ && IN_RANGE (INTVAL (operands[2]), 1, 2 + (<QIPSI:SIZE> <= 2))"
+ "#"
+ "&& reload_completed"
+ [(scratch)]
+ { DONE_ADD_CCC })
+
+(define_insn "*sext.ashift<QIPSI:mode><HISI:mode>2"
+ [(set (match_operand:HISI 0 "register_operand" "=r")
+ (sign_extend:HISI (ashift:QIPSI (match_operand:QIPSI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "PKC03"))))
+ (clobber (reg:CC REG_CC))]
+ "reload_completed
+ && <HISI:SIZE> > <QIPSI:SIZE>
+ && IN_RANGE (INTVAL (operands[2]), 1, 2 + (<QIPSI:SIZE> <= 2))"
+ {
+ const int regno = REGNO (operands[0]);
+ // The shift.
+ for (int s = 0; s < (int) INTVAL (operands[2]); ++s)
+ for (int b = 0; b < <QIPSI:SIZE>; ++b)
+ output_asm_insn (b == 0 ? "lsl %0" : "rol %0",
+ &all_regs_rtx[regno + b]);
+ // Sign-extend can use carry.
+ for (int b = <QIPSI:SIZE>; b < <HISI:SIZE>; ++b)
+ output_asm_insn ("sbc %0,%0", &all_regs_rtx[regno + b]);
+ return "";
+ }
+ [(set (attr "length")
+ (plus (symbol_ref "<QIPSI:SIZE> * INTVAL (operands[2])")
+ (symbol_ref "<HISI:SIZE> - <QIPSI:SIZE>")))])
+
;******************************************************************************
; mul HI: $1 = sign-/zero-/one-extend, $2 = reg
;******************************************************************************
@@ -3153,10 +3050,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (sign_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulsqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3178,10 +3073,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (zero_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*muluqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3205,10 +3098,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 1))))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*muloqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3277,10 +3168,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulhi3_enh"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3319,11 +3208,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (mult:HI (reg:HI 24) (reg:HI 22)))
- (clobber (reg:HI 22))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulhi3_call"
[(set (reg:HI 24)
@@ -3719,11 +3605,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (reg:SI 22)
- (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn_and_split "*mulsi3_call_pr118012_split"
[(set (reg:SI 22)
@@ -3737,13 +3620,8 @@
&& ! AVR_TINY"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (reg:SI 22)
- (reg:SI 18)))
- (clobber (reg:SI 18))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulsi3_call"
[(set (reg:SI 22)
@@ -3779,10 +3657,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (any_extend:SI (reg:HI 18))
- (any_extend:SI (reg:HI 26))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>mulhisi3_call"
[(set (reg:SI 22)
@@ -3804,12 +3680,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18))
- (any_extend:SI (reg:HI 26)))
- (const_int 16))))
- (clobber (reg:HI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_su>mulhi3_highpart_call"
[(set (reg:HI 24)
@@ -3829,10 +3701,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (zero_extend:SI (reg:HI 18))
- (sign_extend:SI (reg:HI 26))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*usmulhisi3_call"
[(set (reg:SI 22)
@@ -3850,10 +3720,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (any_extend:SI (reg:HI 26))
- (reg:SI 18)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mul<extend_su>hisi3_call"
[(set (reg:SI 22)
@@ -3871,10 +3739,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
- (reg:SI 18)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulohisi3_call"
[(set (reg:SI 22)
@@ -3925,11 +3791,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22)))
- (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22)))
- (clobber (reg:QI 22))
- (clobber (reg:QI 23))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodqi4_call"
[(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22)))
@@ -3969,10 +3832,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22)))
- (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22)))
- (clobber (reg:QI 23))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodqi4_call"
[(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22)))
@@ -4013,11 +3874,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22)))
- (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22)))
- (clobber (reg:HI 26))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodhi4_call"
[(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22)))
@@ -4059,11 +3917,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22)))
- (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22)))
- (clobber (reg:HI 26))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodhi4_call"
[(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22)))
@@ -4112,10 +3967,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:PSI (zero_extend:PSI (match_dup 1))
- (zero_extend:PSI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*umulqihipsi3"
[(set (match_operand:PSI 0 "register_operand" "=&r")
@@ -4134,31 +3987,17 @@
(define_insn_and_split "*umulhiqipsi3_split"
[(set (match_operand:PSI 0 "register_operand" "=&r")
- (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
- (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))]
+ (mult:PSI (zero_extend:PSI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:PSI (match_operand:QI 2 "register_operand" "r"))))]
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
+ ; "*umulqihipsi3"
[(parallel [(set (match_dup 0)
(mult:PSI (zero_extend:PSI (match_dup 2))
(zero_extend:PSI (match_dup 1))))
(clobber (reg:CC REG_CC))])])
-(define_insn "*umulhiqipsi3"
- [(set (match_operand:PSI 0 "register_operand" "=&r")
- (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
- (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))
- (clobber (reg:CC REG_CC))]
- "AVR_HAVE_MUL && reload_completed"
- "mul %1,%A2
- movw %A0,r0
- mul %1,%B2
- add %B0,r0
- mov %C0,r1
- clr __zero_reg__
- adc %C0,__zero_reg__"
- [(set_attr "length" "7")])
-
(define_expand "mulsqipsi3"
[(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
(mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" ""))
@@ -4229,10 +4068,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 18)
- (mult:PSI (sign_extend:PSI (reg:QI 25))
- (reg:PSI 22)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulsqipsi3.libgcc"
[(set (reg:PSI 18)
@@ -4253,13 +4090,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 22)
- (mult:PSI (reg:PSI 22)
- (reg:PSI 18)))
- (clobber (reg:QI 21))
- (clobber (reg:QI 25))
- (clobber (reg:HI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulpsi3.libgcc"
[(set (reg:PSI 22)
@@ -4311,12 +4143,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
- (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
- (clobber (reg:QI 21))
- (clobber (reg:QI 25))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodpsi4_call"
[(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
@@ -4360,12 +4188,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
- (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
- (clobber (reg:QI 21))
- (clobber (reg:QI 25))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodpsi4_call"
[(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
@@ -4411,11 +4235,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18)))
- (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodsi4_call"
[(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18)))
@@ -4458,11 +4279,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18)))
- (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodsi4_call"
[(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18)))
@@ -4484,10 +4302,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andqi3"
[(set (match_operand:QI 0 "register_operand" "=??r,d,*l ,r")
@@ -4511,11 +4327,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:HI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andhi3"
[(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r ,r")
@@ -4545,11 +4358,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r ,r")
@@ -4580,11 +4390,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:SI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andsi3"
[(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r ,r")
@@ -4634,10 +4441,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorqi3"
[(set (match_operand:QI 0 "register_operand" "=??r,d,*l")
@@ -4659,11 +4464,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:HI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorhi3"
[(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r")
@@ -4691,11 +4493,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r")
@@ -4723,11 +4522,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:SI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorsi3"
[(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r")
@@ -4758,10 +4554,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorqi3"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -4780,11 +4574,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:HI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorhi3"
[(set (match_operand:HI 0 "register_operand" "=??r,r ,d ,r")
@@ -4810,11 +4601,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,r ,d ,r")
@@ -4842,11 +4630,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:SI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorsi3"
[(set (match_operand:SI 0 "register_operand" "=??r,r ,d ,r")
@@ -4918,7 +4703,7 @@
(clobber (reg:CC REG_CC))])]
"optimize
&& reload_completed"
- [(const_int 1)]
+ [(scratch)]
{
for (int i = 0; i < <SIZE>; i++)
{
@@ -5026,10 +4811,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlqi3"
[(set (match_operand:QI 0 "register_operand" "=r,r,r ,r ,r ,r ,r ,r")
@@ -5099,10 +4882,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:HI (match_dup 1)
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlhi2.1"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -5120,10 +4901,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:HI (match_dup 1)
- (const_int 15)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlhi2.15"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -5141,10 +4920,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:PSI (match_dup 1)
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlpsi2.1"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -5162,10 +4939,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:PSI (match_dup 1)
- (const_int 23)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlpsi2.23"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -5183,10 +4958,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:SI (match_dup 1)
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlsi2.1"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -5204,10 +4977,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:SI (match_dup 1)
- (const_int 31)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlsi2.31"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -5239,7 +5010,7 @@
&& 0 == INTVAL (operands[2]) % 16"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
avr_rotate_bytes (operands);
DONE;
@@ -5263,7 +5034,7 @@
&& 0 == INTVAL (operands[2]) % 16))"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
avr_rotate_bytes (operands);
DONE;
@@ -5273,41 +5044,6 @@
;;<< << << << << << << << << << << << << << << << << << << << << << << << << <<
;; arithmetic shift left
-;; Work around PR120423: Transform left shift of a paradoxical subreg
-;; into left shift of the zero-extended entity.
-(define_split ; PR120423
- [(set (match_operand:HISI 0 "register_operand")
- (ashift:HISI (subreg:HISI (match_operand:QIPSI 1 "nonimmediate_operand")
- 0)
- (match_operand:QI 2 "const_int_operand")))]
- "!reload_completed
- && !avropt_lra_p
- && <HISI:SIZE> > <QIPSI:SIZE>"
- [(set (match_dup 4)
- (zero_extend:HISI (match_dup 5)))
- (set (match_dup 0)
- (ashift:HISI (match_dup 4)
- (match_dup 2)))]
- {
- operands[4] = gen_reg_rtx (<HISI:MODE>mode);
- operands[5] = force_reg (<QIPSI:MODE>mode, operands[1]);
- })
-
-;; Similar happens for PR116389.
-(define_split ; PR116389
- [(set (match_operand:HISI 0 "register_operand")
- (subreg:HISI (match_operand:QIPSI 1 "nonimmediate_operand")
- 0))]
- "!reload_completed
- && !avropt_lra_p
- && <HISI:SIZE> > <QIPSI:SIZE>"
- [(set (match_dup 0)
- (zero_extend:HISI (match_dup 2)))]
- {
- operands[2] = force_reg (<QIPSI:MODE>mode, operands[1]);
- })
-
-
;; "ashlqi3"
;; "ashlqq3" "ashluqq3"
(define_expand "ashl<mode>3"
@@ -5363,10 +5099,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ashl<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r")
@@ -5390,11 +5124,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
;; "*ashlhi3"
@@ -5506,11 +5237,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*ashl<mode>3"
@@ -5749,12 +5477,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*")])
(define_insn "*ashlpsi3"
@@ -5808,10 +5532,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ashr<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r")
@@ -5835,11 +5557,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
;; "*ashrhi3"
@@ -5866,12 +5585,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*")])
(define_insn "*ashrpsi3"
@@ -5898,11 +5613,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*ashr<mode>3"
@@ -6013,10 +5725,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*lshr<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r")
@@ -6039,11 +5749,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*lshr<mode>3"
@@ -6066,12 +5773,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*")])
(define_insn "*lshrpsi3"
@@ -6098,11 +5801,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*lshr<mode>3"
@@ -6217,9 +5917,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (abs:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*absqi2"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -6237,9 +5936,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (abs:SF (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*abssf2"
[(set (match_operand:SF 0 "register_operand" "=d,r")
@@ -6260,9 +5958,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negqi2"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -6278,9 +5975,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:HI (sign_extend:HI (match_dup 1))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negqihi2"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -6296,9 +5992,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:HI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*neghi2"
[(set (match_operand:HI 0 "register_operand" "=r,&r")
@@ -6316,9 +6011,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negpsi2"
[(set (match_operand:PSI 0 "register_operand" "=!d,r,&r")
@@ -6337,10 +6031,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,mov,movw")])
(define_insn "*negsi2.libgcc"
@@ -6371,9 +6063,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:SF (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negsf2"
[(set (match_operand:SF 0 "register_operand" "=d,r")
@@ -6394,9 +6085,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplqi2"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -6412,9 +6102,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:HI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplhi2"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -6431,9 +6120,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplpsi2"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -6449,9 +6137,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplsi2"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -6480,9 +6167,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:HI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r,r")
@@ -6501,9 +6187,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendqipsi2"
[(set (match_operand:PSI 0 "register_operand" "=r,r")
@@ -6522,9 +6207,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -6543,9 +6227,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendhipsi2"
[(set (match_operand:PSI 0 "register_operand" "=r,r")
@@ -6564,9 +6247,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendhisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -6585,9 +6267,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendpsisi2"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -7032,10 +6713,11 @@
"#"
"reload_completed"
[(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))])
@@ -7054,11 +6736,12 @@
"#"
"reload_completed"
[(parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(clobber (match_dup 4))])
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))]
{
@@ -7081,11 +6764,12 @@
"#"
"reload_completed"
[(parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(clobber (match_dup 4))])
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))]
{
@@ -7109,11 +6793,12 @@
"#"
"reload_completed"
[(parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(clobber (match_dup 4))])
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))]
{
@@ -7668,17 +7353,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_op_dup 0
- [(zero_extract:QIDI
- (match_dup 1)
- (const_int 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbrx_branch<mode>"
[(set (pc)
@@ -7721,13 +7397,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else (match_op_dup 0 [(and:QISI (match_dup 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbrx_and_branch<mode>"
[(set (pc)
@@ -7968,14 +7639,8 @@
"!AVR_HAVE_EIJMP_EICALL"
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (unspec:HI [(match_dup 0)]
- UNSPEC_INDEX_JMP))
- (use (label_ref (match_dup 1)))
- (clobber (match_dup 2))
- (clobber (const_int 0))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "rjmp,rjmp,jmp")])
(define_insn "*tablejump"
@@ -8004,14 +7669,8 @@
"AVR_HAVE_EIJMP_EICALL"
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (unspec:HI [(reg:HI REG_Z)]
- UNSPEC_INDEX_JMP))
- (use (label_ref (match_dup 0)))
- (clobber (reg:HI REG_Z))
- (clobber (reg:QI 24))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "eijmp")])
@@ -8182,17 +7841,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "eqne_operator"
- [(zero_extract:QIHI
- (mem:QI (match_dup 1))
- (const_int 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch"
[(set (pc)
@@ -8230,14 +7880,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "gelt_operator"
- [(mem:QI (match_dup 1))
- (const_int 0)])
- (label_ref (match_dup 2))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch_bit7"
[(set (pc)
@@ -8277,17 +7921,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "eqne_operator"
- [(zero_extract:QIHI
- (mem:QI (match_dup 1))
- (const_int 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch_tmp"
[(set (pc)
@@ -8324,14 +7959,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "gelt_operator"
- [(mem:QI (match_dup 1))
- (const_int 0)])
- (label_ref (match_dup 2))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch_tmp_bit7"
[(set (pc)
@@ -8784,13 +8413,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 1)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*delay_cycles_1"
[(unspec_volatile [(match_operand:QI 0 "const_int_operand" "n")
@@ -8816,14 +8440,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 2)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "adiw,no_adiw")])
(define_insn "*delay_cycles_2"
@@ -8853,15 +8471,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 3)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*delay_cycles_3"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")
@@ -8896,16 +8507,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 4)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (match_dup 5))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*delay_cycles_4"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")
@@ -8942,12 +8545,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:QI [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_INSERT_BITS))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*insert_bits"
[(set (match_operand:QI 0 "register_operand" "=r ,d ,r")
@@ -9127,12 +8726,13 @@
"#"
"reload_completed"
[(set (reg:CC REG_CC)
- (compare:CC (match_dup 0) (const_int 0)))
+ (compare:CC (match_dup 0)
+ (const_int 0)))
(set (pc)
- (if_then_else (ge (reg:CC REG_CC) (const_int 0))
+ (if_then_else (ge (reg:CC REG_CC)
+ (const_int 0))
(label_ref (match_dup 1))
- (pc)))]
- "")
+ (pc)))])
(define_expand "flash_segment"
[(parallel [(match_operand:QI 0 "register_operand" "")
@@ -9235,9 +8835,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (parity:HI (reg:HI 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*parityhi2.libgcc"
[(set (reg:HI 24)
@@ -9253,9 +8852,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (zero_extend:HI (parity:QI (reg:QI 24))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*parityqihi2.libgcc"
[(set (reg:HI 24)
@@ -9271,9 +8869,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (parity:SI (reg:SI 22))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*paritysihi2.libgcc"
[(set (reg:HI 24)
@@ -9329,9 +8926,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (popcount:HI (reg:HI 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*popcounthi2.libgcc"
[(set (reg:HI 24)
@@ -9347,9 +8943,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (popcount:SI (reg:SI 22))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*popcountsi2.libgcc"
[(set (reg:HI 24)
@@ -9365,9 +8960,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24)
- (popcount:QI (reg:QI 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*popcountqi2.libgcc"
[(set (reg:QI 24)
@@ -9421,10 +9015,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (clz:HI (reg:HI 24)))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*clzhi2.libgcc"
[(set (reg:HI 24)
@@ -9442,10 +9034,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (clz:SI (reg:SI 22))))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*clzsihi2.libgcc"
[(set (reg:HI 24)
@@ -9490,10 +9080,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (ctz:HI (reg:HI 24)))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ctzhi2.libgcc"
[(set (reg:HI 24)
@@ -9512,11 +9100,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (ctz:SI (reg:SI 22))))
- (clobber (reg:QI 22))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ctzsihi2.libgcc"
[(set (reg:HI 24)
@@ -9562,10 +9147,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (ffs:HI (reg:HI 24)))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ffshi2.libgcc"
[(set (reg:HI 24)
@@ -9584,11 +9167,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (ffs:SI (reg:SI 22))))
- (clobber (reg:QI 22))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ffssihi2.libgcc"
[(set (reg:HI 24)
@@ -9633,9 +9213,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (bswap:SI (reg:SI 22)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*bswapsi2.libgcc"
[(set (reg:SI 22)
@@ -9742,11 +9321,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:HI [(match_dup 1)
- (match_dup 2)]
- UNSPEC_FMUL))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmul_insn"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -9768,11 +9344,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22)
- (unspec:HI [(reg:QI 24)
- (reg:QI 25)] UNSPEC_FMUL))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmul.call"
[(set (reg:HI 22)
@@ -9814,11 +9387,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:HI [(match_dup 1)
- (match_dup 2)]
- UNSPEC_FMULS))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmuls_insn"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -9840,11 +9410,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22)
- (unspec:HI [(reg:QI 24)
- (reg:QI 25)] UNSPEC_FMULS))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmuls.call"
[(set (reg:HI 22)
@@ -9886,11 +9453,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:HI [(match_dup 1)
- (match_dup 2)]
- UNSPEC_FMULSU))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmulsu_insn"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -9912,11 +9476,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22)
- (unspec:HI [(reg:QI 24)
- (reg:QI 25)] UNSPEC_FMULSU))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmulsu.call"
[(set (reg:HI 22)
@@ -10037,11 +9598,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (zero_extract:QI (match_dup 0)
- (const_int 1)
- (match_dup 1))
- (match_dup 2))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*insv.reg"
[(set (zero_extract:QI (match_operand:QI 0 "register_operand" "+r,d,d,l,l")
@@ -10478,11 +10036,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (zero_extract:QI (not:QI (match_dup 1))
- (const_int 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extzv.not"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -10619,11 +10174,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:QISI (any_shift:QISI (match_dup 1)
- (match_dup 2))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*insv.any_shift.<mode>"
[(set (match_operand:QISI 0 "register_operand" "=r")
@@ -10686,11 +10238,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extract:QISI (match_dup 1)
- (const_int 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sextr.<QISI:mode>.<QISI2:mode>"
[(set (match_operand:QISI 0 "register_operand" "=r")
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 9883119..2bed8ea 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -18,10 +18,6 @@
; along with GCC; see the file COPYING3. If not see
; <http://www.gnu.org/licenses/>.
-mlra
-Target Var(avropt_lra_p) UInteger Init(1) Optimization Undocumented
-Usa LRA for reload instead of the old reload framework. This option is experimental, on per default, and it may be removed in future versions of the compiler.
-
mcall-prologues
Target Mask(CALL_PROLOGUES) Optimization
Optimization. Use subroutines for function prologues and epilogues.
@@ -164,6 +160,10 @@ mfuse-move=
Target Joined RejectNegative UInteger Var(avropt_fuse_move) Init(0) Optimization IntegerRange(0, 23)
-mfuse-move=<0,23> Optimization. Run a post-reload pass that tweaks move instructions.
+mfuse-move2
+Target Var(avropt_fuse_move2) Init(0) Optimization
+Optimization. Fuse some move insns after insn combine.
+
mabsdata
Target Mask(ABSDATA)
Assume that all data in static storage can be accessed by LDS / STS instructions. This option is only useful for reduced Tiny devices like ATtiny40.
diff --git a/gcc/config/avr/avr.opt.urls b/gcc/config/avr/avr.opt.urls
index 662fdee..fa560bc 100644
--- a/gcc/config/avr/avr.opt.urls
+++ b/gcc/config/avr/avr.opt.urls
@@ -1,7 +1,5 @@
; Autogenerated by regenerate-opt-urls.py from gcc/config/avr/avr.opt and generated HTML
-; skipping UrlSuffix for 'mlra' due to finding no URLs
-
mcall-prologues
UrlSuffix(gcc/AVR-Options.html#index-mcall-prologues)
@@ -92,6 +90,9 @@ UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
mfuse-move=
UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
+mfuse-move2
+UrlSuffix(gcc/AVR-Options.html#index-mfuse-move2)
+
mabsdata
UrlSuffix(gcc/AVR-Options.html#index-mabsdata)
diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h
index ff269bf..c95c758 100644
--- a/gcc/config/avr/specs.h
+++ b/gcc/config/avr/specs.h
@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see
"%(asm_errata_skip) "
#define LINK_RELAX_SPEC \
- "%{mrelax:--relax} "
+ "%{!r:%{mrelax:--relax}} "
#undef LINK_SPEC
#define LINK_SPEC \
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 1681c79..f356679 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -171,7 +171,7 @@ extern int cris_cpu_version;
/* For the cris-*-elf subtarget. */
#define CRIS_ASM_SUBTARGET_SPEC \
- "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
+ "--emulation=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
/* FIXME: We should propagate the -melf option to make the criself
"emulation" unless a linker script is provided (-T*), but I don't know
diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def
index 44adcc6..76587c2 100644
--- a/gcc/config/darwin-sections.def
+++ b/gcc/config/darwin-sections.def
@@ -215,3 +215,10 @@ DEF_SECTION (objc2_method_names_section, 0,
DEF_SECTION (objc2_method_types_section, 0,
".section __TEXT, __objc_methtype, cstring_literals", 1)
+
+/* ASAN sections. */
+
+DEF_SECTION (asan_string_section, 0, ".section __TEXT, __asan_cstring", 0)
+DEF_SECTION (asan_globals_section, 0, ".section __DATA, __asan_globals", 0)
+DEF_SECTION (asan_liveness_section, 0,
+ ".section __DATA,__asan_liveness,regular,live_support", 0)
diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index be2daed..75ac356 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs.h"
#include "flags.h"
#include "opts.h"
+#include "asan.h"
/* Fix and Continue.
@@ -1298,6 +1299,39 @@ darwin_encode_section_info (tree decl, rtx rtl, int first)
SYMBOL_FLAG_EXTERNAL. */
default_encode_section_info (decl, rtl, first);
+ if (CONSTANT_CLASS_P (decl))
+ {
+ bool is_str = TREE_CODE (decl) == STRING_CST;
+ rtx sym_ref = XEXP (rtl, 0);
+
+ /* Unless this is a string cst or we are in an anchored section we have
+ nothing more to do here. */
+ if (!is_str && !SYMBOL_REF_HAS_BLOCK_INFO_P (sym_ref))
+ return;
+
+ tree sym_decl = SYMBOL_REF_DECL (sym_ref);
+ const char *name = XSTR (sym_ref, 0);
+ gcc_checking_assert (strncmp ("*lC", name, 3) == 0);
+
+ char *buf;
+ if (is_str)
+ {
+ bool for_asan = (flag_sanitize & SANITIZE_ADDRESS)
+ && asan_protect_global (CONST_CAST_TREE (decl));
+ /* When we are generating code for sanitized strings, the string
+ internal symbols are made visible in the object. */
+ buf = xasprintf ("*%c.str.%s", for_asan ? 'l' : 'L', &name[3]);
+ }
+ else
+ /* Let's identify anchored constants with a different prefix, for the
+ sake of inspection only. */
+ buf = xasprintf ("*LaC%s", &name[3]);
+ if (sym_decl)
+ DECL_NAME (sym_decl) = get_identifier (buf);
+ XSTR (sym_ref, 0) = ggc_strdup (buf);
+ free (buf);
+ }
+
if (! VAR_OR_FUNCTION_DECL_P (decl))
return;
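Illustrative sketch of the symbol renaming performed by the hunk above (hypothetical label names; the digits are copied verbatim from the original "*lC<N>" label):

    /* Assuming an incoming label "*lC42", the rewritten names would be:
         STRING_CST, ASAN-protected   -> "*l.str.42"  (lowercase 'l' keeps the
                                          symbol visible in the object)
         STRING_CST, not protected    -> "*L.str.42"
         anchored constant (non-str)  -> "*LaC42"   */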
@@ -1683,6 +1717,17 @@ machopic_select_section (tree decl,
ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ;
+ /* Trump categorize_decl_for_section () for ASAN stuff - the Darwin
+ categorisations are special. */
+ if (flag_sanitize & SANITIZE_ADDRESS)
+ {
+ if (TREE_CODE (decl) == STRING_CST
+ && asan_protect_global (CONST_CAST_TREE (decl)))
+ {
+ return darwin_sections[asan_string_section];
+ }
+ }
+
switch (categorize_decl_for_section (decl, reloc))
{
case SECCAT_TEXT:
@@ -1699,7 +1744,12 @@ machopic_select_section (tree decl,
break;
case SECCAT_RODATA_MERGE_STR_INIT:
- base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align);
+ if ((flag_sanitize & SANITIZE_ADDRESS)
+ && asan_protect_global (CONST_CAST_TREE (decl)))
+ /* or !flag_merge_constants */
+ return darwin_sections[asan_string_section];
+ else
+ return darwin_mergeable_string_section (DECL_INITIAL (decl), align);
break;
case SECCAT_RODATA_MERGE_CONST:
@@ -3297,11 +3347,16 @@ darwin_use_anchors_for_symbol_p (const_rtx symbol)
{
if (DARWIN_SECTION_ANCHORS && flag_section_anchors)
{
- section *sect;
- /* If the section contains a zero-sized object it's ineligible. */
- sect = SYMBOL_REF_BLOCK (symbol)->sect;
- /* This should have the effect of disabling anchors for vars that follow
- any zero-sized one, in a given section. */
+ tree decl = SYMBOL_REF_DECL (symbol);
+ /* If the symbol would be linker-visible, then the section can be split
+ at that symbol, so we must disallow anchoring it. This is stricter
+ than the default implementation. TODO: add other cases. */
+ if (decl && DECL_P (decl)
+ && (TREE_PUBLIC (decl) || !DECL_ARTIFICIAL (decl)))
+ return false;
+
+ /* We mark sections containing unsuitable entries. */
+ section *sect = SYMBOL_REF_BLOCK (symbol)->sect;
if (sect->common.flags & SECTION_NO_ANCHOR)
return false;
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 9b9a3fe..c3e28e2 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -287,6 +287,19 @@ extern GTY(()) int darwin_ms_struct;
#define DARWIN_RDYNAMIC "%{rdynamic:%nrdynamic is not supported}"
#endif
+#if LD64_HAS_NO_DEDUPLICATE
+/* What we want is "when the optimization level is debug OR when it is
+ a compile & link job with implied O0 optimization". */
+#define DARWIN_LD_NO_DEDUPLICATE \
+ "%{O0|O1|O|Og: -no_deduplicate} \
+ %{!O*:\
+ %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.S|.i|.ii|.mi|.mii|\
+ .f|.for|.ftn|.fpp|.f90|.f95|.f03|.f08|.f77|.F|.F90|.F95|.F03|.F08|\
+ .d|.mod: -no_deduplicate }} "
+#else
+#define DARWIN_LD_NO_DEDUPLICATE ""
+#endif
+
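Reading of the spec above (illustrative, not part of the patch): a compile-and-link of a plain C source with no -O option, or with an explicit -O0/-O1/-O/-Og, would pass -no_deduplicate to ld64, whereas an -O2 or -O3 link would not.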
#if LD64_HAS_MACOS_VERSION_MIN
# define DARWIN_PLATFORM_ID \
"%{mmacosx-version-min=*:-macos_version_min %*} "
@@ -403,10 +416,14 @@ extern GTY(()) int darwin_ms_struct;
%(linker)" \
DARWIN_LD_DEMANGLE \
LINK_PLUGIN_SPEC \
+ DARWIN_LD_NO_DEDUPLICATE \
"%{flto*:%<fcompare-debug*} \
%{flto} %{fno-lto} %{flto=*} \
- %l " \
+ %{static}%{!static:%{!dynamic:-dynamic}} \
+ %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
+ %{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
DARWIN_PLATFORM_ID \
+ " %l " \
LINK_COMPRESS_DEBUG_SPEC \
"%X %{s} %{t} %{Z} %{u*} \
%{e*} %{r} \
@@ -493,9 +510,8 @@ extern GTY(()) int darwin_ms_struct;
Note that options taking arguments may appear multiple times on a command
line with different arguments each time, so put a * after their names so
all of them get passed. */
-#define LINK_SPEC \
- "%{static}%{!static:%{!dynamic:-dynamic}} \
- %:remove-outfile(-ldl) \
+#define LINK_SPEC \
+ "%:remove-outfile(-ldl) \
%:remove-outfile(-lm) \
%:remove-outfile(-lpthread) \
%{fgnu-runtime: %{static|static-libgcc: \
@@ -511,9 +527,7 @@ extern GTY(()) int darwin_ms_struct;
%{static|static-libgm2:%:replace-outfile(-lm2iso libm2iso.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2min libm2min.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2log libm2log.a%s)}\
- %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)}\
- %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
- %{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
+ %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)} "\
LINK_SYSROOT_SPEC \
"%{!multiply_defined*:%{shared-libgcc: \
%:version-compare(< 10.5 mmacosx-version-min= -multiply_defined) \
@@ -1005,6 +1019,8 @@ extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS];
sprintf (LABEL, "*%s%ld", "lASAN", (long)(NUM));\
else if (strcmp ("LTRAMP", PREFIX) == 0) \
sprintf (LABEL, "*%s%ld", "lTRAMP", (long)(NUM));\
+ else if (strncmp ("LANCHOR", PREFIX, 7) == 0) \
+ sprintf (LABEL, "*%s%ld", "lANCHOR", (long)(NUM));\
else \
sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM)); \
} while (0)
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index fe68678..0287400 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -92,6 +92,8 @@ enum hsaco_attr_type
/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag
for non-scalar memory operations. The string starts on purpose with a space.
Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used.
+ Note: on atomics, glc/sc0 denotes whether the pre-op value should
+ be returned.
CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however,
there is no non-scalar user so far. */
#define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc")
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 0994329..a34d2e3 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -3938,6 +3938,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
@@ -3992,6 +3993,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
@@ -4050,6 +4052,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,yes,yes")])
@@ -4073,6 +4076,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,yes,yes")])
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 8959118..5ffeb23 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -54,6 +54,7 @@
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -183,6 +184,11 @@ gcn_option_override (void)
if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
+
+ /* TODO: This seems to produce tighter loops, but the testsuite expects it
+ to be set to '2', so I'll leave it default for now.
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 1); */
}
/* }}} */
@@ -5789,45 +5795,19 @@ gcn_libc_has_function (enum function_class fn_class,
return bsd_libc_has_function (fn_class, type);
}
-/* }}} */
-/* {{{ md_reorg pass. */
-
-/* Identify V_CMPX from the "type" attribute;
- note: this will also match 'v_cmp %E1 vcc'. */
+/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */
static bool
-gcn_cmpx_insn_p (attr_type type)
+gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
+ int ARG_UNUSED (scale),
+ unsigned int ARG_UNUSED (group_size))
{
- switch (type)
- {
- case TYPE_VOPC:
- return true;
- case TYPE_MUBUF:
- case TYPE_MTBUF:
- case TYPE_FLAT:
- case TYPE_VOP3P_MAI:
- case TYPE_UNKNOWN:
- case TYPE_SOP1:
- case TYPE_SOP2:
- case TYPE_SOPK:
- case TYPE_SOPC:
- case TYPE_SOPP:
- case TYPE_SMEM:
- case TYPE_DS:
- case TYPE_VOP2:
- case TYPE_VOP1:
- case TYPE_VOP3A:
- case TYPE_VOP3B:
- case TYPE_VOP_SDWA:
- case TYPE_VOP_DPP:
- case TYPE_MULT:
- case TYPE_VMULT:
- return false;
- }
- gcc_unreachable ();
- return false;
+ return true;
}
+/* }}} */
+/* {{{ md_reorg pass. */
+
/* Identify VMEM instructions from their "type" attribute. */
static bool
@@ -6356,19 +6336,59 @@ gcn_md_reorg (void)
reg_class_contents[(int)VCC_CONDITIONAL_REG])))
nops_rqd = ivccwait - prev_insn->age;
+ /* NOTE: The following condition for adding a wait state exists, but
+ GCC does not access the special registers using their SGPR#.
+ Thus, no action is required here. The following wait-state
+ condition exists at least for VEGA/gfx900+ to CDNA3:
+ Mixed use of VCC: alias vs. SGPR# - v_readlane,
+ v_readfirstlane, v_cmp, v_add_*i/u, v_sub_*i/u, v_div_*scale
+ followed by VALU reads VCC as constant requires 1 wait state.
+ (As carry-in, it requires none.)
+ [VCC can be accessed by name or logical SGPR that holds it.] */
+
+ /* Testing indicates that CDNA3 requires an s_nop between
+ e.g. 'v_cmp_eq_u64 vcc, v[4:5], v[8:9]' and 'v_mov_b32 v0, vcc_lo'.
+ Thus: add it between a v_cmp writing VCC and a VALU read of VCC. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 1
+ && iunit == UNIT_VECTOR
+ && (hard_reg_set_intersect_p
+ (depregs, reg_class_contents[(int)VCC_CONDITIONAL_REG]))
+ && get_attr_vcmp (prev_insn->insn) == VCMP_VCMP)
+ nops_rqd = 1 - prev_insn->age;
+
+ /* CDNA3: VALU writes SGPR/VCC: v_readlane, v_readfirstlane, v_cmp,
+ v_add_*i/u, v_sub_*i/u, v_div_*scale - followed by:
+ - VALU reads SGPR as constant requires 1 wait state
+ - VALU reads SGPR as carry-in requires no wait state
+ - v_readlane/v_writelane reads SGPR as lane select requires 4 wait
+ states. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 4
+ && iunit == UNIT_VECTOR
+ && prev_insn->unit == UNIT_VECTOR
+ && hard_reg_set_intersect_p
+ (depregs, reg_class_contents[(int) SGPR_SRC_REGS]))
+ {
+ if (get_attr_laneselect (insn) != LANESELECT_NO)
+ nops_rqd = 4 - prev_insn->age;
+ else if ((prev_insn->age + nops_rqd) < 1)
+ nops_rqd = 1 - prev_insn->age;
+ }
+
/* CDNA3: v_cmpx followed by
- V_readlane, v_readfirstlane, v_writelane requires 4 wait states
- VALU reads EXEC as constant requires 2 wait states
- other VALU requires no wait state */
if (TARGET_CDNA3_NOPS
&& (prev_insn->age + nops_rqd) < 4
- && gcn_cmpx_insn_p (prev_insn->type)
+ && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX
&& get_attr_laneselect (insn) != LANESELECT_NO)
nops_rqd = 4 - prev_insn->age;
else if (TARGET_CDNA3_NOPS
&& (prev_insn->age + nops_rqd) < 2
&& iunit == UNIT_VECTOR
- && gcn_cmpx_insn_p (prev_insn->type)
+ && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX
&& TEST_HARD_REG_BIT (ireads, EXECZ_REG))
nops_rqd = 2 - prev_insn->age;
@@ -6436,8 +6456,8 @@ gcn_md_reorg (void)
}
/* Insert the required number of NOPs. */
- for (int i = nops_rqd; i > 0; i--)
- emit_insn_after (gen_nop (), last_insn);
+ if (nops_rqd > 0)
+ emit_insn_after (gen_nops (GEN_INT (nops_rqd-1)), last_insn);
/* Age the previous instructions. We can also ignore writes to
registers subsequently overwritten. */
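A small worked example of the new NOP emission (assumed values, for illustration only): if the preceding v_cmp has age 0 and the CDNA3 rule above requires one wait state, nops_rqd becomes 1 and a single insn is emitted instead of a loop of single NOPs:

    /* nops_rqd == 1: one wait state requested.  */
    emit_insn_after (gen_nops (GEN_INT (nops_rqd - 1)), last_insn);
    /* ...which assembles to "s_nop 0x0", i.e. one NOP, since the "nops"
       pattern below (gcn.md) encodes N+1 wait states in its operand.  */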
@@ -7283,6 +7303,11 @@ print_operand_address (FILE *file, rtx mem)
H - print second part of a multi-reg value (high-part of 2-reg value)
J - print third part of a multi-reg value
K - print fourth part of a multi-reg value
+ R - Print a scalar register number as an integer. Temporary hack.
+ V - Print a vector register number as an integer. Temporary hack.
+
+ Additionally, the standard built-in modifiers c, n, a, and l exist; see gccint's
+ "Output Templates and Operand Substitution" for details.
*/
void
@@ -8131,6 +8156,8 @@ gcn_dwarf_register_span (rtx rtl)
gcn_vectorize_builtin_vectorized_function
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index fad42e6..4130cf6 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -324,6 +324,11 @@
"store,storex34,load,atomic,atomicwait,cmpswapx2,no"
(const_string "no"))
+; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State"
+; handling.
+
+(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no"))
+
; Identify instructions that require "Manually Inserted Wait State" if
; a previous instruction writes to VCC. The number gives the number of NOPs.
@@ -424,6 +429,15 @@
"s_nop\t0x0"
[(set_attr "type" "sopp")])
+; Variant of 'nop' that accepts a count argument.
+; s_nop accepts 0x0 to 0xf for 1 to 16 nops; however,
+; as %0 prints the operand in decimal, only 0 to 9 (= 1 to 10 nops) can be used.
+(define_insn "nops"
+ [(match_operand 0 "const_int_operand")]
+ ""
+ "s_nop\t0x%0"
+ [(set_attr "type" "sopp")])
+
; FIXME: What should the value of the immediate be? Zero is disallowed, so
; pick 1 for now.
(define_insn "trap"
@@ -566,6 +580,7 @@
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
flat,flat,flat,flat")
(set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store")
+ (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*")
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
(set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")
@@ -1089,6 +1104,7 @@
s_cmp%D1\t%2, %3
v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "sopc,vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_insn "cstoredi4_vector"
@@ -1099,6 +1115,7 @@
""
"v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_expand "cbranchdi4"
@@ -1125,6 +1142,7 @@
""
"v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_expand "cbranch<mode>4"
@@ -2165,7 +2183,7 @@
? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_load%o0\t%0, %A1%O1 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_inv sc1"
: "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
@@ -2177,7 +2195,7 @@
? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;global_load%o0\t%0, %A1%O1 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
@@ -2224,7 +2242,7 @@
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_store%o1\t%A0, %1%O0 %G1"
: "error: cache architectire unspecified");
case 2:
return (TARGET_GLn_CACHE
@@ -2232,7 +2250,7 @@
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_store%o1\t%A0, %1%O0 %G1"
: "error: cache architecture unspecified");
}
break;
@@ -2252,7 +2270,8 @@
? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;"
+ "flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
@@ -2263,7 +2282,8 @@
? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
+ "global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
@@ -2347,7 +2367,7 @@
? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: "error: cache architecture unspecified");
case 2:
@@ -2360,7 +2380,7 @@
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: "error: cache architecture unspecified");
@@ -2382,7 +2402,7 @@
? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
@@ -2395,7 +2415,7 @@
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
diff --git a/gcc/config/h8300/addsub.md b/gcc/config/h8300/addsub.md
index 32eba9d..f153625 100644
--- a/gcc/config/h8300/addsub.md
+++ b/gcc/config/h8300/addsub.md
@@ -271,7 +271,7 @@
(match_operand:QHSI 2 "register_operand" "r"))
(match_dup 1)))
(set (match_operand:QHSI 0 "register_operand" "=r")
- (plus (match_dup 1) (match_dup 2)))
+ (plus:QHSI (match_dup 1) (match_dup 2)))
(clobber (reg:CC CC_REG))]
""
{
diff --git a/gcc/config/h8300/jumpcall.md b/gcc/config/h8300/jumpcall.md
index 4e63408..44847e4 100644
--- a/gcc/config/h8300/jumpcall.md
+++ b/gcc/config/h8300/jumpcall.md
@@ -156,7 +156,7 @@
"#"
"&& reload_completed"
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
+ (eq:CCZ (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
@@ -181,7 +181,7 @@
(lshiftrt:SI (match_dup 1) (const_int 16))))
(clobber (reg:CC CC_REG))])
(set (reg:CCZ CC_REG)
- (eq (zero_extract:SI (match_dup 4) (const_int 1) (match_dup 2))
+ (eq:CCZ (zero_extract:SI (match_dup 4) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
@@ -288,7 +288,7 @@
})
(define_insn "call_insn_<mode>"
- [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr"))
+ [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr"))
(match_operand:P 1 "general_operand" "g"))]
"!SIBLING_CALL_P (insn)"
{
@@ -326,7 +326,7 @@
(define_insn "call_value_insn_<mode>"
[(set (match_operand 0 "" "=r")
- (call (mem:QI (match_operand 1 "call_insn_operand" "Cr"))
+ (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr"))
(match_operand:P 2 "general_operand" "g")))]
"!SIBLING_CALL_P (insn)"
{
@@ -358,7 +358,7 @@
})
(define_insn "sibcall_insn_<mode>"
- [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr"))
+ [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr"))
(match_operand:P 1 "general_operand" "g"))]
"SIBLING_CALL_P (insn)"
{
@@ -396,7 +396,7 @@
(define_insn "sibcall_value_insn_<mode>"
[(set (match_operand 0 "" "=r")
- (call (mem:QI (match_operand 1 "call_insn_operand" "Cr"))
+ (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr"))
(match_operand:P 2 "general_operand" "g")))]
"SIBLING_CALL_P (insn)"
{
diff --git a/gcc/config/h8300/testcompare.md b/gcc/config/h8300/testcompare.md
index 694c9e6..3b43381 100644
--- a/gcc/config/h8300/testcompare.md
+++ b/gcc/config/h8300/testcompare.md
@@ -28,7 +28,7 @@
;;
(define_insn ""
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r")
+ (eq:CCZ (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r")
(const_int 1)
(match_operand 1 "const_int_operand" "n"))
(const_int 0)))]
@@ -54,7 +54,7 @@
(define_insn "*tsthi_upper"
[(set (reg:CCZN CC_REG)
- (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (compare:CCZN (and:HI (match_operand:HI 0 "register_operand" "r")
(const_int -256))
(const_int 0)))]
"reload_completed"
@@ -63,7 +63,7 @@
(define_insn "*tsthi_upper_z"
[(set (reg:CCZ CC_REG)
- (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (compare:CCZ (and:HI (match_operand:HI 0 "register_operand" "r")
(const_int -256))
(const_int 0)))]
"reload_completed"
@@ -72,7 +72,7 @@
(define_insn "*tstsi_upper"
[(set (reg:CCZN CC_REG)
- (compare (and:SI (match_operand:SI 0 "register_operand" "r")
+ (compare:CCZN (and:SI (match_operand:SI 0 "register_operand" "r")
(const_int -65536))
(const_int 0)))]
"reload_completed"
@@ -81,7 +81,7 @@
(define_insn "*cmp<mode>_c"
[(set (reg:CCC CC_REG)
- (ltu (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
+ (ltu:CCC (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
(match_operand:QHSI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{
@@ -97,7 +97,7 @@
(define_insn "*cmpqi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:QI 0 "h8300_dst_operand" "rQ")
(match_operand:QI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.b %X1,%X0"; }
@@ -105,7 +105,7 @@
(define_insn "*cmphi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:HI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:HI 0 "h8300_dst_operand" "rQ")
(match_operand:HI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.w %T1,%T0"; }
@@ -113,7 +113,7 @@
(define_insn "*cmpsi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:SI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:SI 0 "h8300_dst_operand" "rQ")
(match_operand:SI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.l %S1,%S0"; }
@@ -121,7 +121,7 @@
(define_insn "*cmpqi"
[(set (reg:CC CC_REG)
- (compare (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (compare:CC (match_operand:QI 0 "h8300_dst_operand" "rQ")
(match_operand:QI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
"cmp.b %X1,%X0"
@@ -129,7 +129,7 @@
(define_insn "*cmphi"
[(set (reg:CC CC_REG)
- (compare (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
+ (compare:CC (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
(match_operand:HI 1 "h8300_src_operand" "P3>X,rQi")))]
"reload_completed"
{
@@ -150,7 +150,7 @@
(define_insn "cmpsi"
[(set (reg:CC CC_REG)
- (compare (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
+ (compare:CC (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
(match_operand:SI 1 "h8300_src_operand" "P3>X,rQi")))]
"reload_completed"
{
@@ -176,7 +176,7 @@
(define_peephole2
[(match_scratch:QHSI 1 "r")
(set (reg:CC CC_REG)
- (compare (match_operand:QHSI 0 "memory_operand" "")
+ (compare:CC (match_operand:QHSI 0 "memory_operand" "")
(const_int 0)))]
"!mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))"
[(parallel [(set (reg:CCZN CC_REG) (compare:CCZN (match_dup 0) (const_int 0)))
@@ -187,7 +187,7 @@
(define_peephole2
[(match_scratch:QHSI 1 "r")
(set (reg:CC CC_REG)
- (compare (match_operand:QHSI 0 "memory_operand" "")
+ (compare:CC (match_operand:QHSI 0 "memory_operand" "")
(const_int 0)))]
"mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))"
[(parallel [(set (match_dup 1) (match_dup 0)) (clobber (reg:CC CC_REG))])
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 09aa9b1..3278f1f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3151,7 +3151,7 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
}
/* Expand floating point op0 <=> op1, i.e.
- dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : 2. */
+ dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : -128. */
void
ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
@@ -3264,7 +3264,7 @@ ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
if (l2)
{
emit_label (l2);
- emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2);
+ emit_move_insn (dest, op2 == const0_rtx ? GEN_INT (-128) : op2);
}
emit_label (lend);
}
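A minimal C sketch of the value mapping the updated comment describes (illustrative only; the real code emits an RTL compare-and-branch sequence, and op2 overrides the unordered result, defaulting to -128 when op2 is const0_rtx):

    /* Three-way floating-point comparison result, as documented above.  */
    static int
    fp_spaceship (double a, double b)
    {
      if (a == b)
        return 0;
      if (a < b)
        return -1;
      if (a > b)
        return 1;
      return -128;  /* unordered, e.g. a NaN operand */
    }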
@@ -8241,8 +8241,10 @@ expand_cpymem_epilogue (rtx destmem, rtx srcmem,
unsigned HOST_WIDE_INT countval = UINTVAL (count);
unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
unsigned int destalign = MEM_ALIGN (destmem);
+ cfun->machine->by_pieces_in_use = true;
move_by_pieces (destmem, srcmem, epilogue_size, destalign,
RETURN_BEGIN);
+ cfun->machine->by_pieces_in_use = false;
return;
}
if (max_size > 8)
@@ -8405,8 +8407,8 @@ expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
/* Callback routine for store_by_pieces. Return the RTL of a register
containing GET_MODE_SIZE (MODE) bytes in the RTL register op_p which
- is a word or a word vector register. If PREV_P isn't nullptr, it
- has the RTL info from the previous iteration. */
+ is an integer or a word vector register. If PREV_P isn't nullptr,
+ it has the RTL info from the previous iteration. */
static rtx
setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
@@ -8435,10 +8437,6 @@ setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
rtx op = (rtx) op_p;
machine_mode op_mode = GET_MODE (op);
- gcc_assert (op_mode == word_mode
- || (VECTOR_MODE_P (op_mode)
- && GET_MODE_INNER (op_mode) == word_mode));
-
if (VECTOR_MODE_P (mode))
{
gcc_assert (GET_MODE_INNER (mode) == QImode);
@@ -8460,16 +8458,17 @@ setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
return tmp;
}
- target = gen_reg_rtx (word_mode);
if (VECTOR_MODE_P (op_mode))
{
+ gcc_assert (GET_MODE_INNER (op_mode) == word_mode);
+ target = gen_reg_rtx (word_mode);
op = gen_rtx_SUBREG (word_mode, op, 0);
emit_move_insn (target, op);
}
else
target = op;
- if (mode == word_mode)
+ if (mode == GET_MODE (target))
return target;
rtx tmp = gen_reg_rtx (mode);
@@ -8490,9 +8489,11 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
unsigned HOST_WIDE_INT countval = UINTVAL (count);
unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
unsigned int destalign = MEM_ALIGN (destmem);
+ cfun->machine->by_pieces_in_use = true;
store_by_pieces (destmem, epilogue_size, setmem_epilogue_gen_val,
vec_value ? vec_value : value, destalign, true,
RETURN_BEGIN);
+ cfun->machine->by_pieces_in_use = false;
return;
}
if (max_size > 32)
@@ -9574,8 +9575,9 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
case vector_loop:
need_zero_guard = true;
unroll_factor = 4;
- /* Get the vector mode to move MOVE_MAX bytes. */
- nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
+ /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes. */
+ nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
+ nunits /= GET_MODE_SIZE (word_mode);
if (nunits > 1)
{
move_mode = mode_for_vector (word_mode, nunits).require ();
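Worked example under assumed values (not from the patch): with 8-byte word_mode and a 64-byte MOVE_MAX / STORE_MAX_PIECES (e.g. when 512-bit vectors are enabled), nunits = 64 / 8 = 8 and move_mode becomes V8DImode; with a 16-byte limit it would be 16 / 8 = 2 and V2DImode.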
@@ -27033,6 +27035,109 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
return target;
}
+/* GF2P8AFFINEQB matrices to implement shift and rotate. */
+
+static const uint64_t matrix_ashift[8] =
+{
+ 0,
+ 0x0001020408102040, /* 1 l */
+ 0x0000010204081020, /* 2 l */
+ 0x0000000102040810, /* 3 l */
+ 0x0000000001020408, /* 4 l */
+ 0x0000000000010204, /* 5 l */
+ 0x0000000000000102, /* 6 l */
+ 0x0000000000000001 /* 7 l */
+};
+
+static const uint64_t matrix_lshiftrt[8] =
+{
+ 0,
+ 0x0204081020408000, /* 1 r */
+ 0x0408102040800000, /* 2 r */
+ 0x0810204080000000, /* 3 r */
+ 0x1020408000000000, /* 4 r */
+ 0x2040800000000000, /* 5 r */
+ 0x4080000000000000, /* 6 r */
+ 0x8000000000000000 /* 7 r */
+};
+
+static const uint64_t matrix_ashiftrt[8] =
+{
+ 0,
+ 0x0204081020408080, /* 1 r */
+ 0x0408102040808080, /* 2 r */
+ 0x0810204080808080, /* 3 r */
+ 0x1020408080808080, /* 4 r */
+ 0x2040808080808080, /* 5 r */
+ 0x4080808080808080, /* 6 r */
+ 0x8080808080808080 /* 7 r */
+};
+
+static const uint64_t matrix_rotate[8] =
+{
+ 0,
+ 0x8001020408102040, /* 1 rol8 */
+ 0x4080010204081020, /* 2 rol8 */
+ 0x2040800102040810, /* 3 rol8 */
+ 0x1020408001020408, /* 4 rol8 */
+ 0x0810204080010204, /* 5 rol8 */
+ 0x0408102040800102, /* 6 rol8 */
+ 0x0204081020408001 /* 7 rol8 */
+};
+
+static const uint64_t matrix_rotatert[8] =
+{
+ 0,
+ 0x0204081020408001, /* 1 ror8 */
+ 0x0408102040800102, /* 2 ror8 */
+ 0x0810204080010204, /* 3 ror8 */
+ 0x1020408001020408, /* 4 ror8 */
+ 0x2040800102040810, /* 5 ror8 */
+ 0x4080010204081020, /* 6 ror8 */
+ 0x8001020408102040 /* 7 ror8 */
+};
+
+/* Return an rtx that loads a 64-bit GF2P8AFFINE GF(2) matrix implementing a
+ shift for CODE and shift count COUNT into a register with the same vector
+ mode as SRC. */
+
+rtx
+ix86_vgf2p8affine_shift_matrix (rtx src, rtx count, enum rtx_code code)
+{
+ machine_mode mode = GET_MODE (src);
+ const uint64_t *matrix;
+ unsigned shift = INTVAL (count) & 7;
+ gcc_assert (shift > 0 && shift < 8);
+
+ switch (code)
+ {
+ case ASHIFT:
+ matrix = matrix_ashift;
+ break;
+ case ASHIFTRT:
+ matrix = matrix_ashiftrt;
+ break;
+ case LSHIFTRT:
+ matrix = matrix_lshiftrt;
+ break;
+ case ROTATE:
+ matrix = matrix_rotate;
+ break;
+ case ROTATERT:
+ matrix = matrix_rotatert;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ int nelts = GET_MODE_NUNITS (mode);
+ rtvec vec = rtvec_alloc (nelts);
+ uint64_t ma = matrix[shift];
+ for (int i = 0; i < nelts; i++)
+ RTVEC_ELT (vec, i) = gen_int_mode ((ma >> ((i % 8) * 8)) & 0xff, QImode);
+
+ return force_reg (mode, gen_rtx_CONST_VECTOR (mode, vec));
+}
+
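Self-contained sketch (hypothetical helper names; V16QI-sized output assumed) of how the loop above replicates one 64-bit matrix across every 8-byte lane of the constant vector:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of the RTVEC expansion above: element i of the vector constant
       is byte (i % 8) of the 64-bit matrix, so the same 8x8 bit matrix is
       repeated once per 8-byte lane.  */
    static void
    expand_matrix (uint64_t matrix, uint8_t *vec, int nelts)
    {
      for (int i = 0; i < nelts; i++)
        vec[i] = (matrix >> ((i % 8) * 8)) & 0xff;
    }

    int
    main (void)
    {
      uint8_t v[16];
      expand_matrix (0x0001020408102040ULL, v, 16);  /* matrix_ashift[1] */
      for (int i = 0; i < 16; i++)
        printf ("%02x%c", v[i], i == 15 ? '\n' : ' ');
      return 0;
    }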
/* Trunc a vector to a narrow vector, like v4di -> v4si. */
void
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c131577..0608dd2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3085,21 +3085,68 @@ ix86_rpad_gate ()
&& optimize_function_for_speed_p (cfun));
}
+enum x86_cse_kind
+{
+ X86_CSE_CONST0_VECTOR,
+ X86_CSE_CONSTM1_VECTOR,
+ X86_CSE_VEC_DUP,
+ X86_CSE_TLS_GD,
+ X86_CSE_TLS_LD_BASE,
+ X86_CSE_TLSDESC
+};
+
+struct redundant_pattern
+{
+ /* Bitmap of basic blocks with broadcast instructions. */
+ auto_bitmap bbs;
+ /* Bitmap of broadcast instructions. */
+ auto_bitmap insns;
+ /* The broadcast inner scalar. */
+ rtx val;
+ /* The actual redundant source value for UNSPEC_TLSDESC. */
+ rtx tlsdesc_val;
+ /* The inner scalar mode. */
+ machine_mode mode;
+ /* The instruction which sets the inner scalar. Nullptr if the inner
+ scalar is applied to the whole function, instead of within the same
+ block. */
+ rtx_insn *def_insn;
+ /* The widest broadcast source. */
+ rtx broadcast_source;
+ /* The widest broadcast register. */
+ rtx broadcast_reg;
+ /* The basic block of the broadcast instruction. */
+ basic_block bb;
+ /* The number of broadcast instructions with the same inner scalar. */
+ unsigned HOST_WIDE_INT count;
+ /* The threshold of broadcast instructions with the same inner
+ scalar. */
+ unsigned int threshold;
+ /* The widest broadcast size in bytes. */
+ unsigned int size;
+ /* Load kind. */
+ x86_cse_kind kind;
+};
+
/* Generate a vector set, DEST = SRC, at entry of the nearest dominator
for basic block map BBS, which is in the fake loop that contains the
whole function, so that there is only a single vector set in the
- whole function. If not nullptr, INNER_SCALAR is the inner scalar of
- SRC, as (reg:SI 99) in (vec_duplicate:V4SI (reg:SI 99)). */
+ whole function. If not nullptr, LOAD is a pointer to the load. */
static void
ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
- rtx inner_scalar = nullptr)
+ redundant_pattern *load = nullptr)
{
basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
- while (bb->loop_father->latch
- != EXIT_BLOCK_PTR_FOR_FN (cfun))
- bb = get_immediate_dominator (CDI_DOMINATORS,
- bb->loop_father->header);
+ /* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop
+ to avoid extra spills. */
+ if (!load || load->kind != X86_CSE_VEC_DUP)
+ {
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+ }
rtx set = gen_rtx_SET (dest, src);
@@ -3141,8 +3188,14 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
}
}
- if (inner_scalar)
+ if (load && load->kind == X86_CSE_VEC_DUP)
{
+ /* Get the source from LOAD as (reg:SI 99) in
+
+ (vec_duplicate:V4SI (reg:SI 99))
+
+ */
+ rtx inner_scalar = load->val;
/* Set the source in (vec_duplicate:V4SI (reg:SI 99)). */
rtx reg = XEXP (src, 0);
if ((REG_P (inner_scalar) || MEM_P (inner_scalar))
@@ -3226,7 +3279,7 @@ remove_partial_avx_dependency (void)
break;
}
- /* Only hanlde conversion here. */
+ /* Only handle conversion here. */
machine_mode src_mode
= convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
switch (src_mode)
@@ -3489,44 +3542,6 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const,
}
}
-enum x86_cse_kind
-{
- X86_CSE_CONST0_VECTOR,
- X86_CSE_CONSTM1_VECTOR,
- X86_CSE_VEC_DUP
-};
-
-struct redundant_load
-{
- /* Bitmap of basic blocks with broadcast instructions. */
- auto_bitmap bbs;
- /* Bitmap of broadcast instructions. */
- auto_bitmap insns;
- /* The broadcast inner scalar. */
- rtx val;
- /* The inner scalar mode. */
- machine_mode mode;
- /* The instruction which sets the inner scalar. Nullptr if the inner
- scalar is applied to the whole function, instead of within the same
- block. */
- rtx_insn *def_insn;
- /* The widest broadcast source. */
- rtx broadcast_source;
- /* The widest broadcast register. */
- rtx broadcast_reg;
- /* The basic block of the broadcast instruction. */
- basic_block bb;
- /* The number of broadcast instructions with the same inner scalar. */
- unsigned HOST_WIDE_INT count;
- /* The threshold of broadcast instructions with the same inner
- scalar. */
- unsigned int threshold;
- /* The widest broadcast size in bytes. */
- unsigned int size;
- /* Load kind. */
- x86_cse_kind kind;
-};
-
/* Return the inner scalar if OP is a broadcast, else return nullptr. */
static rtx
@@ -3629,6 +3644,8 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
integer constant. */
op = src;
+ if (mode != GET_MODE (reg))
+ op = gen_int_mode (INTVAL (src), mode);
*insn_p = nullptr;
}
else
@@ -3669,25 +3686,719 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
return op;
}
-/* At entry of the nearest common dominator for basic blocks with vector
- CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest
- vector set instruction for all CONST0_RTX and integer CONSTM1_RTX
- uses.
+/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
+ put the updated instruction in UPDATED_TLS_INSNS. */
- NB: We want to generate only a single widest vector set to cover the
- whole function. The LCM algorithm isn't appropriate here since it
- may place a vector set inside the loop. */
+static void
+replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
+ auto_bitmap &updated_tls_insns)
+{
+ bitmap_iterator bi;
+ unsigned int id;
-static unsigned int
-remove_redundant_vector_load (void)
+ EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
+ allowed. */
+ if (!CALL_P (insn))
+ {
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
+ gcc_unreachable ();
+ }
+
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+ rtx set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ rtx dest = SET_DEST (set);
+
+ set = gen_rtx_SET (dest, src);
+ rtx_insn *set_insn = emit_insn_after (set, insn);
+ if (recog_memoized (set_insn) < 0)
+ gcc_unreachable ();
+
+ /* Put SET_INSN in UPDATED_TLS_INSNS. */
+ bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nReplace:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nwith:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Delete the CALL insn. */
+ delete_insn (insn);
+
+ df_insn_rescan (set_insn);
+ }
+}
+
+/* Return the basic block which dominates all basic blocks which set
+ hard register REGNO used in basic block BB. */
+
+static basic_block
+ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
+{
+ basic_block set_bb;
+ auto_bitmap set_bbs;
+
+ /* Get all BBs which set REGNO and dominate the current BB from all
+ DEFs of REGNO. */
+ for (df_ref def = DF_REG_DEF_CHAIN (regno);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ if (!DF_REF_IS_ARTIFICIAL (def)
+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
+ {
+ set_bb = DF_REF_BB (def);
+ if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
+ bitmap_set_bit (set_bbs, set_bb->index);
+ }
+
+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+ return bb;
+}
+
+/* Mark FLAGS register as live in DATA, a bitmap of live caller-saved
+ registers, if DEST is FLAGS register. */
+
+static void
+ix86_check_flags_reg (rtx dest, const_rtx, void *data)
+{
+ auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
+ if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
+ bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
+}
+
+/* Emit a TLS_SET instruction of KIND in basic block BB. Store the
+ insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P
+ for emit_insn_after. UPDATED_GNU_TLS_INSNS contains instructions
+ which replace the GNU TLS instructions. UPDATED_GNU2_TLS_INSNS
+ contains instructions which replace the GNU2 TLS instructions. */
+
+static rtx_insn *
+ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
+ rtx_insn **before_p, rtx_insn **after_p,
+ auto_bitmap &updated_gnu_tls_insns,
+ auto_bitmap &updated_gnu2_tls_insns)
+{
+ rtx_insn *tls_insn;
+
+ do
+ {
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ /* This must be the beginning basic block:
+
+ (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
+
+ or a basic block with only a label:
+
+ (code_label 78 11 77 3 14 (nil) [1 uses])
+ (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
+
+ or a basic block with only a debug marker:
+
+ (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
+ (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
+
+ */
+ gcc_assert (DEBUG_INSN_P (insn)
+ || (NOTE_P (insn)
+ && ((NOTE_KIND (insn)
+ == NOTE_INSN_FUNCTION_BEG)
+ || (NOTE_KIND (insn)
+ == NOTE_INSN_BASIC_BLOCK))));
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ /* TLS_GD and TLS_LD_BASE instructions are normal functions which
+ clobber caller-saved registers. TLSDESC instructions only
+ clobber FLAGS. If any registers clobbered by TLS instructions
+ are live in this basic block, we must insert TLS instructions
+ after all live registers clobbered are dead. */
+
+ auto_bitmap live_caller_saved_regs;
+ bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
+
+ if (bitmap_bit_p (in, FLAGS_REG))
+ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
+
+ unsigned int i;
+
+ /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
+ instructions. */
+ if (kind != X86_CSE_TLSDESC)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (call_used_regs[i]
+ && !fixed_regs[i]
+ && bitmap_bit_p (in, i))
+ bitmap_set_bit (live_caller_saved_regs, i);
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ if (insn == BB_HEAD (bb))
+ {
+ *before_p = insn;
+ tls_insn = emit_insn_before (tls_set, insn);
+ }
+ else
+ {
+ /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
+ beginning basic block:
+
+ (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
+
+ or after NOTE_INSN_BASIC_BLOCK in a basic block with
+ only a label:
+
+ (code_label 78 11 77 3 14 (nil) [1 uses])
+ (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
+
+ or after debug marker in a basic block with only a
+ debug marker:
+
+ (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
+ (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
+
+ */
+ insn = insn ? PREV_INSN (insn) : BB_END (bb);
+ *after_p = insn;
+ tls_insn = emit_insn_after (tls_set, insn);
+ }
+ return tls_insn;
+ }
+
+ bool repeat = false;
+
+ /* Search for REG_DEAD notes in this basic block. */
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ /* NB: A conditional jump is the only instruction which reads the
+ flags register and changes control flow. We can never place
+ the TLS call after an unconditional jump. */
+ if (JUMP_P (insn))
+ {
+ /* This must be a conditional jump. */
+ rtx label = JUMP_LABEL (insn);
+ if (label == nullptr
+ || ANY_RETURN_P (label)
+ || !(LABEL_P (label) || SYMBOL_REF_P (label)))
+ gcc_unreachable ();
+
+ /* Place the call before all FLAGS_REG-setting BBs since we can't
+ place a call either before or after a conditional jump. */
+ bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
+
+ /* Start over again. */
+ repeat = true;
+ break;
+ }
+
+ if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
+ {
+ /* Insert the __tls_get_addr call before INSN which
+ replaces a __tls_get_addr call. */
+ *before_p = insn;
+ tls_insn = emit_insn_before (tls_set, insn);
+ return tls_insn;
+ }
+
+ if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
+ {
+ /* Mark FLAGS register as dead since FLAGS register
+ would be clobbered by the GNU2 TLS instruction. */
+ bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
+ continue;
+ }
+
+ /* Check if FLAGS register is live. */
+ note_stores (insn, ix86_check_flags_reg,
+ &live_caller_saved_regs);
+
+ rtx link;
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_DEAD
+ && REG_P (XEXP (link, 0)))
+ {
+ /* Mark the live caller-saved register as dead. */
+ for (i = REGNO (XEXP (link, 0));
+ i < END_REGNO (XEXP (link, 0));
+ i++)
+ if (i < FIRST_PSEUDO_REGISTER)
+ bitmap_clear_bit (live_caller_saved_regs, i);
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ *after_p = insn;
+ tls_insn = emit_insn_after (tls_set, insn);
+ return tls_insn;
+ }
+ }
+ }
+
+ /* NB: Start over again for conditional jump. */
+ if (repeat)
+ continue;
+
+ gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
+
+ /* If any live caller-saved registers aren't dead at the end of
+ this basic block, get the basic block which dominates all
+ basic blocks which set the remaining live registers. */
+ auto_bitmap set_bbs;
+ bitmap_iterator bi;
+ unsigned int id;
+ EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
+ {
+ basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
+ bitmap_set_bit (set_bbs, set_bb->index);
+ }
+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+ }
+ while (true);
+}
+
+/* Generate a TLS call of KIND with VAL and copy the call result to DEST,
+ at entry of the nearest dominator for basic block map BBS, which is in
+ the fake loop that contains the whole function, so that there is only
+ a single TLS CALL of KIND with VAL in the whole function.
+ UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
+ instructions. UPDATED_GNU2_TLS_INSNS contains instructions which
+ replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr,
+ insert it before the TLS call. */
+
+static void
+ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+ auto_bitmap &bbs,
+ auto_bitmap &updated_gnu_tls_insns,
+ auto_bitmap &updated_gnu2_tls_insns,
+ rtx tlsdesc_set = nullptr)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+
+ rtx rax = nullptr, rdi;
+ rtx eqv = nullptr;
+ rtx caddr;
+ rtx set;
+ rtx clob;
+ rtx symbol;
+ rtx tls;
+
+ switch (kind)
+ {
+ case X86_CSE_TLS_GD:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ symbol = XVECEXP (val, 0, 0);
+ tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
+
+ if (GET_MODE (symbol) != Pmode)
+ symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
+ eqv = symbol;
+ break;
+
+ case X86_CSE_TLS_LD_BASE:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
+
+ /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
+ to share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ break;
+
+ case X86_CSE_TLSDESC:
+ set = gen_rtx_SET (dest, val);
+ clob = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, FLAGS_REG));
+ tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Emit the TLS CALL insn. */
+ rtx_insn *before = nullptr;
+ rtx_insn *after = nullptr;
+ rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
+ &after,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns);
+
+ rtx_insn *tlsdesc_insn = nullptr;
+ if (tlsdesc_set)
+ {
+ rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
+ rtx src = copy_rtx (SET_SRC (tlsdesc_set));
+ tlsdesc_set = gen_rtx_SET (dest, src);
+ tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ RTL_CONST_CALL_P (tls_insn) = 1;
+
+ /* Indicate that this function can't jump to non-local gotos. */
+ make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
+ }
+
+ if (recog_memoized (tls_insn) < 0)
+ gcc_unreachable ();
+
+ if (dump_file)
+ {
+ if (after)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, after);
+ fprintf (dump_file, "\n");
+ }
+ else
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, before);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ /* Copy RAX to DEST. */
+ set = gen_rtx_SET (dest, rax);
+ rtx_insn *set_insn = emit_insn_after (set, tls_insn);
+ set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
+
+namespace {
+
+const pass_data pass_data_x86_cse =
+{
+ RTL_PASS, /* type */
+ "x86_cse", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_x86_cse : public rtl_opt_pass
+{
+public:
+ pass_x86_cse (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_x86_cse, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *fun) final override
+ {
+ return (TARGET_SSE2
+ && optimize
+ && optimize_function_for_speed_p (fun));
+ }
+
+ unsigned int execute (function *) final override
+ {
+ return x86_cse ();
+ }
+
+private:
+ /* The redundant source value. */
+ rtx val;
+ /* The actual redundant source value for UNSPEC_TLSDESC. */
+ rtx tlsdesc_val;
+ /* The instruction which defines the redundant value. */
+ rtx_insn *def_insn;
+ /* Mode of the destination of the candidate redundant instruction. */
+ machine_mode mode;
+ /* Mode of the source of the candidate redundant instruction. */
+ machine_mode scalar_mode;
+ /* The classification of the candidate redundant instruction. */
+ x86_cse_kind kind;
+
+ unsigned int x86_cse (void);
+ bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
+ bool candidate_gnu2_tls_p (rtx, attr_tls64);
+ bool candidate_vector_p (rtx);
+ rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
+}; // class pass_x86_cse
+
+/* Return the instruction which sets REG from TLS_SYMBOL. */
+
+rtx_insn *
+pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
+ const_rtx tls_symbol)
+{
+ rtx_insn *set_insn = nullptr;
+ for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ return nullptr;
+
+ set_insn = DF_REF_INSN (ref);
+ if (get_attr_tls64 (set_insn) != TLS64_LEA)
+ return nullptr;
+
+ rtx tls_set = PATTERN (set_insn);
+ rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
+ if (!rtx_equal_p (tls_symbol, tls_src))
+ return nullptr;
+ }
+
+ return set_insn;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE. */
+
+bool
+pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return false;
+
+ /* Record the redundant TLS CALLs for 64-bit:
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
+ (clobber (reg:DI 5 di))])
+
+
+ and
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
+
+ */
+
+ rtx pat = PATTERN (insn);
+ rtx set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ rtx dest = SET_DEST (set);
+ scalar_mode = mode = GET_MODE (dest);
+ val = XVECEXP (pat, 0, 1);
+ gcc_assert (GET_CODE (val) == UNSPEC);
+
+ if (tls64 == TLS64_GD)
+ kind = X86_CSE_TLS_GD;
+ else
+ kind = X86_CSE_TLS_LD_BASE;
+
+ def_insn = nullptr;
+ return true;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ SET is UNSPEC_TLSDESC. */
+
+bool
+pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return false;
+
+ rtx tls_symbol;
+ rtx_insn *set_insn;
+ rtx src = SET_SRC (set);
+ val = src;
+ tlsdesc_val = src;
+ kind = X86_CSE_TLSDESC;
+
+ if (tls64 == TLS64_COMBINE)
+ {
+ /* Record 64-bit TLS64_COMBINE:
+
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (reg:DI 114)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ ] UNSPEC_TLSDESC)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+ */
+
+ scalar_mode = mode = GET_MODE (src);
+
+ /* Since the first operand of PLUS in the source TLS_COMBINE
+ pattern is unused, use the second operand of PLUS:
+
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))
+
+ as VAL to check if 2 TLS_COMBINE patterns have the same
+ source. */
+ val = XEXP (src, 1);
+ gcc_assert (GET_CODE (val) == CONST
+ && GET_CODE (XEXP (val, 0)) == UNSPEC
+ && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
+ && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
+ def_insn = nullptr;
+ return true;
+ }
+
+ /* Record 64-bit TLS_CALL:
+
+ (set (reg:DI 101)
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg:DI 112)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ */
+
+ gcc_assert (GET_CODE (src) == UNSPEC);
+ tls_symbol = XVECEXP (src, 0, 0);
+ src = XVECEXP (src, 0, 1);
+ scalar_mode = mode = GET_MODE (src);
+ gcc_assert (REG_P (src));
+
+ /* All definitions of reg:DI 129 in
+
+ (set (reg:DI 110)
+ (unspec:DI [(symbol_ref:DI ("foo"))
+ (reg:DI 129)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ should have the same source as in
+
+ (set (reg:DI 129)
+ (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
+
+ */
+
+ set_insn = tls_set_insn_from_symbol (src, tls_symbol);
+ if (!set_insn)
+ return false;
+
+ /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source. */
+ val = tls_symbol;
+ def_insn = set_insn;
+ return true;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ SET is a vector broadcast pattern. */
+
+bool
+pass_x86_cse::candidate_vector_p (rtx set)
+{
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+ mode = GET_MODE (dest);
+ /* Skip non-vector instruction. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ /* Skip non-vector load instruction. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ return false;
+
+ val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
+ &def_insn);
+ return val ? true : false;
+}
+
+/* At entry of the nearest common dominator for basic blocks with
+
+ 1. Vector CONST0_RTX patterns.
+ 2. Vector CONSTM1_RTX patterns.
+ 3. Vector broadcast patterns.
+ 4. UNSPEC_TLS_GD patterns.
+ 5. UNSPEC_TLS_LD_BASE patterns.
+ 6. UNSPEC_TLSDESC patterns.
+
+ generate a single pattern whose destination is used to replace the
+ source in all identical patterns.
+
+ NB: We want to generate a pattern, which is executed only once, to
+ cover the whole function. The LCM algorithm isn't appropriate here
+ since it may place a pattern inside the loop. */
+
+unsigned int
+pass_x86_cse::x86_cse (void)
{
timevar_push (TV_MACH_DEP);
- auto_vec<redundant_load *> loads;
- redundant_load *load;
+ auto_vec<redundant_pattern *> loads;
+ redundant_pattern *load;
basic_block bb;
rtx_insn *insn;
unsigned int i;
+ auto_bitmap updated_gnu_tls_insns;
+ auto_bitmap updated_gnu2_tls_insns;
df_set_flags (DF_DEFER_INSN_RESCAN);
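Source-level intuition for the TLS part of this pass (hypothetical example; whether each access really expands to its own call depends on the TLS model and earlier optimizations):

    /* Compiled with -fPIC under the general-dynamic TLS model, each access of
       COUNTER may expand to a separate __tls_get_addr (or TLS-descriptor)
       call; after this pass a single call remains at a dominating point and
       its result is copied to the other uses.  */
    extern __thread int counter;

    int
    bump_twice (void)
    {
      counter++;
      counter++;
      return counter;
    }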
@@ -3700,61 +4411,74 @@ remove_redundant_vector_load (void)
if (!NONDEBUG_INSN_P (insn))
continue;
+ bool matched = false;
+ /* Remove redundant patterns if there are more than 2 of
+ them. */
+ unsigned int threshold = 2;
+
rtx set = single_set (insn);
- if (!set)
+ if (!set && !CALL_P (insn))
continue;
- /* Record single set vector instruction with CONST0_RTX and
- CONSTM1_RTX source. Record basic blocks with CONST0_RTX and
- CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the
- maximum size of CONST0_RTX and CONSTM1_RTX. */
+ tlsdesc_val = nullptr;
- rtx dest = SET_DEST (set);
- machine_mode mode = GET_MODE (dest);
- /* Skip non-vector instruction. */
- if (!VECTOR_MODE_P (mode))
- continue;
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ switch (tls64)
+ {
+ case TLS64_GD:
+ case TLS64_LD_BASE:
+ /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE. */
+ if (candidate_gnu_tls_p (insn, tls64))
+ break;
+ continue;
- rtx src = SET_SRC (set);
- /* Skip non-vector load instruction. */
- if (!REG_P (dest) && !SUBREG_P (dest))
- continue;
+ case TLS64_CALL:
+ case TLS64_COMBINE:
+ /* Verify UNSPEC_TLSDESC. */
+ if (candidate_gnu2_tls_p (set, tls64))
+ break;
+ continue;
- rtx_insn *def_insn;
- machine_mode scalar_mode;
- x86_cse_kind kind;
- rtx val = ix86_broadcast_inner (src, mode, &scalar_mode,
- &kind, &def_insn);
- if (!val)
- continue;
+ case TLS64_LEA:
+ /* Skip TLS64_LEA. */
+ continue;
- /* Remove redundant register loads if there are more than 2
- loads will be used. */
- unsigned int threshold = 2;
+ case TLS64_NONE:
+ if (!set)
+ continue;
- /* Check if there is a matching redundant vector load. */
- bool matched = false;
+ /* Check for vector broadcast. */
+ if (candidate_vector_p (set))
+ break;
+ continue;
+ }
+
+ /* Check if there is a matching redundant load. */
FOR_EACH_VEC_ELT (loads, i, load)
if (load->val
&& load->kind == kind
&& load->mode == scalar_mode
&& (load->bb == bb
- || kind < X86_CSE_VEC_DUP
+ || kind != X86_CSE_VEC_DUP
/* Non all 0s/1s vector load must be in the same
basic block if it is in a recursive call. */
|| !recursive_call_p)
&& rtx_equal_p (load->val, val))
{
- /* Record vector instruction. */
+ /* Record instruction. */
bitmap_set_bit (load->insns, INSN_UID (insn));
/* Record the maximum vector size. */
- if (load->size < GET_MODE_SIZE (mode))
+ if (kind <= X86_CSE_VEC_DUP
+ && load->size < GET_MODE_SIZE (mode))
load->size = GET_MODE_SIZE (mode);
/* Record the basic block. */
bitmap_set_bit (load->bbs, bb->index);
+
+ /* Increment the count. */
load->count++;
+
matched = true;
break;
}
@@ -3762,10 +4486,17 @@ remove_redundant_vector_load (void)
if (matched)
continue;
- /* We see this vector broadcast the first time. */
- load = new redundant_load;
+	  /* We see this instruction for the first time.  Record the
+	     redundant source value, its mode, the destination size,
+	     the instruction which defines the redundant source value,
+	     its basic block and the instruction kind.  */
+ load = new redundant_pattern;
load->val = copy_rtx (val);
+ if (tlsdesc_val)
+ load->tlsdesc_val = copy_rtx (tlsdesc_val);
+ else
+ load->tlsdesc_val = nullptr;
load->mode = scalar_mode;
load->size = GET_MODE_SIZE (mode);
load->def_insn = def_insn;
@@ -3782,49 +4513,64 @@ remove_redundant_vector_load (void)
}
bool replaced = false;
- rtx reg, broadcast_source, broadcast_reg;
FOR_EACH_VEC_ELT (loads, i, load)
if (load->count >= load->threshold)
{
- machine_mode mode = ix86_get_vector_cse_mode (load->size,
- load->mode);
- broadcast_reg = gen_reg_rtx (mode);
- if (load->def_insn)
- {
- /* Replace redundant vector loads with a single vector load
- in the same basic block. */
- reg = load->val;
- if (load->mode != GET_MODE (reg))
- reg = gen_rtx_SUBREG (load->mode, reg, 0);
- broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
- replace_vector_const (mode, broadcast_reg, load->insns,
- load->mode);
- }
- else
+ machine_mode mode;
+ rtx reg, broadcast_source, broadcast_reg;
+ replaced = true;
+ switch (load->kind)
{
- /* This is a constant integer/double vector. If the
- inner scalar is 0 or -1, set vector to CONST0_RTX
- or CONSTM1_RTX directly. */
- rtx reg;
- switch (load->kind)
+ case X86_CSE_TLS_GD:
+ case X86_CSE_TLS_LD_BASE:
+ case X86_CSE_TLSDESC:
+ broadcast_reg = gen_reg_rtx (load->mode);
+ replace_tls_call (broadcast_reg, load->insns,
+ (load->kind == X86_CSE_TLSDESC
+ ? updated_gnu2_tls_insns
+ : updated_gnu_tls_insns));
+ load->broadcast_reg = broadcast_reg;
+ break;
+
+ case X86_CSE_CONST0_VECTOR:
+ case X86_CSE_CONSTM1_VECTOR:
+ case X86_CSE_VEC_DUP:
+ mode = ix86_get_vector_cse_mode (load->size, load->mode);
+ broadcast_reg = gen_reg_rtx (mode);
+ if (load->def_insn)
{
- case X86_CSE_CONST0_VECTOR:
- broadcast_source = CONST0_RTX (mode);
- break;
- case X86_CSE_CONSTM1_VECTOR:
- broadcast_source = CONSTM1_RTX (mode);
- break;
- default:
- reg = gen_reg_rtx (load->mode);
+ /* Replace redundant vector loads with a single vector
+ load in the same basic block. */
+ reg = load->val;
+ if (load->mode != GET_MODE (reg))
+ reg = gen_rtx_SUBREG (load->mode, reg, 0);
broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
- break;
}
+ else
+ /* This is a constant integer/double vector. If the
+ inner scalar is 0 or -1, set vector to CONST0_RTX
+ or CONSTM1_RTX directly. */
+ switch (load->kind)
+ {
+ case X86_CSE_CONST0_VECTOR:
+ broadcast_source = CONST0_RTX (mode);
+ break;
+ case X86_CSE_CONSTM1_VECTOR:
+ broadcast_source = CONSTM1_RTX (mode);
+ break;
+ case X86_CSE_VEC_DUP:
+ reg = gen_reg_rtx (load->mode);
+ broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
+ break;
+ default:
+ gcc_unreachable ();
+ }
replace_vector_const (mode, broadcast_reg, load->insns,
load->mode);
+ load->broadcast_source = broadcast_source;
+ load->broadcast_reg = broadcast_reg;
+ break;
}
- load->broadcast_source = broadcast_source;
- load->broadcast_reg = broadcast_reg;
- replaced = true;
}
if (replaced)
@@ -3839,43 +4585,75 @@ remove_redundant_vector_load (void)
FOR_EACH_VEC_ELT (loads, i, load)
if (load->count >= load->threshold)
{
+ rtx set;
if (load->def_insn)
- {
- /* Insert a broadcast after the original scalar
- definition. */
- rtx set = gen_rtx_SET (load->broadcast_reg,
- load->broadcast_source);
- insn = emit_insn_after (set, load->def_insn);
-
- if (cfun->can_throw_non_call_exceptions)
- {
- /* Handle REG_EH_REGION note in DEF_INSN. */
- rtx note = find_reg_note (load->def_insn,
- REG_EH_REGION, nullptr);
- if (note)
- {
- control_flow_insns.safe_push (load->def_insn);
- add_reg_note (insn, REG_EH_REGION,
- XEXP (note, 0));
- }
- }
+ switch (load->kind)
+ {
+ case X86_CSE_TLSDESC:
+ ix86_place_single_tls_call (load->broadcast_reg,
+ load->tlsdesc_val,
+ load->kind,
+ load->bbs,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns,
+ PATTERN (load->def_insn));
+ break;
+ case X86_CSE_VEC_DUP:
+ /* Insert a broadcast after the original scalar
+ definition. */
+ set = gen_rtx_SET (load->broadcast_reg,
+ load->broadcast_source);
+ insn = emit_insn_after (set, load->def_insn);
+
+ if (cfun->can_throw_non_call_exceptions)
+ {
+ /* Handle REG_EH_REGION note in DEF_INSN. */
+ rtx note = find_reg_note (load->def_insn,
+ REG_EH_REGION, nullptr);
+ if (note)
+ {
+ control_flow_insns.safe_push (load->def_insn);
+ add_reg_note (insn, REG_EH_REGION,
+ XEXP (note, 0));
+ }
+ }
- if (dump_file)
- {
- fprintf (dump_file, "\nAdd:\n\n");
- print_rtl_single (dump_file, insn);
- fprintf (dump_file, "\nafter:\n\n");
- print_rtl_single (dump_file, load->def_insn);
- fprintf (dump_file, "\n");
- }
- }
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, load->def_insn);
+ fprintf (dump_file, "\n");
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
else
- ix86_place_single_vector_set (load->broadcast_reg,
- load->broadcast_source,
- load->bbs,
- (load->kind == X86_CSE_VEC_DUP
- ? load->val
- : nullptr));
+ switch (load->kind)
+ {
+ case X86_CSE_TLS_GD:
+ case X86_CSE_TLS_LD_BASE:
+ case X86_CSE_TLSDESC:
+ ix86_place_single_tls_call (load->broadcast_reg,
+ (load->kind == X86_CSE_TLSDESC
+ ? load->tlsdesc_val
+ : load->val),
+ load->kind,
+ load->bbs,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns);
+ break;
+ case X86_CSE_CONST0_VECTOR:
+ case X86_CSE_CONSTM1_VECTOR:
+ case X86_CSE_VEC_DUP:
+ ix86_place_single_vector_set (load->broadcast_reg,
+ load->broadcast_source,
+ load->bbs,
+ load);
+ break;
+ }
}
loop_optimizer_finalize ();
@@ -3905,48 +4683,12 @@ remove_redundant_vector_load (void)
return 0;
}
-namespace {
-
-const pass_data pass_data_remove_redundant_vector_load =
-{
- RTL_PASS, /* type */
- "rrvl", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_MACH_DEP, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_remove_redundant_vector_load : public rtl_opt_pass
-{
-public:
- pass_remove_redundant_vector_load (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
- {}
-
- /* opt_pass methods: */
- bool gate (function *fun) final override
- {
- return (TARGET_SSE2
- && optimize
- && optimize_function_for_speed_p (fun));
- }
-
- unsigned int execute (function *) final override
- {
- return remove_redundant_vector_load ();
- }
-}; // class pass_remove_redundant_vector_load
-
} // anon namespace
rtl_opt_pass *
-make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+make_pass_x86_cse (gcc::context *ctxt)
{
- return new pass_remove_redundant_vector_load (ctxt);
+ return new pass_x86_cse (ctxt);
}
/* Convert legacy instructions that clobbers EFLAGS to APX_NF
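
The placement strategy described in the x86_cse pass comment above amounts to
folding nearest_common_dominator over every basic block that contains one of
the identical patterns.  Below is a minimal sketch of that step only, assuming
GCC's dominance API and a bitmap of block indices; the actual helpers
(ix86_place_single_vector_set and ix86_place_single_tls_call) additionally
avoid placing the definition inside a loop and handle the TLS bookkeeping.

/* Sketch only, not part of the patch: pick the basic block whose entry
   dominates every block recorded in BBS, so a single hoisted definition
   can reach all identical patterns.  Assumes dominance info has been
   computed with calculate_dominance_info (CDI_DOMINATORS).  */
static basic_block
nearest_dominating_block (bitmap bbs)
{
  basic_block insert_bb = nullptr;
  bitmap_iterator bi;
  unsigned int i;

  EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      insert_bb = (insert_bb
		   ? nearest_common_dominator (CDI_DOMINATORS, insert_bb, bb)
		   : bb);
    }
  return insert_bb;
}
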
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 2fedbeb..c2db305 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -91,7 +91,6 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
-VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
VECTOR_MODE (FLOAT, BF, 2); /* V2BF */
VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
@@ -102,7 +101,6 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, QI, 12); /* V12QI */
VECTOR_MODE (INT, QI, 14); /* V14QI */
VECTOR_MODE (INT, HI, 6); /* V6HI */
-VECTOR_MODE (INT, SI, 64); /* V64SI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index ca6bb83..abb5dd7 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1172,6 +1172,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
OPT_mrecip,
MASK_RECIP),
+ IX86_ATTR_YES ("80387",
+ OPT_m80387,
+ MASK_80387),
+
IX86_ATTR_IX86_YES ("general-regs-only",
OPT_mgeneral_regs_only,
OPTION_MASK_GENERAL_REGS_ONLY),
@@ -1281,6 +1285,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
else if (type == ix86_opt_yes || type == ix86_opt_no)
{
+ opts_set->x_target_flags |= mask;
+
if (type == ix86_opt_no)
opt_set_p = !opt_set_p;
@@ -3556,6 +3562,10 @@ ix86_set_current_function (tree fndecl)
isa = "AVX";
else if (cfun->machine->func_type != TYPE_NORMAL)
isa = "SSE";
+ else if (TARGET_MMX)
+ isa = "MMX/3Dnow";
+ else if (TARGET_80387)
+ isa = "80387";
else
isa = NULL;
}
@@ -3615,6 +3625,18 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
+ if (TARGET_64BIT)
+ {
+ /* Do not warn when emulating the MS ABI. */
+ if ((TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE)
+ || ix86_function_type_abi (*node) != MS_ABI)
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
/* Can combine regparm with all attributes but fastcall, and thiscall. */
if (is_attribute_p ("regparm", name))
{
@@ -3627,7 +3649,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
{
- error ("regparam and thiscall attributes are not compatible");
+ error ("regparm and thiscall attributes are not compatible");
}
cst = TREE_VALUE (args);
@@ -3648,19 +3670,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
- if (TARGET_64BIT)
- {
- /* Do not warn when emulating the MS ABI. */
- if ((TREE_CODE (*node) != FUNCTION_TYPE
- && TREE_CODE (*node) != METHOD_TYPE)
- || ix86_function_type_abi (*node) != MS_ABI)
- warning (OPT_Wattributes, "%qE attribute ignored",
- name);
- *no_add_attrs = true;
- return NULL_TREE;
- }
-
- /* Can combine fastcall with stdcall (redundant) and sseregparm. */
+ /* Can combine fastcall with sseregparm. */
if (is_attribute_p ("fastcall", name))
{
if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -3681,8 +3691,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
}
}
- /* Can combine stdcall with fastcall (redundant), regparm and
- sseregparm. */
+ /* Can combine stdcall with regparm and sseregparm. */
else if (is_attribute_p ("stdcall", name))
{
if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -3732,6 +3741,10 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
{
error ("cdecl and thiscall attributes are not compatible");
}
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("regparm and thiscall attributes are not compatible");
+ }
}
/* Can combine sseregparm with all attributes. */
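
Two user-visible effects of the i386-options.cc changes above, shown with
hypothetical snippets (not taken from the patch's testsuite; a 32-bit target
is assumed for the calling-convention attributes):

/* The regparm/thiscall conflict is now also diagnosed when thiscall is
   the attribute being processed:
   error: regparm and thiscall attributes are not compatible.  */
void __attribute__ ((regparm (2), thiscall)) f (int);

/* "80387" becomes a per-function target attribute mirroring -m80387,
   assuming the usual yes/no attribute naming ("80387" / "no-80387").  */
double __attribute__ ((target ("no-80387"))) g (double x);
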
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 06f0288..553b46d 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,6 +35,6 @@ along with GCC; see the file COPYING3. If not see
PR116174. */
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
- INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
+ INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse);
INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 69bc0ee..bdb8bb9 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -290,6 +290,7 @@ extern rtx ix86_tls_module_base (void);
extern bool ix86_gpr_tls_address_pattern_p (rtx);
extern bool ix86_tls_address_pattern_p (rtx);
extern rtx ix86_rewrite_tls_address (rtx);
+extern rtx ix86_tls_get_addr (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
@@ -430,8 +431,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
(gcc::context *);
extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
-extern rtl_opt_pass *make_pass_remove_redundant_vector_load
- (gcc::context *);
+extern rtl_opt_pass *make_pass_x86_cse (gcc::context *);
extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
@@ -448,3 +448,4 @@ extern void ix86_set_handled_components (sbitmap);
/* In i386-expand.cc. */
bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*,
HOST_WIDE_INT*);
+rtx ix86_vgf2p8affine_shift_matrix (rtx, rtx, enum rtx_code);
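
The newly exported ix86_tls_get_addr is where the i386.cc hunk below adds a
diagnostic for no_caller_saved_registers functions.  A hypothetical
reproducer, assuming -fPIC so the TLS access is expanded through
__tls_get_addr under -mtls-dialect=gnu:

/* Hypothetical reproducer; compile with -fPIC -mtls-dialect=gnu.  */
extern __thread int counter;

__attribute__ ((no_caller_saved_registers))
void bump (void)
{
  /* Global-dynamic TLS emits a __tls_get_addr call, which now triggers
     the error suggesting -mtls-dialect=gnu2.  */
  counter++;
}
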
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4682db85..471be3e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -11382,6 +11382,23 @@ ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
return cost;
}
+
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
+
+bool
+ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned int align,
+ enum by_pieces_operation op,
+ bool speed_p)
+{
+ /* Return true when we are currently expanding memcpy/memset epilogue
+ with move_by_pieces or store_by_pieces. */
+ if (cfun->machine->by_pieces_in_use)
+ return true;
+
+ return default_use_by_pieces_infrastructure_p (size, align, op,
+ speed_p);
+}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
this is used for to form addresses to local data when -fPIC is in
@@ -12439,9 +12456,31 @@ ix86_tls_index (void)
static GTY(()) rtx ix86_tls_symbol;
-static rtx
+rtx
ix86_tls_get_addr (void)
{
+ if (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ {
+ /* __tls_get_addr doesn't preserve vector registers. When a
+	 function with the no_caller_saved_registers attribute calls
+ __tls_get_addr, YMM and ZMM registers will be clobbered.
+ Issue an error and suggest -mtls-dialect=gnu2 in this case. */
+ if (cfun->machine->func_type == TYPE_NORMAL)
+ error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
+ " with the %<no_caller_saved_registers%> attribute"));
+ else
+ error (cfun->machine->func_type == TYPE_EXCEPTION
+ ? G_("%<-mtls-dialect=gnu2%> must be used with an"
+ " exception service routine")
+ : G_("%<-mtls-dialect=gnu2%> must be used with an"
+ " interrupt service routine"));
+ /* Don't issue the same error twice. */
+ cfun->machine->func_type = TYPE_NORMAL;
+ cfun->machine->call_saved_registers
+ = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
+ }
+
if (!ix86_tls_symbol)
{
const char *sym
@@ -20007,7 +20046,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree utype, ures, vce;
utype = unsigned_type_for (TREE_TYPE (arg0));
/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
- instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
+ instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
loc = gimple_location (stmt);
@@ -21491,8 +21530,7 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
/* Register pair for mask registers. */
if (mode == P2QImode || mode == P2HImode)
return 2;
- if (mode == V64SFmode || mode == V64SImode)
- return 4;
+
return 1;
}
@@ -22081,6 +22119,15 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
}
/* FALLTHRU */
case V32QImode:
+ if (TARGET_GFNI && constant_op1)
+ {
+	  /* Use vgf2p8affine.  It needs one extra load for the mask, but
+	     in a loop with enough registers the load will be hoisted out.
+	     So for now don't account for the constant mask load.  This is
+	     not quite right for non-loop vectorization.  */
+ extra = 0;
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+ }
if (TARGET_AVX2)
/* Use vpbroadcast. */
extra = cost->sse_op;
@@ -22115,6 +22162,11 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
count = 9;
return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+ case V64QImode:
+ /* Ignore the mask load for GF2P8AFFINEQB. */
+ extra = 0;
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+
case V2DImode:
case V4DImode:
/* V*DImode arithmetic right shift is emulated. */
@@ -23132,7 +23184,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
So current solution is make constant disp as cheap as possible. */
if (GET_CODE (addr) == PLUS
&& x86_64_immediate_operand (XEXP (addr, 1), Pmode)
- /* Only hanlde (reg + disp) since other forms of addr are mostly LEA,
+ /* Only handle (reg + disp) since other forms of addr are mostly LEA,
there's no additional cost for the plus of disp. */
&& register_operand (XEXP (addr, 0), Pmode))
{
@@ -25211,20 +25263,14 @@ asm_preferred_eh_data_format (int code, int global)
return DW_EH_PE_absptr;
}
-/* Implement targetm.vectorize.builtin_vectorization_cost. */
+/* Worker for ix86_builtin_vectorization_cost and the fallback calls
+ from ix86_vector_costs::add_stmt_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
- tree vectype, int)
+ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
+ machine_mode mode)
{
- bool fp = false;
- machine_mode mode = TImode;
+ bool fp = FLOAT_MODE_P (mode);
int index;
- if (vectype != NULL)
- {
- fp = FLOAT_TYPE_P (vectype);
- mode = TYPE_MODE (vectype);
- }
-
switch (type_of_cost)
{
case scalar_stmt:
@@ -25283,14 +25329,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
COSTS_N_INSNS
(ix86_cost->gather_static
+ ix86_cost->gather_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case vector_scatter_store:
return ix86_vec_cost (mode,
COSTS_N_INSNS
(ix86_cost->scatter_static
+ ix86_cost->scatter_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case cond_branch_taken:
return ix86_cost->cond_taken_branch_cost;
@@ -25308,7 +25354,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
- int n = TYPE_VECTOR_SUBPARTS (vectype);
+ int n = GET_MODE_NUNITS (mode);
/* N - 1 element inserts into an SSE vector, the possible
GPR -> XMM move is accounted for in add_stmt_cost. */
if (GET_MODE_BITSIZE (mode) <= 128)
@@ -25336,6 +25382,17 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int)
+{
+ machine_mode mode = TImode;
+ if (vectype != NULL)
+ mode = TYPE_MODE (vectype);
+ return ix86_default_vector_cost (type_of_cost, mode);
+}
+
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
@@ -25768,15 +25825,20 @@ private:
unsigned m_num_sse_needed[3];
/* Number of 256-bit vector permutation. */
unsigned m_num_avx256_vec_perm[3];
+  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR.  */
+  unsigned m_num_reduc[X86_REDUC_LAST];
+  /* Do not unroll if m_prefer_unroll is false; it defaults to true.  */
+  bool m_prefer_unroll;
};
ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
: vector_costs (vinfo, costing_for_scalar),
m_num_gpr_needed (),
m_num_sse_needed (),
- m_num_avx256_vec_perm ()
-{
-}
+ m_num_avx256_vec_perm (),
+ m_num_reduc (),
+ m_prefer_unroll (true)
+{}
/* Implement targetm.vectorize.create_costs. */
@@ -25789,7 +25851,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
- tree vectype, int misalign,
+ tree vectype, int,
vect_cost_model_location where)
{
unsigned retval = 0;
@@ -26073,6 +26135,125 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
}
}
+  /* In the vectorized body, note gather/scatter and any scalarization
+     (which make unrolling unattractive) and count reduction stmts.  */
+ if (where == vect_body && !m_costing_for_scalar)
+ {
+ switch (kind)
+ {
+ /* Emulated gather/scatter or any scalarization. */
+ case scalar_load:
+ case scalar_stmt:
+ case scalar_store:
+ case vector_gather_load:
+ case vector_scatter_store:
+ m_prefer_unroll = false;
+ break;
+
+ case vector_stmt:
+ case vec_to_scalar:
+	    /* Count reduction FMAs and "real" DOT_PROD_EXPRs; unrolling
+	       in the vectorizer will enable partial sums.  */
+ if (stmt_info
+ && vect_is_reduction (stmt_info)
+ && stmt_info->stmt)
+ {
+ /* Handle __builtin_fma. */
+ if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
+ {
+ m_num_reduc[X86_REDUC_FMA] += count;
+ break;
+ }
+
+ if (!is_gimple_assign (stmt_info->stmt))
+ break;
+
+ tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ tree rhs1, rhs2;
+ bool native_vnni_p = true;
+ gimple* def;
+ machine_mode mode_rhs;
+ switch (subcode)
+ {
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ if (!fp || !flag_associative_math
+ || flag_fp_contract_mode != FP_CONTRACT_FAST)
+ break;
+
+ /* FMA condition for different modes. */
+ if (((inner_mode == DFmode || inner_mode == SFmode)
+ && !TARGET_FMA && !TARGET_AVX512VL)
+ || (inner_mode == HFmode && !TARGET_AVX512FP16)
+ || (inner_mode == BFmode && !TARGET_AVX10_2))
+ break;
+
+ /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
+ to FMA/FNMA after vectorization. */
+ rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
+ rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
+ if (subcode == PLUS_EXPR
+ && TREE_CODE (rhs1) == SSA_NAME
+ && (def = SSA_NAME_DEF_STMT (rhs1), true)
+ && is_gimple_assign (def)
+ && gimple_assign_rhs_code (def) == MULT_EXPR)
+ m_num_reduc[X86_REDUC_FMA] += count;
+ else if (TREE_CODE (rhs2) == SSA_NAME
+ && (def = SSA_NAME_DEF_STMT (rhs2), true)
+ && is_gimple_assign (def)
+ && gimple_assign_rhs_code (def) == MULT_EXPR)
+ m_num_reduc[X86_REDUC_FMA] += count;
+ break;
+
+	      /* The vectorizer's lane_reducing_op_p supports DOT_PROD_EXPR,
+		 WIDEN_SUM_EXPR and SAD_EXPR; the x86 backend only supports
+		 SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
+ case DOT_PROD_EXPR:
+ rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
+ mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
+ if (mode_rhs == QImode)
+ {
+ rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
+ signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
+ signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
+
+ /* vpdpbusd. */
+ if (signop1_p != signop2_p)
+ native_vnni_p
+ = (GET_MODE_SIZE (mode) == 64
+ ? TARGET_AVX512VNNI
+ : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
+ || TARGET_AVXVNNI));
+ else
+ /* vpdpbssd. */
+ native_vnni_p
+ = (GET_MODE_SIZE (mode) == 64
+ ? TARGET_AVX10_2
+ : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
+ }
+ m_num_reduc[X86_REDUC_DOT_PROD] += count;
+
+	      /* Penalize emulated DOT_PROD_EXPR so that unrolling and
+		 partial sums are discouraged for it.  */
+ if (!native_vnni_p)
+ m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
+ break;
+
+ case SAD_EXPR:
+ m_num_reduc[X86_REDUC_SAD] += count;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ default:
+ break;
+ }
+ }
+
combined_fn cfn;
if ((kind == vector_stmt || kind == scalar_stmt)
&& stmt_info
@@ -26128,32 +26309,23 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(AGU and load ports). Try to account for this by scaling the
construction cost by the number of elements involved. */
if ((kind == vec_construct || kind == vec_to_scalar)
- && ((stmt_info
- && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
- || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
- && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
+ && ((node
+ && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
+ && SLP_TREE_LANES (node) == 1))
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
+ (SLP_TREE_REPRESENTATIVE (node))))
!= INTEGER_CST))
- || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
- == VMAT_GATHER_SCATTER)))
- || (node
- && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
- || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
- && SLP_TREE_LANES (node) == 1))
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
- (SLP_TREE_REPRESENTATIVE (node))))
- != INTEGER_CST))
- || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
- == VMAT_GATHER_SCATTER)))))
- {
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
+ {
+ stmt_cost = ix86_default_vector_cost (kind, mode);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
}
else if ((kind == vec_construct || kind == scalar_to_vec)
&& node
&& SLP_TREE_DEF_TYPE (node) == vect_external_def)
{
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
unsigned i;
tree op;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
@@ -26217,7 +26389,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
TREE_VISITED (op) = 0;
}
if (stmt_cost == -1)
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
if (kind == vec_perm && vectype
&& GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
@@ -26288,6 +26460,41 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
&& (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
> ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
m_costs[vect_body] = INT_MAX;
+
+ bool any_reduc_p = false;
+ for (int i = 0; i != X86_REDUC_LAST; i++)
+ if (m_num_reduc[i])
+ {
+ any_reduc_p = true;
+ break;
+ }
+
+ if (any_reduc_p
+	  /* Not much gain for loops with gathers and scatters.  */
+ && m_prefer_unroll
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ {
+ unsigned unroll_factor
+ = OPTION_SET_P (ix86_vect_unroll_limit)
+ ? ix86_vect_unroll_limit
+ : ix86_cost->vect_unroll_limit;
+
+ if (unroll_factor > 1)
+ {
+	      for (int i = 0; i != X86_REDUC_LAST; i++)
+ {
+ if (m_num_reduc[i])
+ {
+ unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
+ m_num_reduc[i]);
+ unroll_factor = MIN (unroll_factor, tmp);
+ }
+ }
+
+ m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
+ }
+ }
+
}
ix86_vect_estimate_reg_pressure ();
@@ -27171,9 +27378,9 @@ ix86_memtag_can_tag_addresses ()
return ix86_lam_type != lam_none && TARGET_LP64;
}
-/* Implement TARGET_MEMTAG_TAG_SIZE. */
+/* Implement TARGET_MEMTAG_TAG_BITSIZE. */
unsigned char
-ix86_memtag_tag_size ()
+ix86_memtag_tag_bitsize ()
{
return IX86_HWASAN_TAG_SIZE;
}
@@ -27744,6 +27951,10 @@ static const scoped_attribute_specs *const ix86_attribute_table[] =
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+ ix86_use_by_pieces_infrastructure_p
+
#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
@@ -28147,8 +28358,8 @@ ix86_libgcc_floating_mode_supported_p
#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
-#undef TARGET_MEMTAG_TAG_SIZE
-#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
+#undef TARGET_MEMTAG_TAG_BITSIZE
+#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
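
To make the unroll heuristic in finish_cost above concrete, here is a worked
example with illustrative numbers only; in the pass the inputs come from
ix86_cost->vect_unroll_limit and ix86_cost->reduc_lat_mult_thr[].

/* Illustrative values, not taken from any tuning table.  */
static unsigned
example_suggested_unroll (void)
{
  unsigned limit = 4;	  /* vect_unroll_limit (assumed).		  */
  unsigned fma_thr = 8;	  /* reduc_lat_mult_thr[X86_REDUC_FMA] (assumed). */
  unsigned num_fma = 3;	  /* FMA reductions counted in the loop body.	  */

  unsigned tmp = (fma_thr + num_fma - 1) / num_fma;  /* CEIL (8, 3) = 3.  */
  unsigned factor = tmp < limit ? tmp : limit;	      /* MIN (4, 3) = 3.   */

  /* Round up to a power of two, as 1 << ceil_log2 (factor) does.  */
  unsigned suggested = 1;
  while (suggested < factor)
    suggested <<= 1;
  return suggested;	  /* 4 in this example.  */
}
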
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 791f3b9..ac0ce68 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -102,6 +102,15 @@ struct stringop_algs
#define COSTS_N_BYTES(N) ((N) * 2)
#endif
+
+enum ix86_reduc_unroll_factor {
+ X86_REDUC_FMA,
+ X86_REDUC_DOT_PROD,
+ X86_REDUC_SAD,
+
+ X86_REDUC_LAST
+};
+
/* Define the specific costs for a given cpu. NB: hard_register is used
by TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute
hard register move costs by register allocator. Relative costs of
@@ -225,6 +234,13 @@ struct processor_costs {
to number of instructions executed in
parallel. See also
ix86_reassociation_width. */
+ const unsigned reduc_lat_mult_thr[X86_REDUC_LAST];
+				/* Latency times throughput of
+				   FMA/DOT_PROD_EXPR/SAD_EXPR; used to
+				   determine the unroll factor in the
+				   vectorizer.  */
+ const unsigned vect_unroll_limit; /* Limit how much the autovectorizer
+ may unroll a loop. */
struct stringop_algs *memcpy, *memset;
const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
cost model. */
@@ -644,7 +660,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
{"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
{"arch", "%{!march=*:-march=%(VALUE)}"}, \
{"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \
- {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"},
+ {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, \
+ {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"},
/* Specs for the compiler proper */
@@ -2477,9 +2494,9 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D
| PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
- | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
- | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT
+ | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL
+ | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
constexpr wide_int_bitmask PTA_BDVER2 = PTA_BDVER1 | PTA_BMI | PTA_TBM
| PTA_F16C | PTA_FMA;
constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT
@@ -2487,13 +2504,13 @@ constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT
constexpr wide_int_bitmask PTA_BDVER4 = PTA_BDVER3 | PTA_AVX2 | PTA_BMI2
| PTA_RDRND | PTA_MOVBE | PTA_MWAITX;
-constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
- | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2
- | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
- | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
- | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT
- | PTA_POPCNT;
+constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT
+ | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL
+ | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
+ | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE
+ | PTA_MWAITX | PTA_ADX | PTA_RDSEED | PTA_CLZERO | PTA_CLFLUSHOPT
+ | PTA_XSAVEC | PTA_XSAVES | PTA_SHA;
constexpr wide_int_bitmask PTA_ZNVER2 = PTA_ZNVER1 | PTA_CLWB | PTA_RDPID
| PTA_WBNOINVD;
constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
@@ -2506,19 +2523,19 @@ constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI
| PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI;
constexpr wide_int_bitmask PTA_BTVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16
- | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_LZCNT | PTA_POPCNT
+ | PTA_ABM | PTA_CX16 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
constexpr wide_int_bitmask PTA_BTVER2 = PTA_BTVER1 | PTA_SSE4_1 | PTA_SSE4_2
| PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_BMI | PTA_F16C | PTA_MOVBE
| PTA_XSAVEOPT;
constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW
- | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE
- | PTA_ADX | PTA_RDSEED | PTA_POPCNT;
+ | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_LZCNT | PTA_POPCNT | PTA_ABM
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI
+ | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
+ | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED;
constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2
- | PTA_F16C | PTA_FMA | PTA_SHA | PTA_LZCNT;
+ | PTA_F16C | PTA_FMA | PTA_SHA;
#ifndef GENERATOR_FILE
@@ -2865,6 +2882,9 @@ struct GTY(()) machine_function {
approximation. */
BOOL_BITFIELD tls_descriptor_call_expanded_p : 1;
+ /* True if TLS descriptor is called more than once. */
+ BOOL_BITFIELD tls_descriptor_call_multiple_p : 1;
+
/* If true, the current function has a STATIC_CHAIN is placed on the
stack below the return address. */
BOOL_BITFIELD static_chain_on_stack : 1;
@@ -2934,6 +2954,9 @@ struct GTY(()) machine_function {
/* True if this is a recursive function. */
BOOL_BITFIELD recursive_function : 1;
+ /* True if by_pieces op is currently in use. */
+ BOOL_BITFIELD by_pieces_in_use : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eb52699..cea6c15 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -901,6 +901,10 @@
(define_attr "avx_partial_xmm_update" "false,true"
(const_string "false"))
+;; Define attribute to indicate 64-bit TLS insns.
+(define_attr "tls64" "gd,ld_base,call,combine,lea,none"
+ (const_string "none"))
+
;; Define attribute to classify add/sub insns that consumes carry flag (CF)
(define_attr "use_carry" "0,1" (const_string "0"))
@@ -1618,10 +1622,8 @@
(compare
(match_operand:QI 0 "nonimmediate_operand" "QBn")
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%h1, %0|%0, %h1}"
[(set_attr "addr" "gpr8")
@@ -1632,10 +1634,8 @@
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "const0_operand")))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t%h0, %h0"
@@ -1657,10 +1657,8 @@
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "general_operand" "QnBn")))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%1, %h0|%h0, %1}"
@@ -1672,15 +1670,11 @@
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))]
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "icmp")
@@ -2968,7 +2962,8 @@
(match_operand:SWI248 1 "const_int_operand"))]
"optimize_insn_for_size_p () && optimize_size > 1
&& operands[1] != const0_rtx
- && operands[1] != constm1_rtx
+ && (operands[1] != constm1_rtx
+ || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0])))
&& IN_RANGE (INTVAL (operands[1]), -128, 127)
&& !ix86_red_zone_used
&& REGNO (operands[0]) != SP_REG"
@@ -3479,10 +3474,8 @@
[(set (strict_low_part
(match_operand:QI 0 "register_operand" "+Q"))
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"mov{b}\t{%h1, %0|%0, %h1}"
[(set_attr "type" "imov")
@@ -3565,10 +3558,8 @@
(define_insn "*extzvqi"
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R")
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q,Q")]) 0))]
""
{
switch (get_attr_type (insn))
@@ -3689,10 +3680,8 @@
(match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]))]
""
"mov{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "imov")
@@ -5259,10 +5248,8 @@
[(set (match_operand:SWI24 0 "register_operand" "=R")
(sign_extend:SWI24
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)))]
""
"movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
[(set_attr "type" "imovx")
@@ -7008,10 +6995,8 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
@@ -7025,8 +7010,8 @@
[(set (strict_low_part (match_dup 0))
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7037,29 +7022,25 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
[(set (strict_low_part (match_dup 0))
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (strict_low_part (match_dup 0))
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7474,10 +7455,8 @@
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
""
@@ -7490,29 +7469,25 @@
[(set (match_operand:QI 0 "register_operand" "=&Q")
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"&& reload_completed"
[(set (match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (match_dup 0)
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7542,10 +7517,8 @@
(subreg:SWI248
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -7580,8 +7553,8 @@
(subreg:SWI248
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7601,15 +7574,11 @@
(subreg:SWI248
(plusminus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "<comm>0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "<comm>0,!Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
""
"@
@@ -7628,11 +7597,11 @@
(subreg:SWI248
(plusminus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8229,10 +8198,8 @@
(minus:QI
(match_operand:QI 1 "nonimmediate_operand" "0,!qm")
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"@
@@ -8246,8 +8213,8 @@
(minus:QI
(match_dup 0)
(subreg:QI
- (match_op_dup 3
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8257,30 +8224,26 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
(minus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
[(set (strict_low_part (match_dup 0))
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0))
(parallel
[(set (strict_low_part (match_dup 0))
(minus:QI
(match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8331,10 +8294,8 @@
(minus:QI
(match_operand:QI 1 "nonimmediate_operand" "0")
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"sub{b}\t{%h2, %0|%0, %h2}"
@@ -8346,30 +8307,26 @@
[(set (match_operand:QI 0 "register_operand" "=&Q")
(minus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"&& reload_completed"
[(set (match_dup 0)
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0))
(parallel
[(set (match_dup 0)
(minus:QI
(match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8384,10 +8341,8 @@
(subreg:SWI248
(minus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -8406,8 +8361,8 @@
(subreg:SWI248
(minus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -12355,10 +12310,8 @@
(compare
(and:QI
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "general_operand" "QnBn"))
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)"
@@ -12372,15 +12325,11 @@
(compare
(and:QI
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0))
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0))
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t{%h1, %h0|%h0, %h1}"
@@ -12969,10 +12918,8 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
@@ -12986,8 +12933,8 @@
[(set (strict_low_part (match_dup 0))
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -12998,29 +12945,25 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
[(set (strict_low_part (match_dup 0))
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (strict_low_part (match_dup 0))
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -13223,10 +13166,8 @@
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
""
@@ -13239,29 +13180,25 @@
[(set (match_operand:QI 0 "register_operand" "=&Q")
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"&& reload_completed"
[(set (match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (match_dup 0)
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -13291,10 +13228,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -13313,8 +13248,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -13328,10 +13263,8 @@
(match_operator 5 "compare_operator"
[(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn"))
(const_int 0)]))
(set (zero_extract:SWI248
@@ -13341,8 +13274,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))]
"ix86_match_ccmode (insn, CCNOmode)"
"@
@@ -13358,9 +13291,9 @@
[(set (match_dup 4)
(match_op_dup 5
[(any_logic:QI
- (subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (subreg:QI
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2))
(const_int 0)]))
(set (zero_extract:SWI248
@@ -13368,8 +13301,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))])]
""
[(set_attr "addr" "gpr8")
@@ -13385,15 +13318,11 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "%0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "%0,!Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
""
"@
@@ -13412,11 +13341,11 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -13428,12 +13357,10 @@
(match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
- (match_operator:SWI248 3 "extract_operator"
+ (match_operator:SWI248 3 "extract_high_operator"
[(any_logic
(match_operand 1 "int248_register_operand" "%0,!Q")
- (match_operand 2 "int248_register_operand" "Q,Q"))
- (const_int 8)
- (const_int 8)]))
+ (match_operand 2 "int248_register_operand" "Q,Q"))]))
(clobber (reg:CC FLAGS_REG))]
"GET_MODE (operands[1]) == GET_MODE (operands[2])"
"@
@@ -13449,9 +13376,9 @@
(parallel
[(set (zero_extract:SWI248
(match_dup 0) (const_int 8) (const_int 8))
- (match_op_dup 3
- [(any_logic (match_dup 4) (match_dup 2))
- (const_int 8) (const_int 8)]))
+ (zero_extract:SWI248
+ (any_logic (match_dup 4) (match_dup 2))
+ (const_int 8) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])]
"operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
[(set_attr "type" "alu")
@@ -14696,10 +14623,8 @@
(subreg:SWI248
(neg:QI
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
""
"@
@@ -14717,8 +14642,8 @@
(subreg:SWI248
(neg:QI
(subreg:QI
- (match_op_dup 2
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "negnot")
@@ -15350,13 +15275,9 @@
(match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
- (subreg:SWI248
- (not:QI
- (subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))]
+ (not:SWI248
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")])))]
""
"@
not{b}\t%h0
@@ -15369,11 +15290,8 @@
(match_dup 1) (const_int 8) (const_int 8)))
(set (zero_extract:SWI248
(match_dup 0) (const_int 8) (const_int 8))
- (subreg:SWI248
- (not:QI
- (subreg:QI
- (match_op_dup 2
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
+ (not:SWI248
+ (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))))]
""
[(set_attr "type" "negnot")
(set_attr "mode" "QI")])
@@ -16720,10 +16638,8 @@
(subreg:SWI248
(ashift:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -16757,8 +16673,8 @@
(subreg:SWI248
(ashift:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -18004,10 +17920,8 @@
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -18033,8 +17947,8 @@
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -18388,17 +18302,17 @@
(any_rotate:SWI
(match_operand:SWI 1 "const_int_operand")
(subreg:QI
- (and
- (match_operand 2 "int248_register_operand")
- (match_operand 3 "const_int_operand")) 0)))]
+ (match_operator 4 "and_operator"
+ [(match_operand 2 "int248_register_operand")
+ (match_operand 3 "const_int_operand")]) 0)))]
"(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
== GET_MODE_BITSIZE (<MODE>mode) - 1"
- [(set (match_dup 4) (match_dup 1))
+ [(set (match_dup 5) (match_dup 1))
(set (match_dup 0)
- (any_rotate:SWI (match_dup 4)
+ (any_rotate:SWI (match_dup 5)
(subreg:QI
- (and:SI (match_dup 2) (match_dup 3)) 0)))]
- "operands[4] = gen_reg_rtx (<MODE>mode);")
+ (match_op_dup 4 [(match_dup 2) (match_dup 3)]) 0)))]
+ "operands[5] = gen_reg_rtx (<MODE>mode);")
(define_insn_and_split "*<insn><mode>3_mask_1"
[(set (match_operand:SWI 0 "nonimmediate_operand")
@@ -23243,6 +23157,7 @@
return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "gd")
(set (attr "length")
(symbol_ref "TARGET_X32 ? 15 : 16"))])
@@ -23281,7 +23196,11 @@
UNSPEC_TLS_GD)
(clobber (match_operand:P 3 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_local_dynamic_base_32_gnu"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -23343,6 +23262,7 @@
return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "ld_base")
(set_attr "length" "12")])
(define_insn "*tls_local_dynamic_base_64_largepic"
@@ -23376,7 +23296,11 @@
(unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
(clobber (match_operand:P 2 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
;; Local dynamic of a single variable is a lose. Show combine how
;; to convert that back to global dynamic.
@@ -23570,6 +23494,8 @@
"TARGET_64BIT && TARGET_GNU2_TLS"
{
operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
ix86_tls_descriptor_calls_expanded_in_cfun = true;
})
@@ -23581,6 +23507,7 @@
"lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
[(set_attr "type" "lea")
(set_attr "mode" "<MODE>")
+ (set_attr "tls64" "lea")
(set_attr "length" "7")
(set_attr "length_address" "4")])
@@ -23594,6 +23521,7 @@
"TARGET_64BIT && TARGET_GNU2_TLS"
"call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
[(set_attr "type" "call")
+ (set_attr "tls64" "call")
(set_attr "length" "2")
(set_attr "length_address" "0")])
@@ -23615,7 +23543,8 @@
{
operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
-})
+}
+ [(set_attr "tls64" "combine")])
(define_split
[(match_operand 0 "tls_address_pattern")]
@@ -28251,10 +28180,8 @@
(match_operator 1 "compare_operator"
[(and:QI
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand")]) 0)
(match_operand 3 "const_int_operand"))
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
@@ -28266,9 +28193,9 @@
(match_op_dup 1
[(and:QI
(subreg:QI
- (match_op_dup 4 [(match_dup 2)
- (const_int 8)
- (const_int 8)]) 0)
+ (zero_extract:SWI248 (match_dup 2)
+ (const_int 8)
+ (const_int 8)) 0)
(match_dup 3))
(const_int 0)]))
(set (zero_extract:SWI248 (match_dup 2)
@@ -28277,9 +28204,9 @@
(subreg:SWI248
(and:QI
(subreg:QI
- (match_op_dup 4 [(match_dup 2)
- (const_int 8)
- (const_int 8)]) 0)
+ (zero_extract:SWI248 (match_dup 2)
+ (const_int 8)
+ (const_int 8)) 0)
(match_dup 3)) 0))])])
;; Don't do logical operations with memory inputs.
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index c93c0b1..6bda22f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1246,6 +1246,10 @@ munroll-only-small-loops
Target Var(ix86_unroll_only_small_loops) Init(0) Optimization
Enable conservative small loop unrolling.
+-param=ix86-vect-unroll-limit=
+Target Joined UInteger Var(ix86_vect_unroll_limit) Init(4) Param
+Limit how much the autovectorizer may unroll a loop.
+
mlam=
Target RejectNegative Joined Enum(lam_type) Var(ix86_lam_type) Init(lam_none)
-mlam=[none|u48|u57] Instrument meta data position in user data pointers.
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b2d2eec..5dbe444 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1319,6 +1319,9 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_test "const_vec_duplicate_p (op)")))
+(define_predicate "const_vec_dup_operand"
+ (match_test "const_vec_duplicate_p (op)"))
+
;; Return true when OP is either register operand, or any
;; CONST_VECTOR.
(define_predicate "reg_or_const_vector_operand"
@@ -1714,10 +1717,14 @@
(define_predicate "div_operator"
(match_code "div"))
-;; Return true if this is a and, ior or xor operation.
+;; Return true if this is an and, ior or xor operation.
(define_predicate "logic_operator"
(match_code "and,ior,xor"))
+;; Return true if this is an and operation.
+(define_predicate "and_operator"
+ (match_code "and"))
+
;; Return true if this is a plus, minus, and, ior or xor operation.
(define_predicate "plusminuslogic_operator"
(match_code "plus,minus,and,ior,xor"))
@@ -1740,8 +1747,12 @@
(define_predicate "compare_operator"
(match_code "compare"))
-(define_predicate "extract_operator"
- (match_code "zero_extract,sign_extract"))
+(define_predicate "extract_high_operator"
+ (match_code "zero_extract,sign_extract,ashiftrt,lshiftrt")
+{
+ return (const8_operand (XEXP (op, 1), VOIDmode)
+ && (BINARY_P (op) || const8_operand (XEXP (op, 2), VOIDmode)));
+})
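The predicate above folds two RTL spellings of the same operation into one: extracting the 8-bit field that starts at bit 8, and shifting right by 8 and using the low byte of the result. A small C illustration of why those are the same access (the function names are mine, purely illustrative):

#include <stdint.h>

/* Both functions read the "high byte" (bits 8..15) of a wider value:
   one as a logical shift right by 8, one as a bit-field extract.  The
   extract_high_operator predicate lets a single machine pattern match
   either RTL shape.  */
static uint8_t
high_byte_via_shift (uint32_t x)
{
  return (uint8_t) (x >> 8);
}

struct two_bytes { uint32_t lo : 8, hi : 8; };

static uint8_t
high_byte_via_field (struct two_bytes v)
{
  return (uint8_t) v.hi;
}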
;; Return true if OP is a memory operand, aligned to
;; less than its natural alignment.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d88c3d6..73906b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -326,6 +326,9 @@
(define_mode_iterator VI1_AVX512VL
[V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
+(define_mode_iterator VI1_AVX512_3264
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX")])
+
;; All vector modes
(define_mode_iterator V
[(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
@@ -21729,6 +21732,19 @@
(const_string "orig")))
(set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+;; Eliminate redundancy caused by
+;; /* Special case TImode to 128-bit vector conversions via V2DI. */
+;; in ix86_expand_vector_move
+
+(define_split
+ [(set (match_operand:V2DI 0 "register_operand")
+ (vec_concat:V2DI
+ (subreg:DI (match_operand:TI 1 "register_operand") 0)
+ (subreg:DI (match_dup 1) 8)))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0)
+ (subreg:V2DI (match_dup 1) 0))])
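The split above notices that re-concatenating the two 64-bit halves of one TImode value is only a change of mode, not a data rearrangement. A minimal C sketch of the equivalence, assuming GCC/Clang __int128 support and SSE2 intrinsics (function names are mine):

#include <immintrin.h>
#include <stdint.h>
#include <string.h>

/* What the pre-split RTL does: pull the two halves out of a 128-bit
   integer and rebuild a V2DI vector from them.  */
static __m128i
concat_halves (unsigned __int128 t)
{
  uint64_t lo = (uint64_t) t;
  uint64_t hi = (uint64_t) (t >> 64);
  return _mm_set_epi64x ((long long) hi, (long long) lo);
}

/* What the split rewrites it to: the same bits, reinterpreted in place.  */
static __m128i
bitcast_ti (unsigned __int128 t)
{
  __m128i v;
  memcpy (&v, &t, sizeof v);
  return v;
}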
+
(define_insn "*vec_concatv2di_0"
[(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
(vec_concat:V2DI
@@ -26546,9 +26562,9 @@
;; XOP packed rotate instructions
(define_expand "rotl<mode>3"
- [(set (match_operand:VI_128 0 "register_operand")
- (rotate:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand")
+ [(set (match_operand:VI248_128 0 "register_operand")
+ (rotate:VI248_128
+ (match_operand:VI248_128 1 "nonimmediate_operand")
(match_operand:SI 2 "general_operand")))]
"TARGET_XOP"
{
@@ -26577,9 +26593,9 @@
})
(define_expand "rotr<mode>3"
- [(set (match_operand:VI_128 0 "register_operand")
- (rotatert:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand")
+ [(set (match_operand:VI248_128 0 "register_operand")
+ (rotatert:VI248_128
+ (match_operand:VI248_128 1 "nonimmediate_operand")
(match_operand:SI 2 "general_operand")))]
"TARGET_XOP"
{
@@ -26951,31 +26967,122 @@
int i;
if (<CODE> != ASHIFT)
- {
- if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- else
- negate = true;
- }
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ negate = true;
+ }
par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
tmp = lowpart_subreg (QImode, operands[2], SImode);
for (i = 0; i < 16; i++)
- XVECEXP (par, 0, i) = tmp;
+ XVECEXP (par, 0, i) = tmp;
tmp = gen_reg_rtx (V16QImode);
emit_insn (gen_vec_initv16qiqi (tmp, par));
if (negate)
- emit_insn (gen_negv16qi2 (tmp, tmp));
+ emit_insn (gen_negv16qi2 (tmp, tmp));
gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
emit_insn (gen (operands[0], operands[1], tmp));
}
+ else if (TARGET_GFNI && CONST_INT_P (operands[2])
+ && (<MODE_SIZE> == 64
+ || !(INTVAL (operands[2]) == 7 && <CODE> == ASHIFTRT)))
+ {
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2],
+ <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+ const0_rtx));
+ }
else
ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
DONE;
})
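The GFNI path works because GF2P8AFFINEQB applies, per byte, an affine map over GF(2): each result bit is the XOR-reduction (parity) of the source byte ANDed with one row of an 8x8 bit matrix, optionally XORed with a constant bit. Every constant byte shift or rotate is such a map, which is what ix86_vgf2p8affine_shift_matrix encodes. A C model of the math only (the row and bit ordering the instruction expects is handled by that helper and is not reproduced here; function names are mine):

#include <stdint.h>

/* Multiply an 8x8 bit matrix by a byte over GF(2): result bit j is the
   parity of (row[j] & x), XORed with constant bit j of b.  */
static uint8_t
gf2_affine_byte (const uint8_t row[8], uint8_t x, uint8_t b)
{
  uint8_t r = b;
  for (int j = 0; j < 8; j++)
    r ^= (uint8_t) (__builtin_parity (row[j] & x) << j);
  return r;
}

/* Example: a logical left shift by 3 -- result bit j takes input bit j-3,
   so row j selects exactly that bit.  */
static uint8_t
shl3_via_matrix (uint8_t x)
{
  uint8_t row[8] = { 0 };
  for (int j = 3; j < 8; j++)
    row[j] = (uint8_t) (1u << (j - 3));
  return gf2_affine_byte (row, x, 0);   /* equals (uint8_t) (x << 3) */
}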
+(define_expand "cond_<insn><mode>"
+ [(set (match_operand:VI1_AVX512VL 0 "register_operand")
+ (vec_merge:VI1_AVX512VL
+ (any_shift:VI1_AVX512VL
+ (match_operand:VI1_AVX512VL 2 "register_operand")
+ (match_operand:VI1_AVX512VL 3 "const_vec_dup_operand"))
+ (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand")))]
+ "TARGET_GFNI && TARGET_AVX512F"
+{
+ rtx count = XVECEXP (operands[3], 0, 0);
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix,
+ const0_rtx, operands[4],
+ operands[1]));
+ DONE;
+})
+
+(define_expand "<insn><mode>3"
+ [(set (match_operand:VI1_AVX512_3264 0 "register_operand")
+ (any_rotate:VI1_AVX512_3264
+ (match_operand:VI1_AVX512_3264 1 "register_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_GFNI"
+{
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "<insn>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand")
+ (any_rotate:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_GFNI || TARGET_XOP"
+{
+ /* Handle the V16QI XOP case to avoid a conflict with the other expand. */
+ if (TARGET_XOP)
+ {
+ if (! const_0_to_7_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != QImode)
+ {
+ op2 = gen_reg_rtx (QImode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_initv16qiqi (reg, par));
+ if (<CODE> == ROTATERT)
+ {
+ rtx neg = gen_reg_rtx (V16QImode);
+ emit_insn (gen_negv16qi2 (neg, reg));
+ reg = neg;
+ }
+ emit_insn (gen_xop_vrotlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+ }
+ }
+ else if (TARGET_GFNI && CONST_INT_P (operands[2]))
+ {
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+ emit_insn (gen_vgf2p8affineqb_v16qi (operands[0],
+ force_reg (V16QImode, operands[1]),
+ matrix, const0_rtx));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
(define_expand "ashrv2di3"
[(set (match_operand:V2DI 0 "register_operand")
(ashiftrt:V2DI
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index c8603b9..1649ea2 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -141,6 +141,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
ix86_size_memcpy,
ix86_size_memset,
COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
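The two new entries feed the i386 vector cost hooks: the {FMA, DOT_PROD_EXPR, SAD_EXPR} latency-times-throughput triple suggests how many unrolled copies are needed to hide a reduction's latency, and the final value (also user-visible as --param=ix86-vect-unroll-limit) caps the result. A rough, hypothetical sketch of how such numbers combine (the names below are mine; the real logic lives in the i386 vectorizer cost code, not here):

/* Hypothetical sketch.  reduc_cycles stands for one of the latency*throughput
   entries above; unroll_limit is the per-target cap.  */
static int
suggested_unroll_factor (int reduc_cycles, int unroll_limit)
{
  int factor = reduc_cycles;       /* enough copies in flight to hide latency */
  if (factor > unroll_limit)
    factor = unroll_limit;         /* never beyond the target's cap */
  return factor < 1 ? 1 : factor;
}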
@@ -261,6 +267,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i386_memcpy,
i386_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -382,6 +394,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i486_memcpy,
i486_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -501,6 +519,12 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -613,6 +637,12 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -740,6 +770,12 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentiumpro_memcpy,
pentiumpro_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -858,6 +894,12 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
geode_memcpy,
geode_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -979,6 +1021,12 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k6_memcpy,
k6_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1101,6 +1149,12 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
athlon_memcpy,
athlon_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1232,6 +1286,12 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k8_memcpy,
k8_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1371,6 +1431,12 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
amdfam10_memcpy,
amdfam10_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -1503,6 +1569,12 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
bdver_memcpy,
bdver_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1668,6 +1740,12 @@ struct processor_costs znver1_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {5, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver1_memcpy,
znver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1836,6 +1914,12 @@ struct processor_costs znver2_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {10, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1979,6 +2063,12 @@ struct processor_costs znver3_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2125,6 +2215,12 @@ struct processor_costs znver4_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2287,6 +2383,12 @@ struct processor_costs znver5_cost = {
We increase width to 6 for multiplications
in ix86_reassociation_width. */
6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2422,6 +2524,12 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
skylake_memcpy,
skylake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2559,6 +2667,12 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 10, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
icelake_memcpy,
icelake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2690,6 +2804,12 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
alderlake_memcpy,
alderlake_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2814,6 +2934,12 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver1_memcpy,
btver1_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -2935,6 +3061,12 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver2_memcpy,
btver2_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -3055,6 +3187,12 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium4_memcpy,
pentium4_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3178,6 +3316,12 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
nocona_memcpy,
nocona_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3299,6 +3443,12 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 2, /* Limit how much the autovectorizer
+ may unroll a loop. */
atom_memcpy,
atom_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3420,6 +3570,12 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
slm_memcpy,
slm_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3555,6 +3711,12 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
tremont_memcpy,
tremont_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -3681,6 +3843,12 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
lujiazui_memcpy,
lujiazui_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -3805,6 +3973,12 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
yongfeng_memcpy,
yongfeng_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3929,6 +4103,12 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
shijidadao_memcpy,
shijidadao_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -4078,6 +4258,12 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
generic_memcpy,
generic_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -4215,6 +4401,12 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
core_memcpy,
core_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
index 50f72d5..836d93a 100644
--- a/gcc/config/loongarch/genopts/isa-evolution.in
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -2,4 +2,5 @@
2 26 div32 1.1 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
2 27 lam-bh 1.1 Support am{swap/add}[_db].{b/h} instructions.
2 28 lamcas 1.1 Support amcas[_db].{b/h/w/d} instructions.
+2 30 scq 1.1 Support sc.q instruction.
3 23 ld-seq-sa 1.1 Do not need load-load barriers (dbar 0x700).
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
index 04b277e..dcd8d90 100644
--- a/gcc/config/loongarch/loongarch-def.cc
+++ b/gcc/config/loongarch/loongarch-def.cc
@@ -72,7 +72,7 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.simd_ (ISA_EXT_SIMD_LASX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
| OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
- | OPTION_MASK_ISA_FRECIPE))
+ | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ))
.set (ARCH_LA64V1_0,
loongarch_isa ()
.base_ (ISA_BASE_LA64)
@@ -86,7 +86,7 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.simd_ (ISA_EXT_SIMD_LSX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
| OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
- | OPTION_MASK_ISA_FRECIPE));
+ | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ));
static inline loongarch_cache la464_cache ()
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
index 0bcd2a7..0a7d0c9 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -78,12 +78,10 @@ extern loongarch_def_array<const char *, N_ISA_EXT_TYPES>
/* Base ABI */
-enum {
- ABI_BASE_LP64D = 0,
- ABI_BASE_LP64F = 1,
- ABI_BASE_LP64S = 2,
- N_ABI_BASE_TYPES = 3
-};
+#define ABI_BASE_LP64D 0
+#define ABI_BASE_LP64F 1
+#define ABI_BASE_LP64S 2
+#define N_ABI_BASE_TYPES 3
extern loongarch_def_array<const char *, N_ABI_BASE_TYPES>
loongarch_abi_base_strings;
diff --git a/gcc/config/loongarch/loongarch-evolution.cc b/gcc/config/loongarch/loongarch-evolution.cc
index de68624..a92a645 100644
--- a/gcc/config/loongarch/loongarch-evolution.cc
+++ b/gcc/config/loongarch/loongarch-evolution.cc
@@ -32,6 +32,7 @@ int la_evo_feature_masks[] = {
OPTION_MASK_ISA_DIV32,
OPTION_MASK_ISA_LAM_BH,
OPTION_MASK_ISA_LAMCAS,
+ OPTION_MASK_ISA_SCQ,
OPTION_MASK_ISA_LD_SEQ_SA,
};
@@ -40,6 +41,7 @@ const char* la_evo_macro_name[] = {
"__loongarch_div32",
"__loongarch_lam_bh",
"__loongarch_lamcas",
+ "__loongarch_scq",
"__loongarch_ld_seq_sa",
};
@@ -48,6 +50,7 @@ int la_evo_version_major[] = {
1, /* DIV32 */
1, /* LAM_BH */
1, /* LAMCAS */
+ 1, /* SCQ */
1, /* LD_SEQ_SA */
};
@@ -56,5 +59,6 @@ int la_evo_version_minor[] = {
1, /* DIV32 */
1, /* LAM_BH */
1, /* LAMCAS */
+ 1, /* SCQ */
1, /* LD_SEQ_SA */
};
diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h
index 5f90839..7fb7b0d 100644
--- a/gcc/config/loongarch/loongarch-evolution.h
+++ b/gcc/config/loongarch/loongarch-evolution.h
@@ -36,6 +36,7 @@ static constexpr struct {
{ 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
{ 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
{ 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
+ { 2, 1u << 30, OPTION_MASK_ISA_SCQ },
{ 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
};
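The new table row places the feature in CPUCFG word 2, bit 30, matching the isa-evolution.in entry. A run-time probe would look roughly like this on LoongArch (the inline-asm form is illustrative only; the authoritative mapping is the table row itself, and the function name is mine):

/* Sketch: test CPUCFG word 2, bit 30 (sc.q) at run time.  LoongArch only.  */
static int
cpu_has_scq (void)
{
  unsigned int word2;
  __asm__ ("cpucfg %0,%1" : "=r" (word2) : "r" (2));
  return (word2 >> 30) & 1;
}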
@@ -58,8 +59,9 @@ enum {
EVO_DIV32 = 1,
EVO_LAM_BH = 2,
EVO_LAMCAS = 3,
- EVO_LD_SEQ_SA = 4,
- N_EVO_FEATURES = 5
+ EVO_SCQ = 4,
+ EVO_LD_SEQ_SA = 5,
+ N_EVO_FEATURES = 6
};
/* Condition macros */
@@ -71,6 +73,8 @@ enum {
(la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH)
#define ISA_HAS_LAMCAS \
(la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS)
+#define ISA_HAS_SCQ \
+ (la_target.isa.evolution & OPTION_MASK_ISA_SCQ)
#define ISA_HAS_LD_SEQ_SA \
(la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA)
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
index 1546ea3..583cce8 100644
--- a/gcc/config/loongarch/loongarch-str.h
+++ b/gcc/config/loongarch/loongarch-str.h
@@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTSTR_DIV32 "div32"
#define OPTSTR_LAM_BH "lam-bh"
#define OPTSTR_LAMCAS "lamcas"
+#define OPTSTR_SCQ "scq"
#define OPTSTR_LD_SEQ_SA "ld-seq-sa"
#endif /* LOONGARCH_STR_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 493f95e..0935d7b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4388,6 +4388,7 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
else if (TARGET_RECIP_VEC_DIV
+ && vectype
&& gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
{
machine_mode mode = TYPE_MODE (vectype);
@@ -6221,9 +6222,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
'Q' Print R_LARCH_RELAX for TLS IE.
'r' Print address 12-31bit relocation associated with OP.
'R' Print address 32-51bit relocation associated with OP.
- 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
- 'z' for (eq:?I ...), 'n' for (ne:?I ...).
- 't' Like 'T', but with the EQ/NE cases reversed
+ 'T' Print a comment marker if %G outputs nothing.
+ 't' Print the register containing the higher 64 bits of a TImode.
'u' Print a LASX register.
'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
@@ -6306,6 +6306,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
fputs ("dbar\t0x700", file);
break;
+ case 'T':
+ if (!loongarch_cas_failure_memorder_needs_acquire (
+ memmodel_from_int (INTVAL (op)))
+ && ISA_HAS_LD_SEQ_SA)
+ fprintf (file, "%s", ASM_COMMENT_START);
+ break;
+
case 'h':
if (code == HIGH)
op = XEXP (op, 0);
@@ -6384,14 +6391,6 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
false /* lo_reloc */);
break;
- case 't':
- case 'T':
- {
- int truth = (code == NE) == (letter == 'T');
- fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
- }
- break;
-
case 'V':
if (CONST_VECTOR_P (op))
{
@@ -6495,6 +6494,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
}
break;
+ case 't':
+ if (GET_MODE (op) != TImode
+ || (op != CONST0_RTX (TImode) && code != REG))
+ {
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+ }
+ op = loongarch_subword (op, 1);
+ letter = 'z';
+ /* fall through */
default:
switch (code)
{
@@ -10786,9 +10795,9 @@ loongarch_expand_vec_cmp (rtx operands[])
to a fixed type. */
static machine_mode
-loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+loongarch_promote_function_mode (const_tree type,
machine_mode mode,
- int *punsignedp ATTRIBUTE_UNUSED,
+ int *punsignedp,
const_tree fntype ATTRIBUTE_UNUSED,
int for_return ATTRIBUTE_UNUSED)
{
@@ -11154,6 +11163,46 @@ loongarch_c_mode_for_suffix (char suffix)
return VOIDmode;
}
+/* Implement TARGET_C_BITINT_TYPE_INFO.
+ Return true if _BitInt(N) is supported and fill its details into *INFO. */
+bool
+loongarch_bitint_type_info (int n, struct bitint_info *info)
+{
+ if (n <= 8)
+ info->limb_mode = QImode;
+ else if (n <= 16)
+ info->limb_mode = HImode;
+ else if (n <= 32)
+ info->limb_mode = SImode;
+ else if (n <= 64)
+ info->limb_mode = DImode;
+ else if (n <= 128)
+ info->limb_mode = TImode;
+ else
+ info->limb_mode = DImode;
+
+ info->abi_limb_mode = info->limb_mode;
+
+ if (n > 64)
+ info->abi_limb_mode = TImode;
+
+ info->big_endian = false;
+ info->extended = true;
+ return true;
+}
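In effect: up to 64 bits picks the smallest fitting limb, 65-128 bits is a single TImode limb, and anything wider is stored as DImode limbs but passed with a TImode ABI limb. A usage sketch (requires a compiler with C23 _BitInt support targeting loongarch64):

/* Limb choices implied by loongarch_bitint_type_info above.  */
_BitInt(7)    a;   /* QImode limb */
_BitInt(20)   b;   /* SImode limb */
_BitInt(100)  c;   /* one TImode limb */
_BitInt(300)  d;   /* DImode limbs in memory, TImode ABI limb */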
+
+/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
+
+static int
+loongarch_compute_pressure_classes (reg_class *classes)
+{
+ int i = 0;
+ classes[i++] = GENERAL_REGS;
+ classes[i++] = FP_REGS;
+ classes[i++] = FCC_REGS;
+ return i;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11428,6 +11477,12 @@ loongarch_c_mode_for_suffix (char suffix)
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO loongarch_bitint_type_info
+
+#undef TARGET_COMPUTE_PRESSURE_CLASSES
+#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index d897763..e8819bf 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -270,7 +270,9 @@ along with GCC; see the file COPYING3. If not see
if (GET_MODE_CLASS (MODE) == MODE_INT \
&& GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
{ \
- if ((MODE) == SImode) \
+ if ((MODE) == SImode \
+ && !(TYPE && TREE_CODE (TYPE) == BITINT_TYPE \
+ && TYPE_PRECISION (TYPE) < 32)) \
(UNSIGNEDP) = 0; \
(MODE) = Pmode; \
}
@@ -823,8 +825,6 @@ typedef struct {
#define CASE_VECTOR_MODE Pmode
-#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode
-
/* Define this as 1 if `char' should by default be signed; else as 0. */
#ifndef DEFAULT_SIGNED_CHAR
#define DEFAULT_SIGNED_CHAR 1
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 4d85cf5..fbe61c0 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -334,6 +334,10 @@ mlamcas
Target Mask(ISA_LAMCAS) Var(la_isa_evolution)
Support amcas[_db].{b/h/w/d} instructions.
+mscq
+Target Mask(ISA_SCQ) Var(la_isa_evolution)
+Support sc.q instruction.
+
mld-seq-sa
Target Mask(ISA_LD_SEQ_SA) Var(la_isa_evolution)
Do not need load-load barriers (dbar 0x700).
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index 5f644f6..606a211 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -90,6 +90,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh)
mlamcas
UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas)
+mscq
+UrlSuffix(gcc/LoongArch-Options.html#index-mscq)
+
mld-seq-sa
UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa)
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index dd17cd1..4156b26 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -773,7 +773,7 @@
(vec_select:<VEC_HALF>
(match_operand:IVEC 2 "register_operand" "f")
(match_operand:IVEC 4 "vect_par_cnst_even_or_odd_half")))
- (any_extend:<WVEC>
+ (any_extend:<WVEC_HALF>
(vec_select:<VEC_HALF>
(match_operand:IVEC 3 "register_operand" "f")
(match_dup 4))))
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index fd8d732..2ee400e 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -21,25 +21,25 @@
(define_c_enum "unspec" [
UNSPEC_COMPARE_AND_SWAP
+ UNSPEC_COMPARE_AND_SWAP_AMCAS
UNSPEC_COMPARE_AND_SWAP_ADD
UNSPEC_COMPARE_AND_SWAP_SUB
- UNSPEC_COMPARE_AND_SWAP_AND
- UNSPEC_COMPARE_AND_SWAP_XOR
- UNSPEC_COMPARE_AND_SWAP_OR
UNSPEC_COMPARE_AND_SWAP_NAND
UNSPEC_SYNC_OLD_OP
UNSPEC_SYNC_EXCHANGE
UNSPEC_ATOMIC_STORE
UNSPEC_ATOMIC_LOAD
UNSPEC_MEMORY_BARRIER
+
+ UNSPEC_TI_FETCH_ADD
+ UNSPEC_TI_FETCH_SUB
+ UNSPEC_TI_FETCH_AND
+ UNSPEC_TI_FETCH_XOR
+ UNSPEC_TI_FETCH_OR
+ UNSPEC_TI_FETCH_NAND_MASK_INVERTED
])
(define_code_iterator any_atomic [plus ior xor and])
-(define_code_attr atomic_optab
- [(plus "add") (ior "or") (xor "xor") (and "and")])
-
-;; This attribute gives the format suffix for atomic memory operations.
-(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
;; <amop> expands to the name of the atomic operand that implements a
;; particular code.
@@ -107,7 +107,7 @@
(define_insn "atomic_load<mode>"
[(set (match_operand:QHWD 0 "register_operand" "=r")
(unspec_volatile:QHWD
- [(match_operand:QHWD 1 "memory_operand" "+m")
+ [(match_operand:QHWD 1 "memory_operand" "m")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_ATOMIC_LOAD))]
""
@@ -142,9 +142,50 @@
}
[(set (attr "length") (const_int 12))])
+(define_insn "atomic_loadti_lsx"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (unspec_volatile:V2DI
+ [(match_operand:TI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPEC_ATOMIC_LOAD))]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+ switch (model)
+ {
+ case MEMMODEL_SEQ_CST:
+ output_asm_insn ("dbar\t0x11", operands);
+ /* fall through */
+ case MEMMODEL_ACQUIRE:
+ case MEMMODEL_RELAXED:
+ return "vld\t%w0,%1\\n\\t%G2";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "length") (const_int 12))])
+
+(define_expand "atomic_loadti"
+ [(match_operand:TI 0 "register_operand" "=r")
+ (match_operand:TI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand")]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ rtx vr = gen_reg_rtx (V2DImode);
+
+ emit_insn (gen_atomic_loadti_lsx (vr, operands[1], operands[2]));
+ for (int i = 0; i < 2; i++)
+ emit_insn (
+ gen_lsx_vpickve2gr_d (loongarch_subword (operands[0], i), vr,
+ GEN_INT (i)));
+ DONE;
+})
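With LSX, the 16-byte atomic load becomes a single vld (plus whatever barrier the memory model asks for), and the two halves are then moved to GPRs with vpickve2gr.d. A usage sketch of what this backs at the source level (whether it is inlined still depends on the exact target flags):

/* 16-byte atomic load; inlinable on loongarch64 with LSX after this change.  */
static unsigned __int128
load128_acquire (const unsigned __int128 *p)
{
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
}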
+
;; Implement atomic stores with amoswap. Fall back to fences for atomic loads.
(define_insn "atomic_store<mode>"
- [(set (match_operand:QHWD 0 "memory_operand" "+m")
+ [(set (match_operand:QHWD 0 "memory_operand" "=m")
(unspec_volatile:QHWD
[(match_operand:QHWD 1 "reg_or_0_operand" "rJ")
(match_operand:SI 2 "const_int_operand")] ;; model
@@ -175,7 +216,67 @@
}
[(set (attr "length") (const_int 12))])
-(define_insn "atomic_<atomic_optab><mode>"
+(define_insn "atomic_storeti_lsx"
+ [(set (match_operand:TI 0 "memory_operand" "=m")
+ (unspec_volatile:TI
+ [(match_operand:V2DI 1 "register_operand" "f")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPEC_ATOMIC_STORE))]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+ switch (model)
+ {
+ case MEMMODEL_SEQ_CST:
+ return "dbar\t0x12\\n\\t"
+ "vst\t%w1,%0\\n\\t"
+ "dbar\t0x18";
+ case MEMMODEL_RELEASE:
+ return "dbar\t0x12\\n\\t"
+ "vst\t%w1,%0";
+ case MEMMODEL_RELAXED:
+ return "vst\t%w1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "length") (const_int 12))])
+
+(define_insn "atomic_storeti_scq"
+ [(set (match_operand:TI 0 "memory_operand" "=m")
+ (unspec_volatile:TI
+ [(match_operand:TI 1 "register_operand" "r")]
+ UNSPEC_ATOMIC_STORE))
+ (clobber (match_scratch:DI 2 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+ "1:\\n\\tll.d\t$r0,%0\n\tmove\t%2,%1\n\tsc.q\t%2,%t1,%0\n\tbeqz\t%2,1b"
+ [(set (attr "length") (const_int 16))])
+
+(define_expand "atomic_storeti"
+ [(match_operand:TI 0 "memory_operand" "=m")
+ (match_operand:TI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "const_int_operand")]
+ "TARGET_64BIT && (ISA_HAS_LSX || ISA_HAS_SCQ)"
+{
+ if (!ISA_HAS_LSX)
+ {
+ emit_insn (gen_atomic_storeti_scq (operands[0], operands[1]));
+ DONE;
+ }
+
+ rtx vr = gen_reg_rtx (V2DImode), op1 = operands[1];
+ rtvec v = rtvec_alloc (2);
+
+ for (int i = 0; i < 2; i++)
+ RTVEC_ELT (v, i) = loongarch_subword (op1, i);
+
+ emit_insn (gen_vec_initv2didi (vr, gen_rtx_PARALLEL (V2DImode, v)));
+ emit_insn (gen_atomic_storeti_lsx (operands[0], vr, operands[2]));
+ DONE;
+})
+
+(define_insn "atomic_<amop><mode>"
[(set (match_operand:GPR 0 "memory_operand" "+ZB")
(unspec_volatile:GPR
[(any_atomic:GPR (match_dup 0)
@@ -183,7 +284,7 @@
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
""
- "am<amop>%A2.<amo>\t$zero,%z1,%0"
+ "am<amop>%A2.<size>\t$zero,%z1,%0"
[(set (attr "length") (const_int 4))])
(define_insn "atomic_add<mode>"
@@ -194,10 +295,10 @@
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
"ISA_HAS_LAM_BH"
- "amadd%A2.<amo>\t$zero,%z1,%0"
+ "amadd%A2.<size>\t$zero,%z1,%0"
[(set (attr "length") (const_int 4))])
-(define_insn "atomic_fetch_<atomic_optab><mode>"
+(define_insn "atomic_fetch_<amop><mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(match_operand:GPR 1 "memory_operand" "+ZB"))
(set (match_dup 1)
@@ -207,9 +308,52 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
""
- "am<amop>%A3.<amo>\t%0,%z2,%1"
+ "am<amop>%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
+(define_insn "atomic_fetch_nand_mask_inverted<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&r")
+ (match_operand:GPR 1 "memory_operand" "+ZC"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(ior:GPR (not (match_dup 1))
+ (match_operand:GPR 2 "register_operand" "r"))]
+ UNSPEC_SYNC_OLD_OP))
+ (clobber (match_scratch:GPR 3 "=&r"))]
+ ""
+ {
+ return "1:\\n\\t"
+ "ll.<d>\\t%0,%1\\n\\t"
+ "orn\\t%3,%2,%0\\n\\t"
+ "sc.<d>\\t%3,%1\\n\\t"
+ "beqz\\t%3,1b";
+ }
+ [(set (attr "length") (const_int 16))])
+
+(define_mode_iterator ALL_SC [GPR (TI "TARGET_64BIT && ISA_HAS_SCQ")])
+(define_mode_attr _scq [(SI "") (DI "") (TI "_scq")])
+(define_expand "atomic_fetch_nand<mode>"
+ [(match_operand:ALL_SC 0 "register_operand")
+ (match_operand:ALL_SC 1 "memory_operand")
+ (match_operand:ALL_SC 2 "reg_or_0_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+ /* ~(atom & mask) = (~mask) | (~atom), so we can hoist
+ (~mask) out of the ll/sc loop and use the orn instruction in the
+ ll/sc loop. */
+ rtx inverted_mask = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (inverted_mask,
+ expand_simple_unop (<MODE>mode, NOT, operands[2],
+ NULL_RTX, false));
+
+ emit_insn (
+ gen_atomic_fetch_nand_mask_inverted<mode><_scq> (operands[0],
+ operands[1],
+ inverted_mask));
+ DONE;
+ })
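The expander leans on the identity quoted in the comment: ~(atom & mask) == (~mask) | (~atom), so the NOT of the mask is computed once outside the ll/sc loop and orn does the rest inside it. A throwaway C check of the identity over all byte values:

#include <assert.h>
#include <stdint.h>

/* Exhaustive sanity check of ~(a & m) == (~m) | (~a) on bytes.  */
int
main (void)
{
  for (unsigned a = 0; a < 256; a++)
    for (unsigned m = 0; m < 256; m++)
      assert ((uint8_t) ~(a & m) == (uint8_t) ((~m) | (~a)));
  return 0;
}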
+
(define_insn "atomic_exchange<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(unspec_volatile:GPR
@@ -219,9 +363,44 @@
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "r"))]
""
- "amswap%A3.<amo>\t%0,%z2,%1"
+ "amswap%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
+(define_insn "atomic_exchangeti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (unspec_volatile:TI
+ [(match_operand:TI 1 "memory_operand" "+ZB")]
+ UNSPEC_SYNC_EXCHANGE))
+ (set (match_dup 1)
+ (match_operand:TI 2 "register_operand" "rJ"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+ output_asm_insn ("move\t%3,%z2", operands);
+ output_asm_insn ("sc.q\t%3,%t2,%1", operands);
+ output_asm_insn ("beqz\t%3,1b", operands);
+
+ return "";
+}
+ [(set (attr "length") (const_int 24))])
+
+(define_expand "atomic_exchangeti"
+ [(match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB")
+ (match_operand:TI 2 "register_operand" "rJ")
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ emit_insn (gen_atomic_exchangeti_scq (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_exchange<mode>_short"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(unspec_volatile:SHORT
@@ -231,7 +410,7 @@
(set (match_dup 1)
(match_operand:SHORT 2 "register_operand" "r"))]
"ISA_HAS_LAM_BH"
- "amswap%A3.<amo>\t%0,%z2,%1"
+ "amswap%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
(define_insn "atomic_cas_value_strong<mode>"
@@ -240,13 +419,13 @@
(set (match_dup 1)
(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")] ;; mod_s
+ (match_operand:SI 4 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 5 "=&r"))]
""
{
output_asm_insn ("1:", operands);
- output_asm_insn ("ll.<amo>\t%0,%1", operands);
+ output_asm_insn ("ll.<size>\t%0,%1", operands);
/* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the
return value of the val_without_const_folding will not be truncated and
@@ -266,9 +445,9 @@
output_asm_insn ("bne\t%0,%z2,2f", operands);
output_asm_insn ("or%i3\t%5,$zero,%3", operands);
- output_asm_insn ("sc.<amo>\t%5,%1", operands);
+ output_asm_insn ("sc.<size>\t%5,%1", operands);
output_asm_insn ("beqz\t%5,1b", operands);
- output_asm_insn ("b\t3f", operands);
+ output_asm_insn ("%T4b\t3f", operands);
output_asm_insn ("2:", operands);
output_asm_insn ("%G4", operands);
output_asm_insn ("3:", operands);
@@ -288,10 +467,10 @@
(set (match_dup 1)
(unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ")
(match_operand:QHWD 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")] ;; mod_s
- UNSPEC_COMPARE_AND_SWAP))]
+ (match_operand:SI 4 "const_int_operand")] ;; mod
+ UNSPEC_COMPARE_AND_SWAP_AMCAS))]
"ISA_HAS_LAMCAS"
- "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
+ "ori\t%0,%z2,0\n\tamcas%A4.<size>\t%0,%z3,%1"
[(set (attr "length") (const_int 8))])
(define_expand "atomic_compare_and_swap<mode>"
@@ -318,16 +497,14 @@
&& is_mm_release (memmodel_base (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
- operands[6] = mod_s;
-
if (ISA_HAS_LAMCAS)
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_s));
else
emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_f));
rtx compare = operands[1];
if (operands[3] != const0_rtx)
@@ -349,49 +526,74 @@
DONE;
})
-(define_expand "atomic_test_and_set"
- [(match_operand:QI 0 "register_operand" "") ;; bool output
- (match_operand:QI 1 "memory_operand" "+ZB") ;; memory
- (match_operand:SI 2 "const_int_operand" "")] ;; model
+(define_expand "atomic_fetch_<amop><mode>"
+ [(match_operand:SHORT 0 "register_operand" "") ;; output
+ (any_bitwise (match_operand:SHORT 1 "memory_operand" "+ZB") ;; memory
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ;; val
+ (match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
- /* We have no QImode atomics, so use the address LSBs to form a mask,
- then use an aligned SImode atomic. */
+ /* We have no QI/HImode bitwise atomics, so use the address LSBs to form
+ a mask, then use an aligned SImode atomic. */
rtx result = operands[0];
rtx mem = operands[1];
- rtx model = operands[2];
+ rtx model = operands[3];
rtx addr = force_reg (Pmode, XEXP (mem, 0));
- rtx tmp_reg = gen_reg_rtx (Pmode);
- rtx zero_reg = gen_rtx_REG (Pmode, 0);
-
+ rtx mask = gen_int_mode (-4, Pmode);
rtx aligned_addr = gen_reg_rtx (Pmode);
- emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4)));
- emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg));
+
+ if (!and_operand (mask, Pmode))
+ mask = force_reg (Pmode, mask);
+
+ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, mask));
rtx aligned_mem = change_address (mem, SImode, aligned_addr);
set_mem_alias_set (aligned_mem, 0);
- rtx offset = gen_reg_rtx (SImode);
- emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
- GEN_INT (3)));
-
rtx tmp = gen_reg_rtx (SImode);
- emit_move_insn (tmp, GEN_INT (1));
+ emit_move_insn (tmp, simplify_gen_unary (ZERO_EXTEND, SImode,
+ operands[2], <MODE>mode));
+ /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need
+ to mask addr with 0b11 here. */
rtx shmt = gen_reg_rtx (SImode);
- emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+ emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, gen_lowpart (SImode, addr),
+ GEN_INT (3)));
rtx word = gen_reg_rtx (SImode);
emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt));
+ if (<is_and>)
+ {
+ /* word = word | ~(mode_mask << shmt) */
+ rtx tmp = force_reg (SImode,
+ gen_int_mode (GET_MODE_MASK (<MODE>mode),
+ SImode));
+ emit_move_insn (tmp, gen_rtx_ASHIFT (SImode, tmp, shmt));
+ emit_move_insn (word, gen_rtx_IOR (SImode, gen_rtx_NOT (SImode, tmp),
+ word));
+ }
+
tmp = gen_reg_rtx (SImode);
- emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+ emit_insn (gen_atomic_fetch_<amop>si (tmp, aligned_mem, word, model));
emit_move_insn (gen_lowpart (SImode, result),
gen_rtx_LSHIFTRT (SImode, tmp, shmt));
DONE;
})
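The expander lowers a byte or halfword bitwise AMO onto the aligned SImode word that contains it: the shifted operand only disturbs its own lane for OR and XOR, and for AND the bits outside the lane are first forced to 1 (the <is_and> branch above). A C sketch of the same trick for a byte-wide fetch_or, using GCC atomic built-ins on a little-endian target (the function name is mine, and strict-aliasing concerns are glossed over, since this only mirrors what happens at the RTL level):

#include <stdint.h>

/* Byte-wide atomic fetch_or by way of the containing aligned 32-bit word.  */
static uint8_t
fetch_or_u8 (uint8_t *p, uint8_t val)
{
  uintptr_t addr = (uintptr_t) p;
  uint32_t *word = (uint32_t *) (addr & ~(uintptr_t) 3);  /* aligned word */
  unsigned shift = (unsigned) (addr & 3) * 8;             /* lane offset  */
  uint32_t old = __atomic_fetch_or (word, (uint32_t) val << shift,
                                    __ATOMIC_SEQ_CST);
  return (uint8_t) (old >> shift);
}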
+(define_expand "atomic_test_and_set"
+ [(match_operand:QI 0 "register_operand" "") ;; bool output
+ (match_operand:QI 1 "memory_operand" "+ZB") ;; memory
+ (match_operand:SI 2 "const_int_operand" "")] ;; model
+ ""
+{
+ rtx one = force_reg (QImode, gen_int_mode (1, QImode));
+ emit_insn (gen_atomic_fetch_orqi (operands[0], operands[1], one,
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_cas_value_cmp_and_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(match_operand:GPR 1 "memory_operand" "+ZC"))
@@ -400,20 +602,20 @@
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
(match_operand:GPR 4 "reg_or_0_operand" "rJ")
(match_operand:GPR 5 "reg_or_0_operand" "rJ")
- (match_operand:SI 6 "const_int_operand")] ;; model
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 7 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%2\\n\\t"
"bne\\t%7,%z4,2f\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b\\n\\t"
- "b\\t3f\\n\\t"
+ "%T6b\\t3f\\n\\t"
"2:\\n\\t"
"%G6\\n\\t"
"3:\\n\\t";
@@ -444,18 +646,16 @@
&& is_mm_release (memmodel_base (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
- operands[6] = mod_s;
-
if (ISA_HAS_LAMCAS)
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_s));
else
{
union loongarch_gen_fn_ptrs generator;
generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
- operands[3], operands[4], operands[6]);
+ operands[3], operands[4], mod_f);
}
rtx compare = operands[1];
@@ -481,83 +681,96 @@
DONE;
})
-(define_insn "atomic_cas_value_add_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
+(define_insn "atomic_compare_and_swapti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
(set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_ADD))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
+ (unspec_volatile:TI [(match_operand:TI 2 "reg_or_0_operand" "rJ")
+ (match_operand:TI 3 "reg_or_0_operand" "rJ")
+ (match_operand:SI 4 "const_int_operand")] ;; mod_f
+ UNSPEC_COMPARE_AND_SWAP))
+ (clobber (match_scratch:DI 5 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "add.w\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
-}
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
- [(set (attr "length") (const_int 28))])
+ /* Compare the low word. */
+ output_asm_insn ("bne\t%0,%z2,2f", operands);
-(define_insn "atomic_cas_value_sub_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
- (set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_SUB))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
-{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "sub.w\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
+ /* Don't reorder the load of the high word before ll.d. As the TImode
+ value must be aligned in memory, the high and low words must be in the
+ same cacheline, so dbar 0x700 is enough. */
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+
+ /* Now load the high word. As the high and low words are in the same
+ cacheline, if another core clobbers the high word before the sc.q
+ instruction executes, the LL bit for the low word will be cleared.
+ Thus a normal load is sufficient. */
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+
+ /* Compare the high word. */
+ output_asm_insn ("bne\t%t0,%t2,2f", operands);
+
+ /* Copy the low word of the new value as it'll be clobbered by sc.q. */
+ output_asm_insn ("move\t%5,%z3", operands);
+
+ /* Store both words if LL bit is still set. */
+ output_asm_insn ("sc.q\t%5,%t3,%1", operands);
+
+ /* Check if sc.q has done the store. */
+ output_asm_insn ("beqz\t%5,1b", operands);
+
+ /* Jump over the mod_f barrier if sc.q has succeeded. */
+ output_asm_insn ("%T4b\t3f", operands);
+
+ /* The barrier for mod_f. */
+ output_asm_insn ("2:", operands);
+ output_asm_insn ("%G4", operands);
+
+ output_asm_insn ("3:", operands);
+ return "";
}
- [(set (attr "length") (const_int 28))])
+ [(set_attr "length" "40")])
-(define_insn "atomic_cas_value_and_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
- (set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_AND))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
+(define_expand "atomic_compare_and_swapti"
+ [(match_operand:SI 0 "register_operand" "") ;; bool output
+ (match_operand:TI 1 "register_operand" "") ;; val output
+ (match_operand:TI 2 "memory_operand" "") ;; memory
+ (match_operand:TI 3 "reg_or_0_operand" "") ;; expected value
+ (match_operand:TI 4 "reg_or_0_operand" "") ;; desired value
+ (match_operand:SI 5 "const_int_operand" "") ;; is_weak
+ (match_operand:SI 6 "const_int_operand" "") ;; mod_s
+ (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
+ "TARGET_64BIT && ISA_HAS_SCQ"
{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "and\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
-}
- [(set (attr "length") (const_int 28))])
+ emit_insn (gen_atomic_compare_and_swapti_scq (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[7]));
+
+ rtx t[2];
-(define_insn "atomic_cas_value_xor_7_<mode>"
+ for (int i = 0; i < 2; i++)
+ {
+ rtx compare = loongarch_subword (operands[1], i);
+ rtx expect = loongarch_subword (operands[3], i);
+
+ t[i] = gen_reg_rtx (DImode);
+
+ if (expect != const0_rtx)
+ emit_insn (gen_xordi3 (t[i], compare, expect));
+ else
+ emit_move_insn (t[i], compare);
+ }
+
+ emit_insn (gen_iordi3 (t[0], t[0], t[1]));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_EQ (SImode, t[0], const0_rtx)));
+ DONE;
+})
+
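For reference, a C sketch of the operation this TImode expander serves, and of how it derives the boolean output from the value returned by the sc.q loop (XOR each half against the expected half, OR the two, test for zero). The helper names are illustrative:

    #include <stdbool.h>
    #include <stdint.h>

    typedef unsigned __int128 u128;

    static inline bool
    cas16 (u128 *mem, u128 *expected, u128 desired)
    {
      /* With ISA_HAS_SCQ this maps onto the ll.d/ld.d/sc.q loop emitted
         by atomic_compare_and_swapti_scq.  */
      return __atomic_compare_exchange_n (mem, expected, desired, false,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

    /* The success bit as the expander computes it from the loaded value.  */
    static inline bool
    cas_succeeded (u128 loaded, u128 expected)
    {
      uint64_t lo = (uint64_t) loaded ^ (uint64_t) expected;
      uint64_t hi = (uint64_t) (loaded >> 64) ^ (uint64_t) (expected >> 64);
      return (lo | hi) == 0;
    }
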
+(define_insn "atomic_cas_value_add_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
(match_operand:GPR 1 "memory_operand" "+ZC"))
(set (match_dup 1)
@@ -566,24 +779,24 @@
(match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
(match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
(match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_XOR))
+ UNSPEC_COMPARE_AND_SWAP_ADD))
(clobber (match_scratch:GPR 7 "=&r"))
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
- "xor\\t%8,%0,%z5\\n\\t"
+ "add.w\\t%8,%0,%z5\\n\\t"
"and\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 28))])
-(define_insn "atomic_cas_value_or_7_<mode>"
+(define_insn "atomic_cas_value_sub_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
(match_operand:GPR 1 "memory_operand" "+ZC"))
(set (match_dup 1)
@@ -592,21 +805,20 @@
(match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
(match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
(match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_OR))
+ UNSPEC_COMPARE_AND_SWAP_SUB))
(clobber (match_scratch:GPR 7 "=&r"))
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
- "or\\t%8,%0,%z5\\n\\t"
+ "sub.w\\t%8,%0,%z5\\n\\t"
"and\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
-
[(set (attr "length") (const_int 28))])
(define_insn "atomic_cas_value_nand_7_<mode>"
@@ -624,12 +836,12 @@
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"and\\t%8,%0,%z5\\n\\t"
"xor\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 28))])
@@ -648,10 +860,10 @@
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beqz\\t%7,1b\\n\\t";
}
[(set (attr "length") (const_int 20))])
@@ -678,6 +890,101 @@
DONE;
})
+(define_int_iterator UNSPEC_TI_FETCH_DIRECT
+ [UNSPEC_TI_FETCH_ADD
+ UNSPEC_TI_FETCH_SUB
+ UNSPEC_TI_FETCH_AND
+ UNSPEC_TI_FETCH_XOR
+ UNSPEC_TI_FETCH_OR])
+(define_int_iterator UNSPEC_TI_FETCH
+ [UNSPEC_TI_FETCH_DIRECT UNSPEC_TI_FETCH_NAND_MASK_INVERTED])
+(define_int_attr amop_ti_fetch
+ [(UNSPEC_TI_FETCH_ADD "add")
+ (UNSPEC_TI_FETCH_SUB "sub")
+ (UNSPEC_TI_FETCH_AND "and")
+ (UNSPEC_TI_FETCH_XOR "xor")
+ (UNSPEC_TI_FETCH_OR "or")
+ (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "nand_mask_inverted")])
+(define_int_attr size_ti_fetch
+ [(UNSPEC_TI_FETCH_ADD "36")
+ (UNSPEC_TI_FETCH_SUB "36")
+ (UNSPEC_TI_FETCH_AND "28")
+ (UNSPEC_TI_FETCH_XOR "28")
+ (UNSPEC_TI_FETCH_OR "28")
+ (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "28")])
+
+(define_insn "atomic_fetch_<amop_ti_fetch>ti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
+ (set (match_dup 1)
+ (unspec_volatile:TI
+ [(match_dup 0)
+ (match_operand:TI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_TI_FETCH))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (match_scratch:DI 4 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+
+ switch (<UNSPEC_TI_FETCH>)
+ {
+ case UNSPEC_TI_FETCH_AND:
+ case UNSPEC_TI_FETCH_OR:
+ case UNSPEC_TI_FETCH_XOR:
+ output_asm_insn ("<amop_ti_fetch>\t%3,%0,%z2", operands);
+ output_asm_insn ("<amop_ti_fetch>\t%4,%t0,%t2", operands);
+ break;
+ case UNSPEC_TI_FETCH_NAND_MASK_INVERTED:
+ output_asm_insn ("orn\t%3,%z2,%0", operands);
+ output_asm_insn ("orn\t%4,%t2,%t0", operands);
+ break;
+ case UNSPEC_TI_FETCH_ADD:
+ case UNSPEC_TI_FETCH_SUB:
+ output_asm_insn ("<amop_ti_fetch>.d\t%3,%0,%z2", operands);
+
+ /* Generate the carry bit (a borrow for sub). */
+ output_asm_insn (
+ <UNSPEC_TI_FETCH> == UNSPEC_TI_FETCH_ADD ? "sltu\t%4,%3,%0"
+ : "sltu\t%4,%0,%3",
+ operands);
+
+ output_asm_insn ("<amop_ti_fetch>.d\t%4,%t0,%4", operands);
+ output_asm_insn ("<amop_ti_fetch>.d\t%4,%4,%t2", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ output_asm_insn ("sc.q\t%3,%4,%1", operands);
+ output_asm_insn ("beqz\t%3,1b", operands);
+
+ return "";
+}
+ [(set_attr "length" "<size_ti_fetch>")])
+
+(define_expand "atomic_fetch_<amop_ti_fetch>ti"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
+ (set (match_dup 1)
+ (unspec_volatile:TI
+ [(match_dup 0)
+ (match_operand:TI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_TI_FETCH_DIRECT))
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ /* The memory model is ignored, as sc.q already implies a full barrier. */
+ emit_insn (gen_atomic_fetch_<amop_ti_fetch>ti_scq (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
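The add/sub cases above compute the low double-word first, recover the carry (or borrow) with an unsigned compare, and then fold it into the high double-word. The same arithmetic in plain C, as a sketch only:

    #include <stdint.h>

    static inline void
    add128 (uint64_t *lo, uint64_t *hi, uint64_t addlo, uint64_t addhi)
    {
      uint64_t newlo = *lo + addlo;
      uint64_t carry = newlo < *lo;   /* sltu %4,%3,%0 in the pattern  */
      *hi += carry + addhi;
      *lo = newlo;
    }

    static inline void
    sub128 (uint64_t *lo, uint64_t *hi, uint64_t sublo, uint64_t subhi)
    {
      uint64_t newlo = *lo - sublo;
      uint64_t borrow = *lo < newlo;  /* sltu %4,%0,%3 in the pattern  */
      *hi -= borrow + subhi;
      *lo = newlo;
    }
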
(define_insn "atomic_fetch_add<mode>_short"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(match_operand:SHORT 1 "memory_operand" "+ZB"))
@@ -688,7 +995,7 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
"ISA_HAS_LAM_BH"
- "amadd%A3.<amo>\t%0,%z2,%1"
+ "amadd%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
(define_expand "atomic_fetch_add<mode>"
@@ -724,7 +1031,7 @@
(match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
- ""
+ "!ISA_HAS_LAM_BH"
{
union loongarch_gen_fn_ptrs generator;
generator.fn_7 = gen_atomic_cas_value_sub_7_si;
@@ -733,60 +1040,6 @@
DONE;
})
-(define_expand "atomic_fetch_and<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(and:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_and_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
-(define_expand "atomic_fetch_xor<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(xor:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_xor_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
-(define_expand "atomic_fetch_or<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(ior:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_or_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
(define_expand "atomic_fetch_nand<mode>"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(match_operand:SHORT 1 "memory_operand" "+ZB"))
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index e224ade..494f14c 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2363,8 +2363,14 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
-#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \
- || (flag_sanitize & SANITIZE_ADDRESS) != 0)
+/* Growing the frame downwards allows us to put spills closest to
+ the stack pointer, which is good as they are likely to be accessed
+ frequently. We can also arrange for normal stack usage to place
+ scalars last so that they too are close to the stack pointer. */
+#define FRAME_GROWS_DOWNWARD ((TARGET_MIPS16 \
+ && TARGET_FRAME_GROWS_DOWNWARDS) \
+ || (flag_stack_protect != 0 \
+ || (flag_sanitize & SANITIZE_ADDRESS) != 0))
/* Size of the area allocated in the frame to save the GP. */
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index e245654..f07db5a 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -473,6 +473,10 @@ mframe-header-opt
Target Var(flag_frame_header_optimization) Optimization
Optimize frame header.
+mgrow-frame-downwards
+Target Var(TARGET_FRAME_GROWS_DOWNWARDS) Init(1) Undocumented
+Change the behaviour to grow the frame downwards.
+
noasmopt
Driver
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index d326ca4..9796839 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -120,6 +120,51 @@ Target RejectNegative Alias(misa=,sm_89)
march-map=sm_90a
Target RejectNegative Alias(misa=,sm_89)
+march-map=sm_100
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_100f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_100a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_101
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_101f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_101a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_103
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_103f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_103a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_120
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_120f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_120a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_121
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_121f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_121a
+Target RejectNegative Alias(misa=,sm_89)
+
Enum
Name(ptx_version) Type(enum ptx_version)
Known PTX ISA versions (for use with the -mptx= option):
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 322e319..3fdc56e 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -941,10 +941,19 @@ pru_init_libfuncs (void)
/* Long long. */
set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
- set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
+ if (TARGET_OPT_MUL)
+ {
+ set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
+ }
+ else
+ {
+ set_optab_libfunc (smul_optab, DImode, "__pruabi_softmpyll");
+ set_optab_libfunc (smul_optab, SImode, "__pruabi_softmpyi");
+ }
+
set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
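With this change, a DImode multiplication resolves to __pruabi_mpyll only when the multiplier unit is allowed (-mmul), and to the new soft routines otherwise. A minimal C example whose multiply takes that libcall path (the function name is illustrative):

    #include <stdint.h>

    uint64_t
    scale (uint64_t a, uint64_t b)
    {
      return a * b;   /* DImode multiply: becomes a libcall on PRU  */
    }
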
diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h
index 6c0719b..9d547ed 100644
--- a/gcc/config/pru/pru.h
+++ b/gcc/config/pru/pru.h
@@ -65,6 +65,9 @@
#undef ENDFILE_SPEC
#define ENDFILE_SPEC "%{!mabi=ti:-lgloss} "
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mloop", "mmul", "mfillzero" }
+
/* TI ABI mandates that ELF symbols do not start with any prefix. */
#undef USER_LABEL_PREFIX
#define USER_LABEL_PREFIX ""
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index 3504e42..b8ef55b 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -215,7 +215,7 @@
mov\\t%0, %1
ldi\\t%0, %%pmem(%1)
ldi\\t%0, %1
- fill\\t%0, 4
+ * return TARGET_OPT_FILLZERO ? \"fill\\t%0, 4\" : \"ldi32\\t%0, 0xffffffff\";
ldi32\\t%0, %1"
[(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
(set_attr "length" "4,4,4,4,4,4,8")])
@@ -259,9 +259,11 @@
case 1:
return "lb%B1o\\t%b0, %1, %S1";
case 2:
- return "zero\\t%F0, 8";
+ return TARGET_OPT_FILLZERO ? "zero\\t%F0, 8"
+ : "ldi\\t%F0, 0\;ldi\\t%N0, 0";
case 3:
- return "fill\\t%F0, 8";
+ return TARGET_OPT_FILLZERO ? "fill\\t%F0, 8"
+ : "ldi32\\t%F0, 0xffffffff\;mov\\t%N0, %F0";
case 4:
/* careful with overlapping source and destination regs. */
gcc_assert (GP_REG_P (REGNO (operands[0])));
@@ -502,7 +504,7 @@
(define_insn "zero_extendqidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:QI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%F0.b1, 7
mov\\t%F0.b0, %1\;zero\\t%F0.b1, 7"
@@ -512,7 +514,7 @@
(define_insn "zero_extendhidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:HI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%F0.b2, 6
mov\\t%F0.w0, %1\;zero\\t%F0.b2, 6"
@@ -522,7 +524,7 @@
(define_insn "zero_extendsidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%N0, 4
mov\\t%F0, %1\;zero\\t%N0, 4"
@@ -535,7 +537,7 @@
(define_expand "extend<EQS0:mode><EQDHIDI:mode>2"
[(set (match_operand:EQDHIDI 0 "register_operand" "=r")
(sign_extend:EQDHIDI (match_operand:EQS0 1 "register_operand" "r")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
rtx_code_label *skip_hiset_label;
@@ -744,7 +746,7 @@
(ior:HIDI
(match_operand:HIDI 1 "register_operand" "0")
(match_operand:HIDI 2 "const_fillbytes_operand" "Uf")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
static char line[64];
pru_byterange r;
@@ -767,7 +769,7 @@
(and:HIDI
(match_operand:HIDI 1 "register_operand" "0")
(match_operand:HIDI 2 "const_zerobytes_operand" "Uz")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
static char line[64];
pru_byterange r;
@@ -1114,7 +1116,8 @@
/* Try with the more efficient zero/fill patterns first. */
if (<LOGICAL_BITOP:CODE> == IOR
&& CONST_INT_P (operands[2])
- && const_fillbytes_operand (operands[2], DImode))
+ && const_fillbytes_operand (operands[2], DImode)
+ && TARGET_OPT_FILLZERO)
{
rtx insn = maybe_gen_pru_ior_fillbytes (DImode,
operands[0],
@@ -1130,7 +1133,8 @@
}
if (<LOGICAL_BITOP:CODE> == AND
&& CONST_INT_P (operands[2])
- && const_zerobytes_operand (operands[2], DImode))
+ && const_zerobytes_operand (operands[2], DImode)
+ && TARGET_OPT_FILLZERO)
{
rtx insn = maybe_gen_pru_and_zerobytes (DImode,
operands[0],
@@ -1212,7 +1216,7 @@
[(set (match_operand:SI 0 "pru_muldst_operand" "=Rmd0")
(mult:SI (match_operand:SI 1 "pru_mulsrc0_operand" "%Rms0")
(match_operand:SI 2 "pru_mulsrc1_operand" "Rms1")))]
- ""
+ "TARGET_OPT_MUL"
"nop\;xin\\t0, %0, 4"
[(set_attr "type" "alu")
(set_attr "length" "8")])
diff --git a/gcc/config/pru/pru.opt b/gcc/config/pru/pru.opt
index 8385beb..5206b2a 100644
--- a/gcc/config/pru/pru.opt
+++ b/gcc/config/pru/pru.opt
@@ -39,6 +39,14 @@ mloop
Target Mask(OPT_LOOP)
Allow (or do not allow) gcc to use the LOOP instruction.
+mmul
+Target Mask(OPT_MUL)
+Allow (or do not allow) gcc to use the PRU multiplier unit.
+
+mfillzero
+Target Mask(OPT_FILLZERO)
+Allow (or do not allow) gcc to use the FILL and ZERO instructions.
+
mabi=
Target RejectNegative Joined Enum(pru_abi_t) Var(pru_current_abi) Init(PRU_ABI_GNU) Save
Select target ABI variant.
diff --git a/gcc/config/pru/pru.opt.urls b/gcc/config/pru/pru.opt.urls
index c87affb..5c57892 100644
--- a/gcc/config/pru/pru.opt.urls
+++ b/gcc/config/pru/pru.opt.urls
@@ -12,6 +12,12 @@ UrlSuffix(gcc/PRU-Options.html#index-mno-relax-1)
mloop
UrlSuffix(gcc/PRU-Options.html#index-mloop)
+mmul
+UrlSuffix(gcc/PRU-Options.html#index-mmul)
+
+mfillzero
+UrlSuffix(gcc/PRU-Options.html#index-mfillzero)
+
mabi=
UrlSuffix(gcc/PRU-Options.html#index-mabi-4)
diff --git a/gcc/config/pru/t-multilib b/gcc/config/pru/t-multilib
new file mode 100644
index 0000000..1e3c2b8
--- /dev/null
+++ b/gcc/config/pru/t-multilib
@@ -0,0 +1,29 @@
+# Copyright (C) 2025 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS =
+MULTILIB_OPTIONS += mloop/mno-loop
+MULTILIB_OPTIONS += mmul/mno-mul
+MULTILIB_OPTIONS += mfillzero/mno-fillzero
+
+# Build two variants:
+# - Newer PRU core versions, present in AM335x and later.
+# - Older PRU core versions, present in AM18xx.
+MULTILIB_REQUIRED =
+MULTILIB_REQUIRED += mloop/mmul/mfillzero
+MULTILIB_REQUIRED += mno-loop/mno-mul/mno-fillzero
diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize
index fd55255..15a3985 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -20,77 +20,326 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-# TODO: Extract riscv_subset_t from riscv-common.cc and make it can be compiled
-# standalone to replace this script, that also prevents us implementing
-# that twice and keep sync again and again.
-
from __future__ import print_function
import sys
import argparse
import collections
import itertools
+import re
+import os
from functools import reduce
SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"]
-CANONICAL_ORDER = "imafdgqlcbkjtpvn"
+CANONICAL_ORDER = "imafdqlcbkjtpvnh"
LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
+def parse_define_riscv_ext(content):
+ """Parse DEFINE_RISCV_EXT macros using position-based parsing."""
+ extensions = []
+
+ # Find all DEFINE_RISCV_EXT blocks
+ pattern = r'DEFINE_RISCV_EXT\s*\('
+ matches = []
+
+ pos = 0
+ while True:
+ match = re.search(pattern, content[pos:])
+ if not match:
+ break
+
+ start_pos = pos + match.start()
+ paren_count = 0
+ current_pos = pos + match.end() - 1 # Start at the opening parenthesis
+
+ # Find the matching closing parenthesis
+ while current_pos < len(content):
+ if content[current_pos] == '(':
+ paren_count += 1
+ elif content[current_pos] == ')':
+ paren_count -= 1
+ if paren_count == 0:
+ break
+ current_pos += 1
+
+ if paren_count == 0:
+ # Extract the content inside parentheses
+ macro_content = content[pos + match.end():current_pos]
+ ext_data = parse_macro_arguments(macro_content)
+ if ext_data:
+ extensions.append(ext_data)
+
+ pos = current_pos + 1
+
+ return extensions
+
+def parse_macro_arguments(macro_content):
+ """Parse the arguments of a DEFINE_RISCV_EXT macro."""
+ # Remove comments /* ... */
+ cleaned_content = re.sub(r'/\*[^*]*\*/', '', macro_content)
+
+ # Split arguments by comma, but respect nested structures
+ args = []
+ current_arg = ""
+ paren_count = 0
+ brace_count = 0
+ in_string = False
+ escape_next = False
+
+ for char in cleaned_content:
+ if escape_next:
+ current_arg += char
+ escape_next = False
+ continue
+
+ if char == '\\':
+ escape_next = True
+ current_arg += char
+ continue
+
+ if char == '"' and not escape_next:
+ in_string = not in_string
+ current_arg += char
+ continue
+
+ if in_string:
+ current_arg += char
+ continue
+
+ if char == '(':
+ paren_count += 1
+ elif char == ')':
+ paren_count -= 1
+ elif char == '{':
+ brace_count += 1
+ elif char == '}':
+ brace_count -= 1
+ elif char == ',' and paren_count == 0 and brace_count == 0:
+ args.append(current_arg.strip())
+ current_arg = ""
+ continue
+
+ current_arg += char
+
+ # Add the last argument
+ if current_arg.strip():
+ args.append(current_arg.strip())
+
+ # We need at least 6 arguments to get DEP_EXTS (position 5)
+ if len(args) < 6:
+ return None
+
+ ext_name = args[0].strip()
+ dep_exts_arg = args[5].strip() # DEP_EXTS is at position 5
+
+ # Parse dependency extensions from the DEP_EXTS argument
+ deps = parse_dep_exts(dep_exts_arg)
+
+ return {
+ 'name': ext_name,
+ 'dep_exts': deps
+ }
+
+def parse_dep_exts(dep_exts_str):
+ """Parse the DEP_EXTS argument to extract dependency list with conditions."""
+ # Remove outer parentheses if present
+ dep_exts_str = dep_exts_str.strip()
+ if dep_exts_str.startswith('(') and dep_exts_str.endswith(')'):
+ dep_exts_str = dep_exts_str[1:-1].strip()
+
+ # Remove outer braces if present
+ if dep_exts_str.startswith('{') and dep_exts_str.endswith('}'):
+ dep_exts_str = dep_exts_str[1:-1].strip()
+
+ if not dep_exts_str:
+ return []
+
+ deps = []
+
+ # First, find and process conditional dependencies
+ conditional_pattern = r'\{\s*"([^"]+)"\s*,\s*(\[.*?\]\s*\([^)]*\)\s*->\s*bool.*?)\}'
+ conditional_matches = []
+
+ for match in re.finditer(conditional_pattern, dep_exts_str, re.DOTALL):
+ ext_name = match.group(1)
+ condition_code = match.group(2)
+ deps.append({'ext': ext_name, 'type': 'conditional', 'condition': condition_code})
+ # The conditional_pattern RE matches only the first code block enclosed
+ # in braces.
+ #
+ # Extend the match to the condition block's closing brace, encompassing
+ # all code blocks, by simply trying to match the numbers of opening
+ # and closing braces. While crude, this avoids writing a complicated
+ # parse here.
+ closing_braces_left = condition_code.count('{') - condition_code.count('}')
+ condition_end = match.end()
+ while closing_braces_left > 0:
+ condition_end = dep_exts_str.find('}', condition_end)
+ closing_braces_left -= 1
+ conditional_matches.append((match.start(), condition_end))
+
+ # Remove conditional dependency blocks from the string
+ remaining_str = dep_exts_str
+ for start, end in reversed(conditional_matches): # Reverse order to maintain indices
+ remaining_str = remaining_str[:start] + remaining_str[end:]
+
+ # Now handle simple quoted strings in the remaining text
+ for match in re.finditer(r'"([^"]+)"', remaining_str):
+ deps.append({'ext': match.group(1), 'type': 'simple'})
+
+ # Remove duplicates while preserving order
+ seen = set()
+ unique_deps = []
+ for dep in deps:
+ key = (dep['ext'], dep['type'])
+ if key not in seen:
+ seen.add(key)
+ unique_deps.append(dep)
+
+ return unique_deps
+
+def evaluate_conditional_dependency(ext, dep, xlen, current_exts):
+ """Evaluate whether a conditional dependency should be included."""
+ ext_name = dep['ext']
+ condition = dep['condition']
+ # Parse the condition based on known patterns
+ if ext_name == 'zcf' and ext in ['zca', 'c', 'zce']:
+ # zcf depends on RV32 and F extension
+ return xlen == 32 and 'f' in current_exts
+ elif ext_name == 'zcd' and ext in ['zca', 'c']:
+ # zcd depends on D extension
+ return 'd' in current_exts
+ elif ext_name == 'c' and ext in ['zca']:
+ # Special case for zca -> c conditional dependency
+ if xlen == 32:
+ if 'd' in current_exts:
+ return 'zcf' in current_exts and 'zcd' in current_exts
+ elif 'f' in current_exts:
+ return 'zcf' in current_exts
+ else:
+ return True
+ elif xlen == 64:
+ if 'd' in current_exts:
+ return 'zcd' in current_exts
+ else:
+ return True
+ return False
+ else:
+ # Report error for unhandled conditional dependencies
+ import sys
+ print(f"ERROR: Unhandled conditional dependency: '{ext_name}' with condition:", file=sys.stderr)
+ print(f" Condition code: {condition[:100]}...", file=sys.stderr)
+ print(f" Current context: xlen={xlen}, exts={sorted(current_exts)}", file=sys.stderr)
+ # For now, return False to be safe
+ return False
+
+def resolve_dependencies(arch_parts, xlen):
+ """Resolve all dependencies including conditional ones."""
+ current_exts = set(arch_parts)
+ implied_deps = set()
+
+ # Keep resolving until no new dependencies are found
+ changed = True
+ while changed:
+ changed = False
+ new_deps = set()
+
+ for ext in current_exts | implied_deps:
+ if ext in IMPLIED_EXT:
+ for dep in IMPLIED_EXT[ext]:
+ if dep['type'] == 'simple':
+ if dep['ext'] not in current_exts and dep['ext'] not in implied_deps:
+ new_deps.add(dep['ext'])
+ changed = True
+ elif dep['type'] == 'conditional':
+ should_include = evaluate_conditional_dependency(ext, dep, xlen, current_exts | implied_deps)
+ if should_include:
+ if dep['ext'] not in current_exts and dep['ext'] not in implied_deps:
+ new_deps.add(dep['ext'])
+ changed = True
+
+ implied_deps.update(new_deps)
+
+ return implied_deps
+
+def parse_def_file(file_path, script_dir, processed_files=None, collect_all=False):
+ """Parse a single .def file and recursively process #include directives."""
+ if processed_files is None:
+ processed_files = set()
+
+ # Avoid infinite recursion
+ if file_path in processed_files:
+ return ({}, set()) if collect_all else {}
+ processed_files.add(file_path)
+
+ implied_ext = {}
+ all_extensions = set() if collect_all else None
+
+ if not os.path.exists(file_path):
+ return (implied_ext, all_extensions) if collect_all else implied_ext
+
+ with open(file_path, 'r') as f:
+ content = f.read()
+
+ # Process #include directives first
+ include_pattern = r'#include\s+"([^"]+)"'
+ includes = re.findall(include_pattern, content)
+
+ for include_file in includes:
+ include_path = os.path.join(script_dir, include_file)
+ if collect_all:
+ included_ext, included_all = parse_def_file(include_path, script_dir, processed_files, collect_all)
+ implied_ext.update(included_ext)
+ all_extensions.update(included_all)
+ else:
+ included_ext = parse_def_file(include_path, script_dir, processed_files, collect_all)
+ implied_ext.update(included_ext)
+
+ # Parse DEFINE_RISCV_EXT blocks using position-based parsing
+ parsed_exts = parse_define_riscv_ext(content)
+
+ for ext_data in parsed_exts:
+ ext_name = ext_data['name']
+ deps = ext_data['dep_exts']
+
+ if collect_all:
+ all_extensions.add(ext_name)
+
+ if deps:
+ implied_ext[ext_name] = deps
+
+ return (implied_ext, all_extensions) if collect_all else implied_ext
+
+def parse_def_files():
+ """Parse RISC-V extension definition files starting from riscv-ext.def."""
+ # Get directory containing this script
+ try:
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ except NameError:
+ # When __file__ is not defined (e.g., interactive mode)
+ script_dir = os.getcwd()
+
+ # Start with the main definition file
+ main_def_file = os.path.join(script_dir, 'riscv-ext.def')
+ return parse_def_file(main_def_file, script_dir)
+
+def get_all_extensions():
+ """Get all supported extensions and their implied extensions."""
+ # Get directory containing this script
+ try:
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ except NameError:
+ # When __file__ is not defined (e.g., interactive mode)
+ script_dir = os.getcwd()
+
+ # Start with the main definition file
+ main_def_file = os.path.join(script_dir, 'riscv-ext.def')
+ return parse_def_file(main_def_file, script_dir, collect_all=True)
+
#
# IMPLIED_EXT(ext) -> implied extension list.
+# This is loaded dynamically from .def files
#
-IMPLIED_EXT = {
- "d" : ["f", "zicsr"],
-
- "a" : ["zaamo", "zalrsc"],
- "zabha" : ["zaamo"],
- "zacas" : ["zaamo"],
-
- "f" : ["zicsr"],
- "b" : ["zba", "zbb", "zbs"],
- "zdinx" : ["zfinx", "zicsr"],
- "zfinx" : ["zicsr"],
- "zhinx" : ["zhinxmin", "zfinx", "zicsr"],
- "zhinxmin" : ["zfinx", "zicsr"],
-
- "zk" : ["zkn", "zkr", "zkt"],
- "zkn" : ["zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"],
- "zks" : ["zbkb", "zbkc", "zbkx", "zksed", "zksh"],
-
- "v" : ["zvl128b", "zve64d"],
- "zve32x" : ["zvl32b"],
- "zve64x" : ["zve32x", "zvl64b"],
- "zve32f" : ["f", "zve32x"],
- "zve64f" : ["f", "zve32f", "zve64x"],
- "zve64d" : ["d", "zve64f"],
-
- "zvl64b" : ["zvl32b"],
- "zvl128b" : ["zvl64b"],
- "zvl256b" : ["zvl128b"],
- "zvl512b" : ["zvl256b"],
- "zvl1024b" : ["zvl512b"],
- "zvl2048b" : ["zvl1024b"],
- "zvl4096b" : ["zvl2048b"],
- "zvl8192b" : ["zvl4096b"],
- "zvl16384b" : ["zvl8192b"],
- "zvl32768b" : ["zvl16384b"],
- "zvl65536b" : ["zvl32768b"],
-
- "zvkn" : ["zvkned", "zvknhb", "zvkb", "zvkt"],
- "zvknc" : ["zvkn", "zvbc"],
- "zvkng" : ["zvkn", "zvkg"],
- "zvks" : ["zvksed", "zvksh", "zvkb", "zvkt"],
- "zvksc" : ["zvks", "zvbc"],
- "zvksg" : ["zvks", "zvkg"],
- "zvbb" : ["zvkb"],
- "zvbc" : ["zve64x"],
- "zvkb" : ["zve32x"],
- "zvkg" : ["zve32x"],
- "zvkned" : ["zve32x"],
- "zvknha" : ["zve32x"],
- "zvknhb" : ["zve64x"],
- "zvksed" : ["zve32x"],
- "zvksh" : ["zve32x"],
-}
+IMPLIED_EXT = parse_def_files()
def arch_canonicalize(arch, isa_spec):
# TODO: Support extension version.
@@ -123,21 +372,31 @@ def arch_canonicalize(arch, isa_spec):
long_exts += extra_long_ext
#
- # Handle implied extensions.
+ # Handle implied extensions using new conditional logic.
#
- any_change = True
- while any_change:
- any_change = False
- for ext in std_exts + long_exts:
- if ext in IMPLIED_EXT:
- implied_exts = IMPLIED_EXT[ext]
- for implied_ext in implied_exts:
- if implied_ext == 'zicsr' and is_isa_spec_2p2:
- continue
+ # Extract xlen from architecture string
+ # TODO: We should support profile here.
+ if arch.startswith('rv32'):
+ xlen = 32
+ elif arch.startswith('rv64'):
+ xlen = 64
+ else:
+ raise Exception("Unsupported prefix `%s`" % arch)
- if implied_ext not in std_exts + long_exts:
- long_exts.append(implied_ext)
- any_change = True
+ # Get all current extensions
+ current_exts = std_exts + long_exts
+
+ # Resolve dependencies
+ implied_deps = resolve_dependencies(current_exts, xlen)
+
+ # Filter out zicsr for ISA spec 2.2
+ if is_isa_spec_2p2:
+ implied_deps.discard('zicsr')
+
+ # Add implied dependencies to long_exts
+ for dep in implied_deps:
+ if dep not in current_exts:
+ long_exts.append(dep)
# Single letter extension might appear in the long_exts list,
# because we just append extensions list to the arch string.
@@ -179,17 +438,177 @@ def arch_canonicalize(arch, isa_spec):
return new_arch
-if len(sys.argv) < 2:
- print ("Usage: %s <arch_str> [<arch_str>*]" % sys.argv)
- sys.exit(1)
+def dump_all_extensions():
+ """Dump all extensions and their implied extensions."""
+ implied_ext, all_extensions = get_all_extensions()
+
+ print("All supported RISC-V extensions:")
+ print("=" * 60)
+
+ if not all_extensions:
+ print("No extensions found.")
+ return
-parser = argparse.ArgumentParser()
-parser.add_argument('-misa-spec', type=str,
- default='20191213',
- choices=SUPPORTED_ISA_SPEC)
-parser.add_argument('arch_strs', nargs=argparse.REMAINDER)
+ # Sort all extensions for consistent output
+ sorted_all = sorted(all_extensions)
-args = parser.parse_args()
+ # Print all extensions with their dependencies (if any)
+ for ext_name in sorted_all:
+ if ext_name in implied_ext:
+ deps = implied_ext[ext_name]
+ dep_strs = []
+ for dep in deps:
+ if dep['type'] == 'simple':
+ dep_strs.append(dep['ext'])
+ else:
+ dep_strs.append(f"{dep['ext']}*") # Mark conditional deps with *
+ print(f"{ext_name:15} -> {', '.join(dep_strs)}")
+ else:
+ print(f"{ext_name:15} -> (no dependencies)")
+
+ print(f"\nTotal extensions: {len(all_extensions)}")
+ print(f"Extensions with dependencies: {len(implied_ext)}")
+ print(f"Extensions without dependencies: {len(all_extensions) - len(implied_ext)}")
+
+def run_unit_tests():
+ """Run unit tests using pytest dynamically imported."""
+ try:
+ import pytest
+ except ImportError:
+ print("Error: pytest is required for running unit tests.")
+ print("Please install pytest: pip install pytest")
+ return 1
+
+ # Define test functions
+ def test_basic_arch_parsing():
+ """Test basic architecture string parsing."""
+ result = arch_canonicalize("rv64i", "20191213")
+ assert result == "rv64i"
+
+ def test_simple_extensions():
+ """Test simple extension handling."""
+ result = arch_canonicalize("rv64im", "20191213")
+ assert "zmmul" in result
+
+ def test_implied_extensions():
+ """Test implied extension resolution."""
+ result = arch_canonicalize("rv64imaf", "20191213")
+ assert "zicsr" in result
+
+ def test_conditional_dependencies():
+ """Test conditional dependency evaluation."""
+ # Test RV32 with F extension should include zcf when c is present
+ result = arch_canonicalize("rv32ifc", "20191213")
+ parts = result.split("_")
+ if "c" in parts:
+ assert "zca" in parts
+ if "f" in parts:
+ assert "zcf" in parts
+
+ def test_parse_dep_exts():
+ """Test dependency parsing function."""
+ # Test simple dependency
+ deps = parse_dep_exts('{"ext1", "ext2"}')
+ assert len(deps) == 2
+ assert deps[0]['ext'] == 'ext1'
+ assert deps[0]['type'] == 'simple'
+
+ def test_evaluate_conditional_dependency():
+ """Test conditional dependency evaluation."""
+ # Test zcf condition for RV32 with F
+ dep = {'ext': 'zcf', 'type': 'conditional', 'condition': 'test'}
+ result = evaluate_conditional_dependency('zce', dep, 32, {'f'})
+ assert result == True
+
+ # Test zcf condition for RV64 with F (should be False)
+ result = evaluate_conditional_dependency('zce', dep, 64, {'f'})
+ assert result == False
+
+ def test_parse_define_riscv_ext():
+ """Test DEFINE_RISCV_EXT parsing."""
+ content = '''
+ DEFINE_RISCV_EXT(
+ /* NAME */ test,
+ /* UPPERCASE_NAME */ TEST,
+ /* FULL_NAME */ "Test extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"dep1", "dep2"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ test,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 0,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+ '''
+
+ extensions = parse_define_riscv_ext(content)
+ assert len(extensions) == 1
+ assert extensions[0]['name'] == 'test'
+ assert len(extensions[0]['dep_exts']) == 2
-for arch in args.arch_strs:
- print (arch_canonicalize(arch, args.misa_spec))
+ def test_parse_long_condition_block():
+ """Test condition block containing several code blocks."""
+ result = arch_canonicalize("rv32ec", "20191213")
+ assert "rv32ec_zca" in result
+
+ # Collect test functions
+ test_functions = [
+ test_basic_arch_parsing,
+ test_simple_extensions,
+ test_implied_extensions,
+ test_conditional_dependencies,
+ test_parse_dep_exts,
+ test_evaluate_conditional_dependency,
+ test_parse_define_riscv_ext,
+ test_parse_long_condition_block
+ ]
+
+ # Run tests manually first, then optionally with pytest
+ print("Running unit tests...")
+
+ passed = 0
+ failed = 0
+
+ for i, test_func in enumerate(test_functions):
+ try:
+ print(f" Running {test_func.__name__}...", end=" ")
+ test_func()
+ print("PASSED")
+ passed += 1
+ except Exception as e:
+ print(f"FAILED: {e}")
+ failed += 1
+
+ print(f"\nTest Summary: {passed} passed, {failed} failed")
+
+ if failed == 0:
+ print("\nAll tests passed!")
+ return 0
+ else:
+ print(f"\n{failed} test(s) failed!")
+ return 1
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-misa-spec', type=str,
+ default='20191213',
+ choices=SUPPORTED_ISA_SPEC)
+ parser.add_argument('--dump-all', action='store_true',
+ help='Dump all extensions and their implied extensions')
+ parser.add_argument('--selftest', action='store_true',
+ help='Run unit tests using pytest')
+ parser.add_argument('arch_strs', nargs='*',
+ help='Architecture strings to canonicalize')
+
+ args = parser.parse_args()
+
+ if args.dump_all:
+ dump_all_extensions()
+ elif args.selftest:
+ sys.exit(run_unit_tests())
+ elif args.arch_strs:
+ for arch in args.arch_strs:
+ print (arch_canonicalize(arch, args.misa_spec))
+ else:
+ parser.print_help()
+ sys.exit(1)
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 6531996..9695fdc 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1679,6 +1679,26 @@
;; Combine vec_duplicate + op.vv to op.vx
;; Include
;; - vadd.vx
+;; - vsub.vx
+;; - vrsub.vx
+;; - vand.vx
+;; - vor.vx
+;; - vmul.vx
+;; - vdiv.vx
+;; - vdivu.vx
+;; - vrem.vx
+;; - vremu.vx
+;; - vmax.vx
+;; - vmaxu.vx
+;; - vmin.vx
+;; - vminu.vx
+;; - vsadd.vx
+;; - vsaddu.vx
+;; - vssub.vx
+;; - vssubu.vx
+;; - vaadd.vx
+;; - vaaddu.vx
+;; - vmerge.vxm
;; =============================================================================
(define_insn_and_split "*<optab>_vx_<mode>"
[(set (match_operand:V_VLSI 0 "register_operand")
@@ -1694,6 +1714,8 @@
riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2],
operands[1], <CODE>,
<MODE>mode);
+
+ DONE;
}
[(set_attr "type" "vialu")])
@@ -1711,6 +1733,8 @@
riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1],
operands[2], <CODE>,
<MODE>mode);
+
+ DONE;
}
[(set_attr "type" "vialu")])
@@ -1782,6 +1806,69 @@
}
[(set_attr "type" "vaalu")])
+(define_insn_and_split "*merge_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 2 "reg_or_int_operand"))
+ (match_operand:V_VLSI 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_merge_scalar (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::MERGE_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vimerge")])
+
+(define_insn_and_split "*vmacc_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (plus:V_VLSI
+ (mult:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "register_operand"))
+ (match_operand:V_VLSI 3 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "vimuladd")])
+
+(define_insn_and_split "*vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (minus:V_VLSI
+ (match_operand:V_VLSI 3 "register_operand")
+ (mult:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_vnmsac_vx (<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "vimuladd")])
+
+
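A C-level sketch of the loops whose vectorized form the new *vmacc_vx_<mode> and *vnmsac_vx_<mode> combine patterns target (a scalar broadcast multiplied into a vector and accumulated); whether the .vx variant is actually formed depends on the usual costing:

    #include <stdint.h>

    void
    macc (int32_t *acc, const int32_t *b, int32_t x, int n)
    {
      for (int i = 0; i < n; i++)
        acc[i] += x * b[i];     /* candidate for vmacc.vx  */
    }

    void
    nmsac (int32_t *acc, const int32_t *b, int32_t x, int n)
    {
      for (int i = 0; i < n; i++)
        acc[i] -= x * b[i];     /* candidate for vnmsac.vx  */
    }
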
;; =============================================================================
;; Combine vec_duplicate + op.vv to op.vf
;; Include
@@ -1962,3 +2049,98 @@
}
[(set_attr "type" "vfwmuladd")]
)
+
+;; vfmul.vf
+(define_insn_and_split "*vfmul_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (MULT, <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfmul")]
+)
+
+;; vfrdiv.vf
+(define_insn_and_split "*vfrdiv_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (div:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_reverse_scalar (DIV, <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfdiv")]
+)
+
+;; vfmin.vf
+(define_insn_and_split "*vfmin_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (smin:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (SMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
+
+(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")
+ ] UNSPEC_VFMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
+
+(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (match_operand:V_VLSF 1 "register_operand")
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ ] UNSPEC_VFMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
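
Similarly, the new vf patterns recognize a broadcast scalar on one side of a floating-point multiply, reversed divide, or minimum. A sketch of loops of that shape, again only candidates for the .vf forms:

    void
    fp_mul_vf (float *out, const float *a, float x, int n)
    {
      for (int i = 0; i < n; i++)
        out[i] = x * a[i];      /* vfmul.vf candidate  */
    }

    void
    fp_rdiv_vf (float *out, const float *a, float x, int n)
    {
      for (int i = 0; i < n; i++)
        out[i] = x / a[i];      /* vfrdiv.vf candidate  */
    }
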
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 5ecaa19..979e0df 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -330,3 +330,7 @@
(define_constraint "Q"
"An address operand that is valid for a prefetch instruction"
(match_operand 0 "prefetch_operand"))
+
+(define_address_constraint "ZD"
+ "An address operand that is valid for a mips prefetch instruction"
+ (match_test "riscv_prefetch_offset_address_p (op, mode)"))
diff --git a/gcc/config/riscv/gen-riscv-ext-opt.cc b/gcc/config/riscv/gen-riscv-ext-opt.cc
index 17b8f5b..1ca339c 100644
--- a/gcc/config/riscv/gen-riscv-ext-opt.cc
+++ b/gcc/config/riscv/gen-riscv-ext-opt.cc
@@ -4,50 +4,6 @@
#include <stdio.h>
#include "riscv-opts.h"
-struct version_t
-{
- int major;
- int minor;
- version_t (int major, int minor,
- enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE)
- : major (major), minor (minor)
- {}
- bool operator<(const version_t &other) const
- {
- if (major != other.major)
- return major < other.major;
- return minor < other.minor;
- }
-
- bool operator== (const version_t &other) const
- {
- return major == other.major && minor == other.minor;
- }
-};
-
-static void
-print_ext_doc_entry (const std::string &ext_name, const std::string &full_name,
- const std::string &desc,
- const std::vector<version_t> &supported_versions)
-{
- // Implementation of the function to print the documentation entry
- // for the extension.
- std::set<version_t> unique_versions;
- for (const auto &version : supported_versions)
- unique_versions.insert (version);
- printf ("@item %s\n", ext_name.c_str ());
- printf ("@tab");
- for (const auto &version : unique_versions)
- {
- printf (" %d.%d", version.major, version.minor);
- }
- printf ("\n");
- printf ("@tab %s", full_name.c_str ());
- if (desc.size ())
- printf (", %s", desc.c_str ());
- printf ("\n\n");
-}
-
int
main ()
{
diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
new file mode 100644
index 0000000..9681438
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
@@ -0,0 +1,43 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Core Name}");
+ puts ("");
+ puts ("@opindex mcpu");
+ puts ("@item -mcpu=@var{processor-string}");
+ puts ("Use architecture of and optimize the output for the given processor, specified");
+ puts ("by particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> coreNames;
+
+#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \
+ coreNames.push_back (CORE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_CORE
+
+ for (size_t i = 0; i < coreNames.size(); ++i) {
+ if (i == coreNames.size() - 1) {
+ printf("@samp{%s}.\n", coreNames[i].c_str());
+ } else {
+ printf("@samp{%s},\n\n", coreNames[i].c_str());
+ }
+ }
+
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc
new file mode 100644
index 0000000..1bdfe2a
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc
@@ -0,0 +1,41 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Tune Name}");
+ puts ("");
+ puts ("@opindex mtune");
+ puts ("@item -mtune=@var{processor-string}");
+ puts ("Optimize the output for the given processor, specified by microarchitecture or");
+ puts ("particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> tuneNames;
+
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+ tuneNames.push_back (TUNE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_TUNE
+
+ for (size_t i = 0; i < tuneNames.size(); ++i) {
+ printf("@samp{%s},\n\n", tuneNames[i].c_str());
+ }
+
+ puts ("and all valid options for @option{-mcpu=}.");
+
+ return 0;
+}
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 381f96c..bdb3d22 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -27,10 +27,14 @@
(ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
+(define_predicate "prefetch_const_operand"
+ (and (match_code "const_int")
+ (match_test "(IN_RANGE (INTVAL (op), 0, 511))")))
+
;; REG or REG+D where D fits in a simm12 and has the low 5 bits
;; off. The REG+D form can be reloaded into a temporary if needed
;; after FP elimination if that exposes an invalid offset.
-(define_predicate "prefetch_operand"
+(define_predicate "zicbop_prefetch_operand"
(ior (match_operand 0 "register_operand")
(and (match_test "const_arith_operand (op, VOIDmode)")
(match_test "(INTVAL (op) & 0x1f) == 0"))
@@ -39,6 +43,20 @@
(match_test "const_arith_operand (XEXP (op, 1), VOIDmode)")
(match_test "(INTVAL (XEXP (op, 1)) & 0x1f) == 0"))))
+;; REG or REG+D where D fits in a uimm9.
+(define_predicate "mips_prefetch_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "prefetch_const_operand (op, VOIDmode)")
+ (and (match_code "plus")
+ (match_test "register_operand (XEXP (op, 0), word_mode)")
+ (match_test "prefetch_const_operand (XEXP (op, 1), VOIDmode)"))))
+
+;; MIPS-specific or standard RISC-V (Zicbop) prefetch operand.
+(define_predicate "prefetch_operand"
+ (if_then_else (match_test "TARGET_XMIPSCBOP")
+ (match_operand 0 "mips_prefetch_operand")
+ (match_operand 0 "zicbop_prefetch_operand")))
+
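With the new predicates, a prefetch address of the form reg + D is accepted directly when D fits in a uimm9 (0..511) and the XMIPSCBOP extension is enabled; otherwise the Zicbop rules (simm12 with the low 5 bits clear) still apply. A minimal C example that produces such an address, as a sketch only:

    void
    prefetch_ahead (const char *p)
    {
      /* p + 256 keeps the offset within the uimm9 range.  */
      __builtin_prefetch (p + 256, 0 /* read */, 3 /* high locality */);
    }
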
(define_predicate "lui_operand"
(and (match_code "const_int")
(match_test "LUI_OPERAND (INTVAL (op))")))
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index 3031c29..b8547a7 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -156,6 +156,7 @@ get_insn_vtype_mode (rtx_insn *rinsn)
extract_insn_cached (rinsn);
int mode_idx = get_attr_mode_idx (rinsn);
gcc_assert (mode_idx != INVALID_ATTRIBUTE);
+ gcc_assert (mode_idx < recog_data.n_operands);
return GET_MODE (recog_data.operand[mode_idx]);
}
@@ -205,6 +206,7 @@ simplify_replace_vlmax_avl (rtx_insn *rinsn, rtx new_avl)
{
int index = get_attr_avl_type_idx (rinsn);
gcc_assert (index != INVALID_ATTRIBUTE);
+ gcc_assert (index < recog_data.n_operands);
validate_change_or_fail (rinsn, recog_data.operand_loc[index],
get_avl_type_rtx (avl_type::NONVLMAX), false);
}
@@ -361,6 +363,8 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const
is not depend on. */
extract_insn_cached (use_insn->rtl ());
int merge_op_idx = get_attr_merge_op_idx (use_insn->rtl ());
+ gcc_assert (merge_op_idx == INVALID_ATTRIBUTE
+ || merge_op_idx < recog_data.n_operands);
if (merge_op_idx != INVALID_ATTRIBUTE
&& !satisfies_constraint_vu (recog_data.operand[merge_op_idx])
&& refers_to_regno_p (set->regno (),
@@ -531,7 +535,14 @@ pass_avlprop::execute (function *fn)
&& !m_avl_propagations->get (candidate.second)
&& imm_avl_p (vtype_mode))
{
- rtx new_avl = gen_int_mode (GET_MODE_NUNITS (vtype_mode), Pmode);
+ /* For segmented operations AVL refers to a single register and
+ not all NF registers. Therefore divide the mode size by NF
+ to obtain the proper AVL. */
+ int nf = 1;
+ if (riscv_v_ext_tuple_mode_p (vtype_mode))
+ nf = get_nf (vtype_mode);
+ rtx new_avl = gen_int_mode
+ (GET_MODE_NUNITS (vtype_mode).to_constant () / nf, Pmode);
simplify_replace_vlmax_avl (rinsn, new_avl);
}
}
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 98f3470..8f0f630 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -113,7 +113,7 @@ RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_"
"zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_"
"xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_"
"xtheadfmemidx_xtheadmac_xtheadmemidx_"
- "xtheadmempair_xtheadsync_xtheadvdot",
+ "xtheadmempair_xtheadsync",
"xt-c908")
RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
"xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
@@ -121,7 +121,7 @@ RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"xt-c910")
RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_"
- "zicsr_zifencei _zihintntl_zihintpause_zihpm_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
"zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
"zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_"
"xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
@@ -135,13 +135,13 @@ RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
"xtheadvector",
"xt-c910")
RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_"
- "zicsr_zifencei _zihintntl_zihintpause_zihpm_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
"zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
"zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_"
"svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
"xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
"xtheadmac_xtheadmemidx_xtheadmempair_"
- "xtheadsync_xtheadvdot",
+ "xtheadsync",
"xt-c920v2")
RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_"
diff --git a/gcc/config/riscv/riscv-ext-mips.def b/gcc/config/riscv/riscv-ext-mips.def
index 5d7836d..132f6c1 100644
--- a/gcc/config/riscv/riscv-ext-mips.def
+++ b/gcc/config/riscv/riscv-ext-mips.def
@@ -33,3 +33,16 @@ DEFINE_RISCV_EXT (
/* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED,
/* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED,
/* EXTRA_EXTENSION_FLAGS. */ 0)
+
+DEFINE_RISCV_EXT (
+ /* NAME. */ xmipscbop,
+ /* UPPERCASE_NAME. */ XMIPSCBOP,
+ /* FULL_NAME. */ "Mips Prefetch extension",
+ /* DESC. */ "",
+ /* URL. */ ,
+ /* DEP_EXTS. */ ({}),
+ /* SUPPORTED_VERSIONS. */ ({{1, 0}}),
+ /* FLAG_GROUP. */ xmips,
+ /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS. */ 0)
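Once this entry and the riscv-ext.opt mask below are in place, the extension is enabled through the architecture string in the usual way; an illustrative invocation (not taken from the patch) would be

    riscv64-unknown-elf-gcc -march=rv64gc_xmipscbop -O2 foo.c

which sets MASK (XMIPSCBOP) in riscv_xmips_subext and hence TARGET_XMIPSCBOP, the condition tested by the prefetch changes later in this patch.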
diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt
index 26d6e68..ced05d2 100644
--- a/gcc/config/riscv/riscv-ext.opt
+++ b/gcc/config/riscv/riscv-ext.opt
@@ -449,3 +449,5 @@ Mask(XTHEADVECTOR) Var(riscv_xthead_subext)
Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
Mask(XMIPSCMOV) Var(riscv_xmips_subext)
+
+Mask(XMIPSCBOP) Var(riscv_xmips_subext)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 539321f..46b256d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -143,6 +143,8 @@ extern void riscv_expand_sstrunc (rtx, rtx);
extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
extern bool synthesize_ior_xor (rtx_code, rtx [3]);
extern bool synthesize_and (rtx [3]);
+extern bool synthesize_add (rtx [3]);
+extern bool synthesize_add_extended (rtx [3]);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -830,16 +832,18 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
extern bool strided_load_broadcast_p (void);
extern bool riscv_use_divmod_expander (void);
-void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
+void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
extern bool
riscv_option_valid_attribute_p (tree, tree, tree, int);
extern bool
riscv_option_valid_version_attribute_p (tree, tree, tree, int);
extern bool
-riscv_process_target_version_attr (tree, location_t);
+riscv_process_target_version_attr (tree, location_t *);
extern void
riscv_override_options_internal (struct gcc_options *);
extern void riscv_option_override (void);
+extern rtx riscv_prefetch_cookie (rtx, rtx);
+extern bool riscv_prefetch_offset_address_p (rtx, machine_mode);
struct riscv_tune_param;
/* Information about one micro-arch we know about. */
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index a35537d..4cd860f 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -52,8 +52,9 @@ private:
/* Original arch string. */
const char *m_arch;
- /* Location of arch string, used for report error. */
- location_t m_loc;
+ /* A pointer to the location that should be used for diagnostics,
+ or null if diagnostics should be suppressed. */
+ location_t *m_loc;
/* Head of subset info list. */
riscv_subset_t *m_head;
@@ -70,7 +71,7 @@ private:
/* Allow adding the same extension more than once. */
bool m_allow_adding_dup;
- riscv_subset_list (const char *, location_t);
+ riscv_subset_list (const char *, location_t *);
const char *parsing_subset_version (const char *, const char *, unsigned *,
unsigned *, bool, bool *);
@@ -106,12 +107,12 @@ public:
riscv_subset_list *clone () const;
- static riscv_subset_list *parse (const char *, location_t);
+ static riscv_subset_list *parse (const char *, location_t *);
const char *parse_single_ext (const char *, bool exact_single_p = true);
int match_score (riscv_subset_list *) const;
- void set_loc (location_t);
+ void set_loc (location_t *);
void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
@@ -182,7 +183,7 @@ extern void
riscv_set_arch_by_subset_list (riscv_subset_list *, struct gcc_options *);
extern bool riscv_minimal_hwprobe_feature_bits (const char *,
struct riscv_feature_bits *,
- location_t);
+ location_t *);
extern bool
riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *);
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index 8ad3025..5e01c92 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -34,7 +34,7 @@ namespace {
class riscv_target_attr_parser
{
public:
- riscv_target_attr_parser (location_t loc)
+ riscv_target_attr_parser (location_t *loc)
: m_found_arch_p (false)
, m_found_tune_p (false)
, m_found_cpu_p (false)
@@ -62,7 +62,7 @@ private:
bool m_found_cpu_p;
bool m_found_priority_p;
riscv_subset_list *m_subset_list;
- location_t m_loc;
+ location_t *m_loc;
const riscv_cpu_info *m_cpu_info;
const char *m_tune;
int m_priority;
@@ -102,15 +102,17 @@ riscv_target_attr_parser::parse_arch (const char *str)
{
if (TARGET_64BIT && strncmp ("32", str + 2, strlen ("32")) == 0)
{
- error_at (m_loc, "unexpected arch for %<target()%> attribute: "
- "must start with rv64 but found %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> attribute: "
+ "must start with rv64 but found %qs", str);
goto fail;
}
if (!TARGET_64BIT && strncmp ("64", str + 2, strlen ("64")) == 0)
{
- error_at (m_loc, "unexpected arch for %<target()%> attribute: "
- "must start with rv32 but found %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> attribute: "
+ "must start with rv32 but found %qs", str);
goto fail;
}
@@ -140,10 +142,9 @@ riscv_target_attr_parser::parse_arch (const char *str)
{
if (token[0] != '+')
{
- error_at (
- m_loc,
- "unexpected arch for %<target()%> attribute: must start "
- "with + or rv");
+	  if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> "
+ "attribute: must start with + or rv");
goto fail;
}
@@ -151,10 +152,9 @@ riscv_target_attr_parser::parse_arch (const char *str)
/* Check parse_single_ext has consume all string. */
if (*result != '\0')
{
- error_at (
- m_loc,
- "unexpected arch for %<target()%> attribute: bad "
- "string found %qs", token);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> "
+ "attribute: bad string found %qs", token);
goto fail;
}
@@ -179,8 +179,8 @@ fail:
bool
riscv_target_attr_parser::handle_arch (const char *str)
{
- if (m_found_arch_p)
- error_at (m_loc, "%<target()%> attribute: arch appears more than once");
+ if (m_found_arch_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: arch appears more than once");
m_found_arch_p = true;
return parse_arch (str);
}
@@ -190,15 +190,16 @@ riscv_target_attr_parser::handle_arch (const char *str)
bool
riscv_target_attr_parser::handle_cpu (const char *str)
{
- if (m_found_cpu_p)
- error_at (m_loc, "%<target()%> attribute: cpu appears more than once");
+ if (m_found_cpu_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: cpu appears more than once");
m_found_cpu_p = true;
const riscv_cpu_info *cpu_info = riscv_find_cpu (str);
if (!cpu_info)
{
- error_at (m_loc, "%<target()%> attribute: unknown CPU %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: unknown CPU %qs", str);
return false;
}
@@ -218,14 +219,15 @@ riscv_target_attr_parser::handle_cpu (const char *str)
bool
riscv_target_attr_parser::handle_tune (const char *str)
{
- if (m_found_tune_p)
- error_at (m_loc, "%<target()%> attribute: tune appears more than once");
+ if (m_found_tune_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: tune appears more than once");
m_found_tune_p = true;
const struct riscv_tune_info *tune = riscv_parse_tune (str, true);
if (tune == nullptr)
{
- error_at (m_loc, "%<target()%> attribute: unknown TUNE %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: unknown TUNE %qs", str);
return false;
}
@@ -237,13 +239,15 @@ riscv_target_attr_parser::handle_tune (const char *str)
bool
riscv_target_attr_parser::handle_priority (const char *str)
{
- if (m_found_priority_p)
- error_at (m_loc, "%<target()%> attribute: priority appears more than once");
+ if (m_found_priority_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: priority appears "
+ "more than once");
m_found_priority_p = true;
if (sscanf (str, "%d", &m_priority) != 1)
{
- error_at (m_loc, "%<target()%> attribute: invalid priority %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: invalid priority %qs", str);
return false;
}
@@ -282,7 +286,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
static bool
riscv_process_one_target_attr (char *arg_str,
- location_t loc,
+ location_t *loc,
riscv_target_attr_parser &attr_parser,
const struct riscv_attribute_info *attrs)
{
@@ -290,7 +294,8 @@ riscv_process_one_target_attr (char *arg_str,
if (len == 0)
{
- error_at (loc, "malformed %<target()%> attribute");
+ if (loc)
+ error_at (*loc, "malformed %<target()%> attribute");
return false;
}
@@ -302,10 +307,9 @@ riscv_process_one_target_attr (char *arg_str,
if (!arg)
{
- error_at (
- loc,
- "attribute %<target(\"%s\")%> does not accept an argument",
- str_to_check);
+ if (loc)
+ error_at (*loc, "attribute %<target(\"%s\")%> does not "
+ "accept an argument", str_to_check);
return false;
}
@@ -324,7 +328,8 @@ riscv_process_one_target_attr (char *arg_str,
return (&attr_parser->*attr->handler) (arg);
}
- error_at (loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
+ if (loc)
+ error_at (*loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
return false;
}
@@ -347,11 +352,12 @@ num_occurrences_in_str (char c, char *str)
}
/* Parse the string in ARGS that contains the target attribute information
- and update the global target options space. */
+ and update the global target options space. If LOC is nonnull, report
+ diagnostics against location *LOC, otherwise remain silent. */
bool
riscv_process_target_attr (const char *args,
- location_t loc,
+ location_t *loc,
const struct riscv_attribute_info *attrs)
{
size_t len = strlen (args);
@@ -387,8 +393,8 @@ riscv_process_target_attr (const char *args,
if (num_attrs != num_semicolons + 1)
{
- error_at (loc, "malformed %<target(\"%s\")%> attribute",
- args);
+ if (loc)
+ error_at (*loc, "malformed %<target(\"%s\")%> attribute", args);
return false;
}
@@ -399,11 +405,12 @@ riscv_process_target_attr (const char *args,
}
/* Parse the tree in ARGS that contains the target attribute information
- and update the global target options space. */
+ and update the global target options space. If LOC is nonnull, report
+ diagnostics against *LOC, otherwise remain silent. */
static bool
riscv_process_target_attr (tree args,
- location_t loc,
+ location_t *loc,
const struct riscv_attribute_info *attrs)
{
if (TREE_CODE (args) == TREE_LIST)
@@ -424,7 +431,8 @@ riscv_process_target_attr (tree args,
if (TREE_CODE (args) != STRING_CST)
{
- error_at (loc, "attribute %<target%> argument not a string");
+ if (loc)
+ error_at (*loc, "attribute %<target%> argument not a string");
return false;
}
@@ -466,7 +474,7 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
TREE_TARGET_OPTION (target_option_default_node));
/* Now we can parse the attributes and set &global_options accordingly. */
- ret = riscv_process_target_attr (args, loc, riscv_target_attrs);
+ ret = riscv_process_target_attr (args, &loc, riscv_target_attrs);
if (ret)
{
riscv_override_options_internal (&global_options);
@@ -481,16 +489,19 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
}
/* Parse the tree in ARGS that contains the target_version attribute
- information and update the global target options space. */
+ information and update the global target options space. If LOC is nonnull,
+ report diagnostics against *LOC, otherwise remain silent. */
bool
-riscv_process_target_version_attr (tree args, location_t loc)
+riscv_process_target_version_attr (tree args, location_t *loc)
{
if (TREE_CODE (args) == TREE_LIST)
{
if (TREE_CHAIN (args))
{
- error ("attribute %<target_version%> has multiple values");
+ if (loc)
+ error_at (*loc, "attribute %<target_version%> "
+ "has multiple values");
return false;
}
args = TREE_VALUE (args);
@@ -498,7 +509,8 @@ riscv_process_target_version_attr (tree args, location_t loc)
if (!args || TREE_CODE (args) != STRING_CST)
{
- error ("attribute %<target_version%> argument not a string");
+ if (loc)
+ error_at (*loc, "attribute %<target_version%> argument not a string");
return false;
}
@@ -541,7 +553,7 @@ riscv_option_valid_version_attribute_p (tree fndecl, tree, tree args, int)
cl_target_option_restore (&global_options, &global_options_set,
TREE_TARGET_OPTION (target_option_current_node));
- ret = riscv_process_target_version_attr (args, loc);
+ ret = riscv_process_target_version_attr (args, &loc);
/* Set up any additional state. */
if (ret)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c9c8328..b27a0be 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -63,20 +63,37 @@ imm_avl_p (machine_mode mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (mode);
+ /* For segmented operations AVL refers to a single register and not all NF
+ registers. Therefore divide the mode size by NF before checking if it is
+ in range. */
+ int nf = 1;
+ if (riscv_v_ext_tuple_mode_p (mode))
+ nf = get_nf (mode);
+
return nunits.is_constant ()
/* The vsetivli can only hold register 0~31. */
- ? (IN_RANGE (nunits.to_constant (), 0, 31))
+ ? (IN_RANGE (nunits.to_constant () / nf, 0, 31))
/* Only allowed in VLS-VLMAX mode. */
: false;
}
-/* Return true if LEN is equal to NUNITS that out of the range [0, 31]. */
+/* Return true if LEN equals the number of units in MODE, where MODE is either
+   a VLA mode or a VLS mode whose size equals the vector size.
+ In that case we can emit a VLMAX insn which can be optimized more easily
+ by the vsetvl pass. */
+
static bool
is_vlmax_len_p (machine_mode mode, rtx len)
{
poly_int64 value;
+ if (poly_int_rtx_p (len, &value)
+ && known_eq (value, GET_MODE_NUNITS (mode))
+ && known_eq (GET_MODE_UNIT_SIZE (mode) * value, BYTES_PER_RISCV_VECTOR))
+ return true;
+
return poly_int_rtx_p (len, &value)
- && known_eq (value, GET_MODE_NUNITS (mode));
+ && !GET_MODE_NUNITS (mode).is_constant ()
+ && known_eq (value, GET_MODE_NUNITS (mode));
}
/* Helper functions for insn_flags && insn_types */
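To make the new VLS condition concrete (assuming the vector length is known to be 128 bits so that BYTES_PER_RISCV_VECTOR is a constant 16; the numbers are for illustration only): V4SImode has 4 units of 4 bytes, so a length operand of 4 passes both known_eq checks and the operation can use the VLMAX form, whereas V2SImode with a length of 2 covers only 8 of the 16 bytes and keeps the non-VLMAX path.  VLA modes are still handled by the second test, which requires a non-constant number of units.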
@@ -954,6 +971,26 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
emit_vlmax_insn (icode, BINARY_OP_TAMU, ops);
}
+/* Function to emit a vslide1up instruction of mode MODE with destination
+ DEST and slideup element ELT. */
+
+rtx
+expand_slide1up (machine_mode mode, rtx dest, rtx elt)
+{
+ unsigned int unspec
+ = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
+ insn_code icode = code_for_pred_slide (unspec, mode);
+ /* RVV Spec 16.3.1
+ The destination vector register group for vslideup cannot overlap the
+ source vector register group, otherwise the instruction encoding
+ is reserved. Thus, we need a new register. */
+ rtx tmp = gen_reg_rtx (mode);
+ rtx ops[] = {tmp, dest, elt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ return tmp;
+}
+
+
/* According to RVV ISA spec (16.5.1. Synthesizing vdecompress):
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc
@@ -1175,16 +1212,7 @@ expand_vector_init_trailing_same_elem (rtx target,
{
rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1));
for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--)
- {
- unsigned int unspec
- = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, mode);
- rtx tmp = gen_reg_rtx (mode);
- rtx ops[] = {tmp, dup, builder.elt (i)};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- /* slide1up need source and dest to be different REG. */
- dup = tmp;
- }
+ dup = expand_slide1up (mode, dup, builder.elt (i));
emit_move_insn (target, dup);
return true;
@@ -1717,6 +1745,77 @@ expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
gcc_unreachable ();
}
+/* We don't actually allow this case in legitimate_constant_p but
+ the middle-end still expects us to handle it in an expander
+ (see PR121334). This is assumed to happen very rarely so the
+   implementation is not very efficient, particularly
+   for short vectors.  */
+
+static void
+expand_const_vector_onestep (rtx target, rvv_builder &builder)
+{
+ machine_mode mode = GET_MODE (target);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ gcc_assert (builder.nelts_per_pattern () == 2);
+
+ /* We have n encoded patterns
+ {csta_0, cstb_0},
+ {csta_1, cstb_1},
+ ...
+ {csta_{n-1}, cstb_{n-1}}
+ which should become one vector:
+ {csta_0, csta_1, ..., csta_{n-1},
+ cstb_0, cstb_1, ..., cstb_{n-1},
+ ...
+ cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ In order to achieve this we create a permute/gather constant
+ sel = {0, 1, ..., n - 1, 0, 1, ..., n - 1, ...}
+ and two vectors
+ va = {csta_0, csta_1, ..., csta_{n-1}},
+ vb = {cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ Then we use a VLMAX gather to "broadcast" vb and afterwards
+ overwrite the first n elements with va. */
+
+ int n = builder.npatterns ();
+ /* { 0, 1, 2, ..., n - 1 }. */
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+
+ /* { 0, 1, ..., n - 1, 0, 1, ..., n - 1, ... }. */
+ rtx sel = gen_reg_rtx (mode);
+  rtx and_ops[] = {sel, vid, GEN_INT (n - 1)};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+
+ /* va = { ELT (0), ELT (1), ... ELT (n - 1) }. */
+ rtx tmp1 = gen_reg_rtx (mode);
+  rtx ops1[] = {tmp1, builder.elt (n - 1)};
+  expand_broadcast (mode, ops1);
+  for (int i = n - 2; i >= 0; i--)
+    tmp1 = expand_slide1up (mode, tmp1, builder.elt (i));
+
+ /* vb = { ELT (n), ELT (n + 1), ... ELT (2 * n - 1) }. */
+ rtx tmp2 = gen_reg_rtx (mode);
+  rtx ops2[] = {tmp2, builder.elt (2 * n - 1)};
+  expand_broadcast (mode, ops2);
+  for (int i = n - 2; i >= 0; i--)
+    tmp2 = expand_slide1up (mode, tmp2, builder.elt (n + i));
+
+ /* Duplicate vb. */
+ rtx tmp3 = gen_reg_rtx (mode);
+ emit_vlmax_gather_insn (tmp3, tmp2, sel);
+
+  /* Overwrite the first n elements with va. */
+ rtx dest = gen_reg_rtx (mode);
+ insn_code icode = code_for_pred_mov (mode);
+ rtx ops3[] = {dest, tmp3, tmp1};
+ emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n));
+
+ emit_move_insn (target, dest);
+}
+
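A small worked example of the scheme above, with values chosen purely for illustration: for the two patterns {1, 3} and {2, 4} (n = 2) the wanted constant is {1, 2, 3, 4, 3, 4, ...}.  vid & (n - 1) gives sel = {0, 1, 0, 1, ...}, va is built as {1, 2, ...} and vb as {3, 4, ...}; the VLMAX gather of vb by sel yields {3, 4, 3, 4, ...}, and the final tail-undisturbed move with AVL n = 2 overwrites the first two elements with va, producing {1, 2, 3, 4, 3, 4, ...} as required.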
static void
expand_const_vector (rtx target, rtx src)
{
@@ -1744,6 +1843,8 @@ expand_const_vector (rtx target, rtx src)
if (CONST_VECTOR_DUPLICATE_P (src))
return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_NELTS_PER_PATTERN (src) == 2)
+ return expand_const_vector_onestep (target, builder);
else if (CONST_VECTOR_STEPPED_P (src))
return expand_const_vector_stepped (target, src, &builder);
@@ -2648,8 +2749,14 @@ expand_vector_init_merge_repeating_sequence (rtx target,
= get_repeating_sequence_dup_machine_mode (builder, mask_bit_mode);
uint64_t full_nelts = builder.full_nelts ().to_constant ();
+ gcc_assert (builder.nelts_per_pattern () == 1
+ || builder.nelts_per_pattern () == 2);
+
+ rtx first
+ = builder.nelts_per_pattern () == 1 ? builder.elt (0) : builder.elt (1);
+
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
+ rtx ops[] = {target, force_reg (builder.inner_mode (), first)};
expand_broadcast (builder.mode (), ops);
/* Step 2: Merge the rest iteration of pattern. */
for (unsigned int i = 1; i < builder.npatterns (); i++)
@@ -2677,7 +2784,10 @@ expand_vector_init_merge_repeating_sequence (rtx target,
emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
- rtx ops[] = {target, target, builder.elt (i), mask};
+ unsigned int which = i;
+ if (builder.nelts_per_pattern () == 2)
+ which = 2 * which + 1;
+ rtx ops[] = {target, target, builder.elt (which), mask};
emit_vlmax_insn (code_for_pred_merge_scalar (GET_MODE (target)),
MERGE_OP, ops);
}
@@ -3220,15 +3330,17 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
mask_mode = get_mask_mode (data_mode);
rtx mask = gen_reg_rtx (mask_mode);
rtx max_sel = gen_const_vector_dup (sel_mode, nunits);
+ bool overlap = reg_overlap_mentioned_p (target, op1);
+ rtx tmp_target = overlap ? gen_reg_rtx (data_mode) : target;
/* Step 1: generate a mask that should select everything >= nunits into the
* mask. */
expand_vec_cmp (mask, GEU, sel_mod, max_sel);
- /* Step2: gather every op0 values indexed by sel into target,
+ /* Step2: gather every op0 values indexed by sel into TMP_TARGET,
we don't need to care about the result of the element
whose index >= nunits. */
- emit_vlmax_gather_insn (target, op0, sel_mod);
+ emit_vlmax_gather_insn (tmp_target, op0, sel_mod);
/* Step3: shift the range from (nunits, max_of_mode] to
[0, max_of_mode - nunits]. */
@@ -3238,7 +3350,10 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
/* Step4: gather those into the previously masked-out elements
of target. */
- emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask);
+ emit_vlmax_masked_gather_mu_insn (tmp_target, op1, tmp, mask);
+
+ if (overlap)
+    emit_move_insn (target, tmp_target);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST for RVV. */
@@ -4078,11 +4193,7 @@ shuffle_off_by_one_patterns (struct expand_vec_perm_d *d)
emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
/* Insert the scalar into element 0. */
- unsigned int unspec
- = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, d->vmode);
- rtx ops[] = {d->target, d->op1, tmp};
- emit_vlmax_insn (icode, BINARY_OP, ops);
+      emit_move_insn (d->target, expand_slide1up (d->vmode, d->op1, tmp));
}
return true;
@@ -4376,13 +4487,11 @@ expand_strided_load (machine_mode mode, rtx *ops)
int idx = 4;
get_else_operand (ops[idx++]);
rtx len = ops[idx];
- poly_int64 len_val;
insn_code icode = code_for_pred_strided_load (mode);
rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
- if (poly_int_rtx_p (len, &len_val)
- && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
else
{
@@ -4400,11 +4509,9 @@ expand_strided_store (machine_mode mode, rtx *ops)
rtx stride = ops[1];
rtx mask = ops[3];
rtx len = ops[4];
- poly_int64 len_val;
rtx vl_type;
- if (poly_int_rtx_p (len, &len_val)
- && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
len = gen_reg_rtx (Pmode);
emit_vlmax_vsetvl (mode, len);
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index df924fa..5e6cb67 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -275,13 +275,13 @@ loop_invariant_op_p (class loop *loop,
/* Return true if the variable should be counted into liveness. */
static bool
variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info,
- slp_tree node ATTRIBUTE_UNUSED, tree var, bool lhs_p)
+ slp_tree node, tree var, bool lhs_p)
{
if (!var)
return false;
gimple *stmt = STMT_VINFO_STMT (stmt_info);
stmt_info = vect_stmt_to_vectorize (stmt_info);
- enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info);
+ enum stmt_vec_info_type type = SLP_TREE_TYPE (node);
if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
{
if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE
@@ -400,7 +400,7 @@ costs::compute_local_live_ranges (
pair &live_range
= live_ranges->get_or_insert (lhs, &existed_p);
gcc_assert (!existed_p);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (program_point.stmt_info)
+ if (SLP_TREE_MEMORY_ACCESS_TYPE (*node)
== VMAT_LOAD_STORE_LANES)
point = get_first_lane_point (program_points,
program_point.stmt_info);
@@ -418,8 +418,7 @@ costs::compute_local_live_ranges (
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (var, &existed_p);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (
- program_point.stmt_info)
+ if (SLP_TREE_MEMORY_ACCESS_TYPE (*node)
== VMAT_LOAD_STORE_LANES)
point = get_last_lane_point (program_points,
program_point.stmt_info);
@@ -602,13 +601,13 @@ get_store_value (gimple *stmt)
/* Return true if additional vector vars needed. */
bool
costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
- slp_tree node ATTRIBUTE_UNUSED)
+ slp_tree node)
{
- enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info);
+ enum stmt_vec_info_type type = SLP_TREE_TYPE (node);
if (type == load_vec_info_type || type == store_vec_info_type)
{
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return true;
machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
@@ -694,7 +693,7 @@ costs::update_local_live_ranges (
if (!node)
continue;
- if (STMT_VINFO_TYPE (stmt_info) == undef_vec_info_type)
+ if (SLP_TREE_TYPE (*node) == undef_vec_info_type)
continue;
for (j = 0; j < gimple_phi_num_args (phi); j++)
@@ -773,7 +772,7 @@ costs::update_local_live_ranges (
slp_tree *node = vinfo_slp_map.get (stmt_info);
if (!node)
continue;
- enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info);
+ enum stmt_vec_info_type type = SLP_TREE_TYPE (*node);
if (need_additional_vector_vars_p (stmt_info, *node))
{
/* For non-adjacent load/store STMT, we will potentially
@@ -1086,7 +1085,7 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
load/store. */
static int
segment_loadstore_group_size (enum vect_cost_for_stmt kind,
- stmt_vec_info stmt_info)
+ stmt_vec_info stmt_info, slp_tree node)
{
if (stmt_info
&& (kind == vector_load || kind == vector_store)
@@ -1094,7 +1093,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind,
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
if (stmt_info
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -1108,7 +1107,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind,
unsigned
costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
stmt_vec_info stmt_info,
- slp_tree, tree vectype, int stmt_cost)
+ slp_tree node, tree vectype, int stmt_cost)
{
const cpu_vector_cost *costs = get_vector_costs ();
switch (kind)
@@ -1131,7 +1130,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
each vector in the group. Here we additionally add permute
costs for each. */
/* TODO: Indexed and ordered/unordered cost. */
- int group_size = segment_loadstore_group_size (kind, stmt_info);
+ int group_size = segment_loadstore_group_size (kind, stmt_info,
+ node);
if (group_size > 1)
{
switch (group_size)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0a9fcef..591122f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3685,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* This test can fail if (for example) we want a HF and Z[v]fh is
not enabled. In that case we just want to let the standard
expansion path run. */
- if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
+ if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)
+ && gen_lowpart_common (vmode, SUBREG_REG (src)))
{
rtx v = gen_lowpart (vmode, SUBREG_REG (src));
rtx int_reg = dest;
@@ -3958,41 +3959,6 @@ riscv_extend_cost (rtx op, bool unsigned_p)
return COSTS_N_INSNS (2);
}
-/* Return the cost of the vector binary rtx like add, minus, mult.
- The cost of scalar2vr_cost will be appended if there one of the
- op comes from the VEC_DUPLICATE. */
-
-static int
-get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
-{
- gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
-
- rtx neg;
- rtx op_0;
- rtx op_1;
-
- if (GET_CODE (x) == UNSPEC)
- {
- op_0 = XVECEXP (x, 0, 0);
- op_1 = XVECEXP (x, 0, 1);
- }
- else
- {
- op_0 = XEXP (x, 0);
- op_1 = XEXP (x, 1);
- }
-
- if (GET_CODE (op_0) == VEC_DUPLICATE
- || GET_CODE (op_1) == VEC_DUPLICATE)
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else if (GET_CODE (neg = op_0) == NEG
- && (GET_CODE (op_1) == VEC_DUPLICATE
- || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else
- return COSTS_N_INSNS (1);
-}
-
/* Implement TARGET_RTX_COSTS. */
#define SINGLE_SHIFT_COST 1
@@ -4014,73 +3980,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
{
case SET:
{
- switch (GET_CODE (x))
+ if (GET_CODE (x) == VEC_DUPLICATE)
+ *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
{
- case VEC_DUPLICATE:
- *total = gr2vr_cost * COSTS_N_INSNS (1);
- break;
- case IF_THEN_ELSE:
- {
- rtx op = XEXP (x, 1);
+ int vec_dup_count = 0;
+ subrtx_var_iterator::array_type array;
- switch (GET_CODE (op))
- {
- case DIV:
- case UDIV:
- case MOD:
- case UMOD:
- case US_PLUS:
- case US_MINUS:
- case SS_PLUS:
- case SS_MINUS:
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- case UNSPEC:
- {
- switch (XINT (op, 1))
- {
- case UNSPEC_VAADDU:
- case UNSPEC_VAADD:
- *total
- = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- case PLUS:
- case MINUS:
- case AND:
- case IOR:
- case XOR:
- case MULT:
- case SMAX:
- case UMAX:
- case SMIN:
- case UMIN:
- {
- rtx op;
- rtx op_0 = XEXP (x, 0);
- rtx op_1 = XEXP (x, 1);
+ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+ if (GET_CODE (*iter) == VEC_DUPLICATE)
+ vec_dup_count++;
- if (GET_CODE (op = op_0) == MULT
- || GET_CODE (op = op_1) == MULT)
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- else
- *total = get_vector_binary_rtx_cost (x, scalar2vr_cost);
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
+ int total_vec_dup_cost = vec_dup_count * scalar2vr_cost;
+
+ *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1);
}
}
break;
@@ -5532,9 +5445,9 @@ canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1)
/* We might have been handed back a SUBREG. Just to make things
easy, force it into a REG. */
- if (!REG_P (*op0) && !CONST_INT_P (*op0))
+ if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0)))
*op0 = force_reg (word_mode, *op0);
- if (!REG_P (*op1) && !CONST_INT_P (*op1))
+ if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1)))
*op1 = force_reg (word_mode, *op1);
}
@@ -6213,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
For a library call, FNTYPE is 0. */
void
-riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
+riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
+ rtx, tree, int)
{
memset (cum, 0, sizeof (*cum));
@@ -6494,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}
-/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
- VALTYPE is the return type and MODE is VOIDmode. For libcalls,
- VALTYPE is null and MODE is the mode of the return value. */
+/* Implements hook TARGET_FUNCTION_VALUE. */
rtx
-riscv_function_value (const_tree type, const_tree func, machine_mode mode)
+riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+ bool)
{
struct riscv_arg_info info;
CUMULATIVE_ARGS args;
- if (type)
+ if (fn_decl_or_type)
{
- int unsigned_p = TYPE_UNSIGNED (type);
+ const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
+ TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ }
+ else
+ memset (&args, 0, sizeof args);
- mode = TYPE_MODE (type);
+ int unsigned_p = TYPE_UNSIGNED (ret_type);
- /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
- return values, promote the mode here too. */
- mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
- }
+ machine_mode mode = TYPE_MODE (ret_type);
- memset (&args, 0, sizeof args);
+  /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes return
+     values, promote the mode here too. */
+ mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1);
- return riscv_get_arg_info (&info, &args, mode, type, true, true);
+ return riscv_get_arg_info (&info, &args, mode, ret_type, true, true);
+}
+
+/* Implements hook TARGET_LIBCALL_VALUE. */
+
+rtx
+riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ struct riscv_arg_info info;
+ CUMULATIVE_ARGS args;
+ memset (&args, 0, sizeof args);
+ return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true);
}
/* Implement TARGET_PASS_BY_REFERENCE. */
@@ -13867,9 +13795,9 @@ riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
riscv_emit_binary (MULT, mul, x, y);
if (TARGET_64BIT)
- emit_insn (gen_usmuldi3_highpart (mulhu, x, y));
+ emit_insn (gen_umuldi3_highpart (mulhu, x, y));
else
- emit_insn (gen_usmulsi3_highpart (mulhu, x, y));
+ emit_insn (gen_umulsi3_highpart (mulhu, x, y));
riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
riscv_emit_unary (NEG, overflow_p, overflow_p);
@@ -14037,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
-/* This parses the attribute arguments to target_version in DECL and modifies
- the feature mask and priority required to select those targets. */
+/* Parse the attribute arguments to target_version in DECL and modify
+ the feature mask and priority required to select those targets.
+ If LOC is nonnull, report diagnostics against *LOC, otherwise
+ remain silent. */
static void
parse_features_for_version (tree decl,
+ location_t *loc,
struct riscv_feature_bits &res,
int &priority)
{
@@ -14071,14 +14002,12 @@ parse_features_for_version (tree decl,
cl_target_option_restore (&global_options, &global_options_set,
default_opts);
- riscv_process_target_version_attr (TREE_VALUE (version_attr),
- DECL_SOURCE_LOCATION (decl));
+ riscv_process_target_version_attr (TREE_VALUE (version_attr), loc);
priority = global_options.x_riscv_fmv_priority;
const char *arch_string = global_options.x_riscv_arch_string;
bool parse_res
- = riscv_minimal_hwprobe_feature_bits (arch_string, &res,
- DECL_SOURCE_LOCATION (decl));
+ = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc);
gcc_assert (parse_res);
cl_target_option_restore (&global_options, &global_options_set,
@@ -14135,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2)
struct riscv_feature_bits mask1, mask2;
int prio1, prio2;
- parse_features_for_version (decl1, mask1, prio1);
- parse_features_for_version (decl2, mask2, prio2);
+ parse_features_for_version (decl1, nullptr, mask1, prio1);
+ parse_features_for_version (decl2, nullptr, mask2, prio2);
return compare_fmv_features (mask1, mask2, prio1, prio2);
}
@@ -14439,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl,
version_info.version_decl = version_decl;
// Get attribute string, parse it and find the right features.
parse_features_for_version (version_decl,
+ &DECL_SOURCE_LOCATION (version_decl),
version_info.features,
version_info.prio);
function_versions.push_back (version_info);
@@ -15441,6 +15371,217 @@ synthesize_and (rtx operands[3])
return true;
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_add (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* If we can emit two addi insns then that's better than synthesizing
+ the constant into a temporary, then adding the temporary to the
+ other input. The exception is when the constant can be loaded
+     in a single instruction which can issue whenever it's convenient.  */
+ if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If we can shift the constant by 1, 2, or 3 bit positions
+ and the result is a cheaper constant, then do so. */
+ ival = INTVAL (operands[2]);
+ if (TARGET_ZBA
+ && (((ival % 2) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 1), true))
+ || ((ival % 4) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 2), true))
+ || ((ival % 8) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 3), true))))
+ {
+      /* Load the shifted constant into a temporary.  */
+ int shct = ctz_hwi (ival);
+
+ /* We can handle shifting up to 3 bit positions via shNadd. */
+ if (shct > 3)
+ shct = 3;
+
+ /* The adjusted constant may still need synthesis, so do not copy
+	 it directly into a register.  Let the expander handle it.  */
+ rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+ /* Generate shift-add of temporary and operands[1]
+ into the final destination. */
+ rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+ rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If the negated constant is cheaper than the original, then negate
+ the constant and use sub. */
+ if (budget2 < budget1)
+ {
+      /* Load -INTVAL (operands[2]) into a temporary.  */
+ rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2])));
+
+      /* Subtract the negated constant from operands[1].  */
+ rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* No add synthesis was found. Synthesize the constant into
+ a temporary and use that. */
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_PLUS (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+}
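A concrete example of the SUM_OF_TWO_S12 branch above (the register names are made up): for operands[2] == 3000, which does not fit in a signed 12-bit immediate, saturated becomes 2047 and the remainder 953, so the expansion is roughly

    addi	a0,a1,2047
    addi	a0,a0,953

and no constant needs to be synthesized into a temporary.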
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+   This variant handles 32-bit additions on a 64-bit target.
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+
+bool
+synthesize_add_extended (rtx operands[3])
+{
+  /* If operands[2] is a 12-bit signed immediate,
+     no synthesis needs to be done.  */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+  /* If operands[2] can be split into two 12-bit signed immediates,
+     split the add into two adds.  */
+ if (SUM_OF_TWO_S12 (ival))
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
+ temp = gen_lowpart (SImode, temp);
+ SUBREG_PROMOTED_VAR_P (temp) = 1;
+ SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+ emit_insn (gen_rtx_SET (operands[0], temp));
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+  /* If the negated value is cheaper to synthesize, subtract that from
+     operands[1].  */
+ if (budget2 < budget1)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2]))));
+
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_subsi3_extended (t, operands[1], tmp));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+ rtx tsrc = force_reg (SImode, operands[2]);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
+ return true;
+}
+
+/* HINT specifies the target cache.
+
+   TODO: LOCALITY is currently unused.
+
+   Return the first operand of the associated PREF or PREFX insn.  */
+rtx
+riscv_prefetch_cookie (rtx hint, rtx locality)
+{
+ return (GEN_INT (INTVAL (hint)
+ + CacheHint::DCACHE_HINT + INTVAL (locality) * 0));
+}
+
+/* Return true if X is a legitimate address with offset for prefetch.
+ MODE is the mode of the value being accessed. */
+bool
+riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
+{
+ struct riscv_address_info addr;
+
+ if (riscv_classify_address (&addr, x, mode, false)
+ && addr.type == ADDRESS_REG)
+ {
+ if (TARGET_XMIPSCBOP)
+ return (CONST_INT_P (addr.offset)
+ && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset)));
+ }
+
+ return true;
+}
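A quick note on how the cookie is consumed (the operand value follows from the CacheHint definitions added to riscv.h below; this is my reading of the code, not a documented encoding): a read prefetch has operands[1] == 0, so riscv_prefetch_cookie returns 0 + DCACHE_HINT == 8 and the prefetch pattern later in this patch emits mips.pref with 8 as its first operand; the locality argument is deliberately ignored for now (multiplied by 0).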
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -15804,6 +15945,12 @@ synthesize_and (rtx operands[3])
#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE riscv_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE riscv_libcall_value
+
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 45fa521..9146571 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -71,7 +71,7 @@ extern const char *riscv_arch_help (int argc, const char **argv);
{"tune", "%{!mtune=*:" \
" %{!mcpu=*:-mtune=%(VALUE)}" \
" %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \
- {"arch", "%{!march=*:" \
+ {"arch", "%{!march=*|march=unset:" \
" %{!mcpu=*:-march=%(VALUE)}" \
" %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \
{"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
@@ -111,13 +111,19 @@ extern const char *riscv_arch_help (int argc, const char **argv);
%(subtarget_asm_spec)" \
ASM_MISA_SPEC
+/* Drop all -march=* options before -march=unset. */
+#define ARCH_UNSET_CLEANUP_SPECS \
+ "%{march=unset:%<march=*} " \
+
#undef DRIVER_SELF_SPECS
#define DRIVER_SELF_SPECS \
+ARCH_UNSET_CLEANUP_SPECS \
"%{march=help:%:riscv_arch_help()} " \
"%{print-supported-extensions:%:riscv_arch_help()} " \
"%{-print-supported-extensions:%:riscv_arch_help()} " \
"%{march=*:%:riscv_expand_arch(%*)} " \
-"%{!march=*:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} "
+"%{!march=*|march=unset:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} " \
+"%{march=unset:%{!mcpu=*:%eAt least one valid -mcpu option must be given after -march=unset}} "
#define LOCAL_LABEL_PREFIX "."
#define USER_LABEL_PREFIX ""
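The intended use of -march=unset is easiest to see on an illustrative command line:

    riscv64-unknown-linux-gnu-gcc -march=rv64imac -march=unset -mcpu=xt-c910 ...

Per the ARCH_UNSET_CLEANUP_SPECS comment, the -march options given before -march=unset are dropped, the architecture is then derived from -mcpu via riscv_expand_arch_from_cpu, and passing -march=unset without any -mcpu hits the new %e error above.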
@@ -759,12 +765,6 @@ enum reg_class
#define CALLEE_SAVED_FREG_NUMBER(REGNO) CALLEE_SAVED_REG_NUMBER (REGNO - 32)
-#define LIBCALL_VALUE(MODE) \
- riscv_function_value (NULL_TREE, NULL_TREE, MODE)
-
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- riscv_function_value (VALTYPE, FUNC, VOIDmode)
-
/* 1 if N is a possible register number for function argument passing.
We have no FP argument registers when soft-float. */
@@ -1319,4 +1319,15 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
#define TARGET_HAS_FMV_TARGET_ATTRIBUTE 0
+/* mips pref valid offset range. */
+#define MIPS_RISCV_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, 0, 511))
+
+/* mips pref cache hint type. */
+typedef enum {
+ ICACHE_HINT = 0 << 3,
+ DCACHE_HINT = 1 << 3,
+ SCACHE_HINT = 2 << 3,
+ TCACHE_HINT = 3 << 3
+} CacheHint;
+
#endif /* ! GCC_RISCV_H */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 578dd43..d34405c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -712,24 +712,45 @@
(set_attr "mode" "SI")])
(define_expand "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand" " r,I")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "reg_or_const_int_operand")))]
""
{
+  /* We may be able to find a faster sequence; if so, we are done.
+     Otherwise let expansion continue normally.  */
+ if (CONST_INT_P (operands[2])
+ && ((!TARGET_64BIT && synthesize_add (operands))
+ || (TARGET_64BIT && synthesize_add_extended (operands))))
+ DONE;
+
+  /* Constants have already been handled.  */
if (TARGET_64BIT)
{
- rtx t = gen_reg_rtx (DImode);
- emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
- t = gen_lowpart (SImode, t);
- SUBREG_PROMOTED_VAR_P (t) = 1;
- SUBREG_PROMOTED_SET (t, SRP_SIGNED);
- emit_move_insn (operands[0], t);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], operands[2]));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
DONE;
}
+
})
-(define_insn "adddi3"
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (plus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "reg_or_const_int_operand")))]
+ "TARGET_64BIT"
+{
+  /* We may be able to find a faster sequence; if so, we are done.
+     Otherwise let expansion continue normally.  */
+ if (CONST_INT_P (operands[2]) && synthesize_add (operands))
+ DONE;
+})
+
+(define_insn "*adddi3"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(plus:DI (match_operand:DI 1 "register_operand" " r,r")
(match_operand:DI 2 "arith_operand" " r,I")))]
@@ -2293,12 +2314,16 @@
rtx abs_reg = gen_reg_rtx (<ANYF:MODE>mode);
rtx coeff_reg = gen_reg_rtx (<ANYF:MODE>mode);
rtx tmp_reg = gen_reg_rtx (<ANYF:MODE>mode);
+ rtx fflags = gen_reg_rtx (SImode);
riscv_emit_move (tmp_reg, operands[1]);
riscv_emit_move (coeff_reg,
riscv_vector::get_fp_rounding_coefficient (<ANYF:MODE>mode));
emit_insn (gen_abs<ANYF:mode>2 (abs_reg, operands[1]));
+  /* The fp compare can set the invalid flag for NaN, so back up fflags.  */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_frflags (fflags));
riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
emit_jump_insn (gen_jump (end_label));
@@ -2324,6 +2349,14 @@
emit_insn (gen_copysign<ANYF:mode>3 (tmp_reg, abs_reg, operands[1]));
emit_label (end_label);
+
+  /* Restore fflags, but after the label.  This is slightly different
+     from the glibc implementation, which only needs to restore under
+     the label since it checks for NaN first, meaning the following fp
+     compare can't raise fp exceptions and thus can't clobber fflags.  */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_fsflags (fflags));
+
riscv_emit_move (operands[0], tmp_reg);
}
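In terms of the emitted sequence this brackets the comparison with a save/restore pair (a sketch of the intent, not literal output): gen_riscv_frflags copies fflags into a scratch GPR before the abs/coefficient compare, and gen_riscv_fsflags writes it back after end_label, so a NaN input cannot leave the invalid flag set when -ftrapping-math is in effect.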
@@ -4402,11 +4435,21 @@
)
(define_insn "prefetch"
- [(prefetch (match_operand 0 "prefetch_operand" "Qr")
- (match_operand 1 "imm5_operand" "i")
- (match_operand 2 "const_int_operand" "n"))]
- "TARGET_ZICBOP"
+ [(prefetch (match_operand 0 "prefetch_operand" "Qr,ZD")
+ (match_operand 1 "imm5_operand" "i,i")
+ (match_operand 2 "const_int_operand" "n,n"))]
+ "TARGET_ZICBOP || TARGET_XMIPSCBOP"
{
+ if (TARGET_XMIPSCBOP)
+ {
+      /* MIPS prefetch write is a nop for the p8700.  */
+ if (operands[1] != CONST0_RTX (GET_MODE (operands[1])))
+ return "nop";
+
+ operands[1] = riscv_prefetch_cookie (operands[1], operands[2]);
+ return "mips.pref\t%1,%a0";
+ }
+
switch (INTVAL (operands[1]))
{
case 0:
diff --git a/gcc/config/riscv/sifive-p400.md b/gcc/config/riscv/sifive-p400.md
index ed8b8ec..0acdbda 100644
--- a/gcc/config/riscv/sifive-p400.md
+++ b/gcc/config/riscv/sifive-p400.md
@@ -153,10 +153,13 @@
(eq_attr "type" "fmove,fcvt"))
"p400_float_pipe,sifive_p400_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone asks for p400 scheduling but
+;; enables the various HF mode extensions.
(define_insn_reservation "sifive_p400_fdiv_s" 18
(and (eq_attr "tune" "sifive_p400")
(eq_attr "type" "fdiv,fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"sifive_p400_FM, sifive_p400_fdiv*5")
(define_insn_reservation "sifive_p400_fdiv_d" 31
@@ -178,3 +181,18 @@
(define_bypass 1 "sifive_p400_f2i"
"sifive_p400_branch,sifive_p400_sfb_alu,sifive_p400_mul,
sifive_p400_div,sifive_p400_alu,sifive_p400_cpop")
+
+
+;; Someone familiar with the p400 uarch needs to put
+;; these into the right reservations. This is just a placeholder
+;; for everything I found that had no mapping to a reservation.
+;;
+;; Note that even if the processor does not implement a particular
+;; instruction, it should still have suitable reservations, even if
+;; they are just dummies like this one.
+(define_insn_reservation "sifive_p400_unknown" 1
+ (and (eq_attr "tune" "sifive_p400")
+ (eq_attr "type" "ghost,vfrecp,vclmul,vldm,vmffs,vclmulh,vlsegde,vfcvtitof,vsm4k,vfcvtftoi,vfdiv,vsm3c,vsm4r,viwmuladd,vfwredu,vcpop,vfwmuladd,vstux,vsshift,vfwcvtftof,vfncvtftof,vfwmaccbf16,vext,vssegte,rdvl,vaeskf1,vfslide1up,vmov,vimovvx,vaesef,vfsqrt,viminmax,vfwcvtftoi,vssegtox,vfclass,viwmul,vector,vgmul,vsm3me,vfcmp,vstm,vfredo,vfwmul,vaeskf2,vstox,vfncvtbf16,vislide1up,vgather,vldox,viwred,vctz,vghsh,vsts,vslidedown,vfmerge,vicmp,vsmul,vlsegdff,vfalu,vfmov,vislide1down,vfminmax,vcompress,vldr,vldff,vlsegdux,vimuladd,vsalu,vidiv,sf_vqmacc,vfslide1down,vaesem,vimerge,vfncvtftoi,vfwcvtitof,vicalu,vaesz,sf_vc_se,vsha2cl,vmsfs,vldux,vmidx,vslideup,vired,vlde,vfwredo,vfmovfv,vbrev,vfncvtitof,rdfrm,vsetvl,vssegts,vimul,vialu,vbrev8,vfwalu,rdvlenb,sf_vfnrclip,vclz,vnclip,sf_vc,vimov,vste,vfmuladd,vfmovvf,vwsll,vsetvl_pre,vlds,vlsegds,vmiota,vmalu,wrvxrm,wrfrm,viwalu,vaesdm,vssegtux,vaesdf,vimovxv,vror,vnshift,vstr,vaalu,vsha2ms,crypto,vfwcvtbf16,vlsegdox,vrol,vandn,vfsgnj,vmpop,vfredu,vsha2ch,vshift,vrev8,vfmul"))
+ "p400_int_pipe+sifive_p400_ialu")
+
+
diff --git a/gcc/config/riscv/sifive-p600.md b/gcc/config/riscv/sifive-p600.md
index 2401349..ccd006d 100644
--- a/gcc/config/riscv/sifive-p600.md
+++ b/gcc/config/riscv/sifive-p600.md
@@ -157,10 +157,13 @@
(eq_attr "type" "fmove,fcvt"))
"float_pipe,sifive_p600_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone asks for p600 scheduling but
+;; enables the various HF mode extensions.
(define_insn_reservation "sifive_p600_fdiv_s" 11
(and (eq_attr "tune" "sifive_p600")
(eq_attr "type" "fdiv,fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"sifive_p600_FM, sifive_p600_fdiv*5")
(define_insn_reservation "sifive_p600_fdiv_d" 19
@@ -182,3 +185,15 @@
(define_bypass 1 "sifive_p600_f2i"
"sifive_p600_branch,sifive_p600_sfb_alu,sifive_p600_mul,
sifive_p600_div,sifive_p600_alu,sifive_p600_cpop")
+
+;; Someone familiar with the p600 uarch needs to put
+;; these into the right reservations. This is just a placeholder
+;; for everything I found that had no mapping to a reservation.
+;;
+;; Note that even if the processor does not implement a particular
+;; instruction, it should still have suitable reservations, even if
+;; they are just dummies like this one.
+(define_insn_reservation "sifive_p600_unknown" 1
+ (and (eq_attr "tune" "sifive_p600")
+ (eq_attr "type" "vicmp,vssegte,vbrev8,vfwalu,vimov,vmpop,vaesdf,vislide1up,vror,vsha2cl,vrol,vslideup,vimuladd,vclmul,vaesef,vext,vlsegdff,vfmuladd,vfclass,vmsfs,vfcmp,vsmul,vsm3me,vmalu,vshift,viwmuladd,vfslide1up,vlsegde,vsm4k,wrvxrm,vislide1down,vsm3c,vfwmuladd,vaesdm,vclmulh,vfwcvtftof,vfwredu,vfredo,sf_vfnrclip,vaesz,vwsll,vmiota,vctz,vsetvl_pre,vstm,vidiv,vssegtux,vfwmul,vcompress,vste,vired,vlsegds,vaesem,vfminmax,ghost,vandn,crypto,vfmul,vialu,vfmovvf,rdfrm,vldff,vfmerge,vsshift,vnclip,sf_vqmacc,vnshift,vfdiv,vfslide1down,vfncvtitof,vfsqrt,vimovxv,vstr,vfwcvtbf16,vfwcvtitof,vbrev,vssegtox,vssegts,vcpop,vmffs,viwmul,vldr,vmidx,rdvlenb,vfalu,vslidedown,vlde,vfsgnj,vfmov,viwalu,vsha2ch,vfncvtbf16,vfcvtitof,rdvl,vsetvl,vsha2ms,vector,vstux,vimerge,vclz,sf_vc,vfcvtftoi,viminmax,vsm4r,sf_vc_se,wrfrm,vstox,vfmovfv,vfncvtftoi,vimul,vsalu,vmov,vgmul,vgather,vldux,vlsegdox,vfncvtftof,vimovvx,vghsh,vldm,vldox,vfwcvtftoi,vlds,vfrecp,vaeskf2,vsts,vfredu,vicalu,vaalu,vfwmaccbf16,vrev8,vfwredo,vlsegdux,viwred,vaeskf1"))
+ "int_pipe+sifive_p600_ialu")
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 50ec8b3..e47bb41 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -386,13 +386,13 @@
})
(define_insn "amo_atomic_exchange<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r")
+ [(set (match_operand:GPR 0 "register_operand" "=r")
(unspec_volatile:GPR
[(match_operand:GPR 1 "memory_operand" "+A")
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
- (match_operand:GPR 2 "register_operand" "0"))]
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))]
"TARGET_ZAAMO"
"amoswap.<amo>%A3\t%0,%z2,%1"
[(set_attr "type" "atomic")
@@ -434,13 +434,13 @@
})
(define_insn "zabha_atomic_exchange<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
+ [(set (match_operand:SHORT 0 "register_operand" "=r")
(unspec_volatile:SHORT
[(match_operand:SHORT 1 "memory_operand" "+A")
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE_ZABHA))
(set (match_dup 1)
- (match_operand:SHORT 2 "register_operand" "0"))]
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))]
"TARGET_ZABHA"
"amoswap.<amobh>%A3\t%0,%z2,%1"
[(set_attr "type" "atomic")
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 7aac56a..a7eaa8b 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -229,8 +229,41 @@ s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext)
$(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi
$(STAMP) s-riscv-ext.texi
-# Run `riscv-regen' after you changed or added anything from riscv-ext*.def
+RISCV_CORES_DEFS = \
+ $(srcdir)/config/riscv/riscv-cores.def
+
+build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mtune-texi$(build_exeext): build/gen-riscv-mtune-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true
+
+$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true
+
+s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi
+ $(STAMP) s-riscv-mtune.texi
+
+s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi
+ $(STAMP) s-riscv-mcpu.texi
+
+# Run `riscv-regen' after you change or add anything in riscv-ext*.def or riscv-cores*.def
.PHONY: riscv-regen
-riscv-regen: s-riscv-ext.texi s-riscv-ext.opt
+riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi
diff --git a/gcc/config/riscv/t-rtems b/gcc/config/riscv/t-rtems
index f596e76..a4d2d03 100644
--- a/gcc/config/riscv/t-rtems
+++ b/gcc/config/riscv/t-rtems
@@ -1,8 +1,8 @@
MULTILIB_OPTIONS =
MULTILIB_DIRNAMES =
-MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc
-MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc
+MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc/march=rv64imc
+MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc rv64imc
MULTILIB_OPTIONS += mabi=ilp32/mabi=ilp32f/mabi=ilp32d/mabi=lp64/mabi=lp64d
MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d
@@ -10,6 +10,9 @@ MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d
MULTILIB_OPTIONS += mcmodel=medany
MULTILIB_DIRNAMES += medany
+MULTILIB_OPTIONS += mstrict-align
+MULTILIB_DIRNAMES += strict-align
+
MULTILIB_REQUIRED =
MULTILIB_REQUIRED += march=rv32i/mabi=ilp32
MULTILIB_REQUIRED += march=rv32iac/mabi=ilp32
@@ -25,3 +28,5 @@ MULTILIB_REQUIRED += march=rv64ima/mabi=lp64/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imac/mabi=lp64/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imafd/mabi=lp64d/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany
+MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany/mstrict-align
+MULTILIB_REQUIRED += march=rv64imc/mabi=lp64/mcmodel=medany/mstrict-align
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 66b7670..2b35d66 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1398,6 +1398,7 @@
}
[(set_attr "type" "vmov,vlde,vste")
(set_attr "mode" "<VT:MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))])
@@ -1435,6 +1436,7 @@
}
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -1485,6 +1487,7 @@
}
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<VLS_AVL_REG:MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -5490,6 +5493,98 @@
"TARGET_VECTOR"
{})
+(define_expand "@pred_mul_plus_vx_<mode>"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_QHS
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_QHS 3 "register_operand"))
+ (match_operand:V_VLSI_QHS 4 "register_operand"))
+ (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_mul_plus_vx_<mode>"
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_D
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_D 3 "register_operand"))
+ (match_operand:V_VLSI_D 4 "register_operand"))
+ (match_operand:V_VLSI_D 5 "vector_merge_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_QHS
+ (match_operand:V_VLSI_QHS 4 "register_operand")
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_QHS 3 "register_operand")))
+ (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_D
+ (match_operand:V_VLSI_D 4 "register_operand")
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_D 3 "register_operand")))
+ (match_operand:V_VLSI_D 5 "vector_merge_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
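The expanders added above feed the vmacc.vx / vnmsac.vx scalar multiply-add and multiply-subtract forms. As a rough, hedged illustration (not part of the patch), these are the loop shapes that could map onto the new patterns once the multiplier is recognized as a loop-invariant scalar:

/* Hedged sketch: loops that could use @pred_mul_plus_vx (vmacc.vx) and
   @pred_vnmsac_vx (vnmsac.vx) when auto-vectorized for RVV.  */
void
mul_add_scalar (int *__restrict d, const int *__restrict a,
		const int *__restrict b, int s, int n)
{
  for (int i = 0; i < n; i++)
    d[i] = a[i] + s * b[i];	/* candidate for vmacc.vx  */
}

void
mul_sub_scalar (int *__restrict d, const int *__restrict a,
		const int *__restrict b, int s, int n)
{
  for (int i = 0; i < n; i++)
    d[i] = a[i] - s * b[i];	/* candidate for vnmsac.vx  */
}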
(define_insn "*pred_madd<mode>_scalar"
[(set (match_operand:V_VLSI 0 "register_operand" "=vd, vr")
(if_then_else:V_VLSI
@@ -6324,8 +6419,8 @@
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<optab><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -6336,11 +6431,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (commutative_float_binop:VF
- (vec_duplicate:VF
+ (commutative_float_binop:V_VLSF
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vf<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
@@ -6349,43 +6444,43 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
(define_insn "@pred_<optab><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (commutative_float_binop_nofrm:VF
- (vec_duplicate:VF
- (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (commutative_float_binop_nofrm:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vf<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<ieee_fmaxmin_op><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VF
- [(match_operand:VF 3 "register_operand" " vr, vr, vr, vr")
- (vec_duplicate:VF
+ (unspec:V_VLSF
+ [(match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))]
UNSPEC_VFMAXMIN)
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<ieee_fmaxmin_op>.vf\t%0,%3,%4%p1"
[(set_attr "type" "vfminmax")
@@ -6417,8 +6512,8 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
(define_insn "@pred_<optab><mode>_reverse_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -6429,11 +6524,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (non_commutative_float_binop:VF
- (vec_duplicate:VF
+ (non_commutative_float_binop:V_VLSF
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vfr<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
@@ -8839,6 +8934,106 @@
[(set_attr "type" "vssegt<order>x")
(set_attr "mode" "<V32T:MODE>")])
+(define_insn "*pred_macc_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_QHS
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr"))
+ (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0"))
+ (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vmacc.vx\t%0,%z3,%4%p1
+ vmacc.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_macc_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_D
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_D 4 "register_operand" " vr, vr"))
+ (match_operand:V_VLSI_D 5 "register_operand" " 0, 0"))
+ (match_operand:V_VLSI_D 2 "vector_undef_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "@
+ vmacc.vx\t%0,%z3,%4%p1
+ vmacc.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_nmsac_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_QHS
+ (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0")
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")))
+ (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vnmsac.vx\t%0,%z3,%4%p1
+ vnmsac.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_nmsac_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_D
+ (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")))
+ (match_operand:V_VLSI_D 2 "vector_undef_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "@
+ vnmsac.vx\t%0,%z3,%4%p1
+ vnmsac.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
(include "autovec.md")
(include "autovec-opt.md")
(include "sifive-vector.md")
diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md
index 34b4a8f..6179140 100644
--- a/gcc/config/riscv/xiangshan.md
+++ b/gcc/config/riscv/xiangshan.md
@@ -144,13 +144,13 @@
(define_insn_reservation "xiangshan_sfdiv" 11
(and (eq_attr "tune" "xiangshan")
(eq_attr "type" "fdiv")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_sfsqrt" 17
(and (eq_attr "tune" "xiangshan")
(eq_attr "type" "fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_dfdiv" 21
diff --git a/gcc/config/rl78/rl78.opt.urls b/gcc/config/rl78/rl78.opt.urls
index 96eff5f..66e874b 100644
--- a/gcc/config/rl78/rl78.opt.urls
+++ b/gcc/config/rl78/rl78.opt.urls
@@ -4,7 +4,7 @@ msim
UrlSuffix(gcc/RL78-Options.html#index-msim-6)
mmul=
-UrlSuffix(gcc/RL78-Options.html#index-mmul)
+UrlSuffix(gcc/RL78-Options.html#index-mmul-1)
mallregs
UrlSuffix(gcc/RL78-Options.html#index-mallregs)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16227e5..8dd23f8 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -5174,6 +5174,7 @@ public:
protected:
void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
+ slp_tree node,
vect_cost_model_location, unsigned int);
void density_test (loop_vec_info);
void adjust_vect_cost_per_loop (loop_vec_info);
@@ -5321,6 +5322,7 @@ rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
void
rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
stmt_vec_info stmt_info,
+ slp_tree node,
vect_cost_model_location where,
unsigned int orig_count)
{
@@ -5381,12 +5383,12 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
or may not need to apply. When finalizing the cost of the loop,
the extra penalty is applied when the load density heuristics
are satisfied. */
- if (kind == vec_construct && stmt_info
- && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
- && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
- || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
+ if (kind == vec_construct && node
+ && SLP_TREE_TYPE (node) == load_vec_info_type
+ && (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ || SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP))
{
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (node);
unsigned int nunits = vect_nunits_for_cost (vectype);
/* As PR103702 shows, it's possible that vectorizer wants to do
costings for only one unit here, it's no need to do any
@@ -5415,7 +5417,7 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
unsigned
rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, slp_tree,
+ stmt_vec_info stmt_info, slp_tree node,
tree vectype, int misalign,
vect_cost_model_location where)
{
@@ -5433,7 +5435,7 @@ rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
m_costs[where] += retval;
- update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
+ update_target_cost_per_stmt (kind, stmt_info, node, where, orig_count);
}
return retval;
@@ -10318,15 +10320,18 @@ can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
/* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
rotated over the highest bit. */
- int pos_one = clz_hwi ((c << 16) >> 16);
- middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
- int middle_ones = clz_hwi (~(c << pos_one));
- if (middle_zeros >= 16 && middle_ones >= 33)
+ unsigned HOST_WIDE_INT uc = c;
+ int pos_one = clz_hwi ((HOST_WIDE_INT) (uc << 16) >> 16);
+ if (pos_one > 0 && pos_one < HOST_BITS_PER_WIDE_INT)
{
- *rot = pos_one;
- return true;
+ middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
+ int middle_ones = clz_hwi (~(uc << pos_one));
+ if (middle_zeros >= 16 && middle_ones >= 33)
+ {
+ *rot = pos_one;
+ return true;
+ }
}
-
return false;
}
@@ -10443,7 +10448,8 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
if (lz >= HOST_BITS_PER_WIDE_INT)
return false;
- int middle_ones = clz_hwi (~(c << lz));
+ unsigned HOST_WIDE_INT uc = c;
+ int middle_ones = clz_hwi (~(uc << lz));
if (tz + lz + middle_ones >= ones
&& (tz - lz) < HOST_BITS_PER_WIDE_INT
&& tz < HOST_BITS_PER_WIDE_INT)
@@ -10477,7 +10483,7 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT-1))
return false;
- middle_ones = clz_hwi (~c << pos_first_1);
+ middle_ones = clz_hwi ((~(unsigned HOST_WIDE_INT) c) << pos_first_1);
middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
if (pos_first_1 < HOST_BITS_PER_WIDE_INT
&& middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
@@ -10579,7 +10585,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
{
/* li/lis; rldicX */
unsigned HOST_WIDE_INT imm = (c | ~mask);
- imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
+ if (shift > 0 && shift < HOST_BITS_PER_WIDE_INT)
+ imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
count_or_emit_insn (temp, GEN_INT (imm));
if (shift != 0)
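The arithmetic changes in rs6000.cc above harden shift handling for HOST_WIDE_INT constants: negative operands are copied into unsigned variables before left shifts, and shift counts are range-checked against the type width. A minimal stand-alone sketch (illustrative only, not part of the patch) of the two hazards being avoided:

/* Hedged illustration of why the patch shifts through an unsigned copy
   and range-checks the shift count.  */
#include <cstdio>

int
main ()
{
  long long c = -42;			/* hypothetical constant, sign bit set  */
  unsigned long long uc = c;		/* well-defined modular conversion  */
  unsigned long long hi = uc << 16;	/* OK: unsigned left shift  */
  /* (c << 16) is undefined behaviour before C++20 because c is negative,
     and a shift by a count >= the type width (e.g. uc << 64) is always
     undefined, hence the added checks on pos_one and shift.  */
  std::printf ("%#llx\n", hi);
  return 0;
}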
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9c718ca..04a6c0f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1969,7 +1969,7 @@
[(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
(set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
{
- HOST_WIDE_INT val = INTVAL (operands[2]);
+ unsigned HOST_WIDE_INT val = UINTVAL (operands[2]);
HOST_WIDE_INT low = sext_hwi (val, 16);
HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
@@ -15665,10 +15665,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
{
operands[3] = gen_reg_rtx (CCmode);
@@ -15703,10 +15703,10 @@
(if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 1)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
@@ -15716,10 +15716,10 @@
(if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
- (if_then_else (gtu (match_dup 1)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gtu (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
@@ -15751,10 +15751,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
{
operands[3] = gen_reg_rtx (CCmode);
@@ -15807,10 +15807,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC && TARGET_64BIT"
{
operands[3] = gen_reg_rtx (CCmode);
diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc
index dd730dc..c563881 100644
--- a/gcc/config/rx/rx.cc
+++ b/gcc/config/rx/rx.cc
@@ -1648,16 +1648,20 @@ mark_frame_related (rtx insn)
static void
add_pop_cfi_notes (rtx_insn *insn, unsigned int high, unsigned int low)
{
- rtx t = plus_constant (Pmode, stack_pointer_rtx,
- (high - low + 1) * UNITS_PER_WORD);
+ rtx src = stack_pointer_rtx;
+ rtx t;
+ for (unsigned int i = low; i <= high; i++)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (word_mode, i));
+ if (i == FRAME_POINTER_REGNUM && frame_pointer_needed)
+ src = frame_pointer_rtx;
+ }
+ t = plus_constant (Pmode, src, (high - low + 1) * UNITS_PER_WORD);
t = gen_rtx_SET (stack_pointer_rtx, t);
add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
RTX_FRAME_RELATED_P (insn) = 1;
- for (unsigned int i = low; i <= high; i++)
- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (word_mode, i));
}
-
static bool
ok_for_max_constant (HOST_WIDE_INT val)
{
@@ -1816,36 +1820,17 @@ rx_expand_prologue (void)
}
}
- /* If needed, set up the frame pointer. */
- if (frame_pointer_needed)
- gen_safe_add (frame_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) frame_size), true);
-
- /* Allocate space for the outgoing args.
- If the stack frame has not already been set up then handle this as well. */
- if (stack_size)
+ if (stack_size || frame_size)
{
- if (frame_size)
- {
- if (frame_pointer_needed)
- gen_safe_add (stack_pointer_rtx, frame_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) stack_size), true);
- else
- gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) (frame_size + stack_size)),
- true);
- }
- else
- gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) stack_size), true);
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) (stack_size + frame_size)),
+ true);
}
- else if (frame_size)
+ if (frame_pointer_needed)
{
- if (! frame_pointer_needed)
- gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) frame_size), true);
- else
- gen_safe_add (stack_pointer_rtx, frame_pointer_rtx, NULL_RTX, true);
+ gen_safe_add (frame_pointer_rtx, stack_pointer_rtx,
+ GEN_INT ((HOST_WIDE_INT) stack_size),
+ true);
}
}
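The reworked rx_expand_prologue now performs a single stack adjustment of frame_size + stack_size and then derives the frame pointer from the new stack pointer. A small worked example with illustrative values (not taken from the patch) shows that this lands on the same addresses as the old two-step sequence:

/* Hedged sketch of the resulting layout with frame_size = 32 and
   stack_size = 16.  */
#include <cstdio>

int
main ()
{
  unsigned long sp = 0x1000;		/* hypothetical incoming stack pointer  */
  unsigned long frame_size = 32;	/* saved registers and locals  */
  unsigned long stack_size = 16;	/* outgoing arguments area  */
  sp -= stack_size + frame_size;	/* one combined allocation: sp = 0xfd0  */
  unsigned long fp = sp + stack_size;	/* frame pointer above the args: 0xfe0  */
  std::printf ("sp=%#lx fp=%#lx\n", sp, fp);
  return 0;
}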
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index d760a7e..6becad1 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -128,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
extern void s390_expand_vec_init (rtx, rtx);
extern rtx s390_expand_merge_perm_const (machine_mode, bool);
extern void s390_expand_merge (rtx, rtx, rtx, bool);
+extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx);
+extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx);
extern rtx s390_build_signbit_mask (machine_mode);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index abe551c..1a47f47 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -8213,6 +8213,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
+/* Expand integer op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1.
+
+ Signedness is specified by op3. If op3 equals 1, then perform an unsigned
+ comparison, and if op3 equals -1, then perform a signed comparison.
+
+ For integer comparisons we strive for a sequence like
+ CR[L] ; LHI ; LOCHIL ; LOCHIH
+ where the first three instructions fit into a group. */
+
+void
+s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 == const1_rtx || op3 == constm1_rtx);
+
+ rtx cc, cond_lt, cond_gt;
+ machine_mode cc_mode;
+ machine_mode mode = GET_MODE (op1);
+
+  /* Prior to VXE3, emulate a 128-bit comparison by breaking it up into three
+     comparisons.  First test the high halves.  If they are equal, then test
+     the low halves.  Finally, test for equality.  Depending on the results
+     make use of LOCs.  */
+ if (mode == TImode && !TARGET_VXE3)
+ {
+ gcc_assert (TARGET_VX);
+ op1
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+ rtx lab = gen_label_rtx ();
+ rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM);
+      /* Compare high halves for equality.
+ VEC[L]G op1, op2 sets
+ CC1 if high(op1) < high(op2)
+ and
+ CC2 if high(op1) > high(op2). */
+ machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (gen_rtx_SET (
+ gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx));
+      /* At this point we know that the high halves are equal.
+	 VCHLGS op2, op1 sets CC1 if low(op1) < low(op2).  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op2, op1)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+ emit_insn (gen_rtx_SET (op0, const1_rtx));
+ emit_insn (
+ gen_movsicc (op0,
+ gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM),
+ const0_rtx),
+ constm1_rtx, op0));
+      /* Deal with the case where both halves are equal.  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVEQmode, op1, op2)),
+ gen_rtx_SET (gen_reg_rtx (V2DImode),
+ gen_rtx_EQ (V2DImode, op1, op2)))));
+ emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx),
+ const0_rtx, op0));
+ return;
+ }
+
+ if (mode == QImode || mode == HImode)
+ {
+ rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND;
+ op1 = simplify_gen_unary (extend, SImode, op1, mode);
+ op1 = force_reg (SImode, op1);
+ op2 = simplify_gen_unary (extend, SImode, op2, mode);
+ op2 = force_reg (SImode, op2);
+ mode = SImode;
+ }
+
+ if (op3 == const1_rtx)
+ {
+ cc_mode = CCUmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LTU (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GTU (mode, cc, const0_rtx);
+ }
+ else
+ {
+ cc_mode = CCSmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LT (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GT (mode, cc, const0_rtx);
+ }
+
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2)));
+ emit_move_insn (op0, const0_rtx);
+ emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0));
+ emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0));
+}
+
+/* Expand floating-point op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : -128.
+
+ If op3 equals const0_rtx, then we are interested in the compare only (see
+   test spaceship-fp-4.c).  Otherwise, op3 is a CONST_INT different from
+   const1_rtx and constm1_rtx, which is used to set op0 for unordered.
+
+ Emit a branch-only solution, i.e., let if-convert fold the branches into
+ LOCs if applicable. This has the benefit that the solution is also
+ applicable if we are only interested in the compare, i.e., if op3 equals
+ const0_rtx.
+ */
+
+void
+s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 != const1_rtx && op3 != constm1_rtx);
+
+ machine_mode mode = GET_MODE (op1);
+ machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2);
+ rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx);
+ rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx);
+ rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx);
+ rtx_insn *insn;
+ rtx l_unordered = gen_label_rtx ();
+ rtx l_eq = gen_label_rtx ();
+ rtx l_gt = gen_label_rtx ();
+ rtx l_end = gen_label_rtx ();
+
+ s390_emit_compare (VOIDmode, LTGT, op1, op2);
+ if (!flag_finite_math_only)
+ {
+ insn = s390_emit_jump (l_unordered, cond_unordered);
+ add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
+ }
+ insn = s390_emit_jump (l_eq, cond_eq);
+ add_reg_br_prob_note (insn, profile_probability::unlikely ());
+ insn = s390_emit_jump (l_gt, cond_gt);
+ add_reg_br_prob_note (insn, profile_probability::even ());
+ emit_move_insn (op0, constm1_rtx);
+ emit_jump (l_end);
+ emit_label (l_eq);
+ emit_move_insn (op0, const0_rtx);
+ emit_jump (l_end);
+ emit_label (l_gt);
+ emit_move_insn (op0, const1_rtx);
+ if (!flag_finite_math_only)
+ {
+ emit_jump (l_end);
+ emit_label (l_unordered);
+ rtx unord_val = op3 == const0_rtx ? GEN_INT (-128) : op3;
+ emit_move_insn (op0, unord_val);
+ }
+ emit_label (l_end);
+}
+
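For reference, the scalar semantics the two expanders above implement can be written as a short sketch (illustrative only, not part of the patch; op3 selects unsigned vs. signed in the integer case):

/* Hedged sketch of what the generated code computes.  */
static inline int
spaceship_int (long a, long b, bool unsigned_p)
{
  if (unsigned_p)
    {
      unsigned long ua = a, ub = b;
      return ua == ub ? 0 : ua < ub ? -1 : 1;
    }
  return a == b ? 0 : a < b ? -1 : 1;
}

static inline int
spaceship_fp (double a, double b, int unordered_val)
{
  if (a == b)
    return 0;
  if (a < b)
    return -1;
  if (a > b)
    return 1;
  return unordered_val;		/* -128 when only the compare is of interest  */
}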
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
@@ -9078,15 +9239,12 @@ print_operand (FILE *file, rtx x, int code)
else if (code == 'h')
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
+ /* Support arbitrary _BitInt constants in asm statements. */
+ else if (code == 0)
+ output_addr_const (file, x);
else
- {
- if (code == 0)
- output_operand_lossage ("invalid constant - try using "
- "an output modifier");
- else
- output_operand_lossage ("invalid constant for output modifier '%c'",
- code);
- }
+ output_operand_lossage ("invalid constant for output modifier '%c'",
+ code);
break;
case CONST_VECTOR:
switch (code)
@@ -18607,6 +18765,27 @@ s390_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
+/* Return true if _BitInt(N) is supported and fill its details into *INFO. */
+
+bool
+s390_bitint_type_info (int n, struct bitint_info *info)
+{
+ if (!TARGET_64BIT)
+ return false;
+ if (n <= 8)
+ info->limb_mode = QImode;
+ else if (n <= 16)
+ info->limb_mode = HImode;
+ else if (n <= 32)
+ info->limb_mode = SImode;
+ else
+ info->limb_mode = DImode;
+ info->abi_limb_mode = info->limb_mode;
+ info->big_endian = true;
+ info->extended = true;
+ return true;
+}
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -18928,6 +19107,9 @@ s390_c_mode_for_floating_type (enum tree_index ti)
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "S/390"
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO s390_bitint_type_info
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 1edbfde..858387c 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1527,6 +1527,27 @@
operands[0] = SET_DEST (PATTERN (curr_insn));
})
+; Restrict the spaceship optab to z13 or later, where LOAD HALFWORD
+; IMMEDIATE ON CONDITION is available.
+
+(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_INT 1 "register_operand")
+ (match_operand:SPACESHIP_INT 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT"
+ "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
+(define_mode_iterator SPACESHIP_BFP [TF DF SF])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_BFP 1 "register_operand")
+ (match_operand:SPACESHIP_BFP 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT"
+ "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
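At the source level these expanders are exercised by C++20 three-way comparisons; a minimal usage sketch (illustrative only, assuming -std=c++20 on a z13 or newer 64-bit target):

// Hedged usage sketch for the new spaceship<mode>4 expanders.
#include <compare>

int
signum_di (long a, long b)
{
  auto c = a <=> b;			// integer spaceship, DImode operands
  return c < 0 ? -1 : c > 0 ? 1 : 0;
}

int
signum_df (double a, double b)
{
  auto c = a <=> b;			// floating-point spaceship, DFmode operands
  if (c == std::partial_ordering::unordered)
    return -128;			// mirrors the unordered value in the expander comment
  return c < 0 ? -1 : c > 0 ? 1 : 0;
}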
; (TF|DF|SF|TD|DD|SD) instructions
@@ -5227,18 +5248,19 @@
})
(define_insn "*zero_extendsidi2"
- [(set (match_operand:DI 0 "register_operand" "=d,d,d")
- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b")))]
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d,d")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b,v")))]
"TARGET_ZARCH"
"@
llgfr\t%0,%1
llgf\t%0,%1
- llgfrl\t%0,%1"
- [(set_attr "op_type" "RRE,RXY,RIL")
- (set_attr "type" "*,*,larl")
- (set_attr "cpu_facility" "*,*,z10")
- (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3")
- (set_attr "relative_long" "*,*,yes")])
+ llgfrl\t%0,%1
+ vlgvf\t%0,%v1,0"
+ [(set_attr "op_type" "RRE,RXY,RIL,VRS")
+ (set_attr "type" "*,*,larl,*")
+ (set_attr "cpu_facility" "*,*,z10,vx")
+ (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3,*")
+ (set_attr "relative_long" "*,*,yes,*")])
;
; LLGT-type instructions (zero-extend from 31 bit to 64 bit).
@@ -5341,29 +5363,32 @@
; llhrl, llghrl
(define_insn "*zero_extendhi<mode>2_z10"
- [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
- (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b")))]
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b,v")))]
"TARGET_Z10"
"@
ll<g>hr\t%0,%1
ll<g>h\t%0,%1
- ll<g>hrl\t%0,%1"
- [(set_attr "op_type" "RXY,RRE,RIL")
- (set_attr "type" "*,*,larl")
- (set_attr "cpu_facility" "*,*,z10")
- (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3")
- (set_attr "relative_long" "*,*,yes")])
+ ll<g>hrl\t%0,%1
+ vlgvh\t%0,%v1,0"
+ [(set_attr "op_type" "RXY,RRE,RIL,VRS")
+ (set_attr "type" "*,*,larl,*")
+ (set_attr "cpu_facility" "*,*,z10,vx")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3,*")
+ (set_attr "relative_long" "*,*,yes,*")])
; llhr, llcr, llghr, llgcr, llh, llc, llgh, llgc
(define_insn "*zero_extend<HQI:mode><GPR:mode>2_extimm"
- [(set (match_operand:GPR 0 "register_operand" "=d,d")
- (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T")))]
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
+ (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T,v")))]
"TARGET_EXTIMM"
"@
ll<g><hc>r\t%0,%1
- ll<g><hc>\t%0,%1"
- [(set_attr "op_type" "RRE,RXY")
- (set_attr "z10prop" "z10_super_E1,z10_fwd_A3")])
+ ll<g><hc>\t%0,%1
+ vlgv<HQI:bhfgq>\t%0,%v1,0"
+ [(set_attr "op_type" "RRE,RXY,VRS")
+ (set_attr "cpu_facility" "*,*,vx")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,*")])
; llgh, llgc
(define_insn "*zero_extend<HQI:mode><GPR:mode>2"
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 12bbeb6..745634e 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -501,54 +501,6 @@
SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")])
-; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e.,
-; an implicit zero extend is done.
-
-(define_insn "*movdi<mode>_zero_extend_A"
- [(set (match_operand:DI 0 "register_operand" "=d")
- (zero_extend:DI (match_operand:SINT 1 "register_operand" "v")))]
- "TARGET_VX"
- "vlgv<bhfgq>\t%0,%v1,0"
- [(set_attr "op_type" "VRS")])
-
-(define_insn "*movsi<mode>_zero_extend_A"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (zero_extend:SI (match_operand:HQI 1 "register_operand" "v")))]
- "TARGET_VX"
- "vlgv<bhfgq>\t%0,%v1,0"
- [(set_attr "op_type" "VRS")])
-
-(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
- V1HI V2HI V4HI V8HI
- V1SI V2SI V4SI])
-(define_insn "*movdi<mode>_zero_extend_B"
- [(set (match_operand:DI 0 "register_operand" "=d")
- (zero_extend:DI (vec_select:<non_vec>
- (match_operand:VLGV_DI 1 "register_operand" "v")
- (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
- "TARGET_VX"
-{
- operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
- return "vlgv<bhfgq>\t%0,%v1,%Y2";
-}
- [(set_attr "op_type" "VRS")
- (set_attr "mnemonic" "vlgv<bhfgq>")])
-
-(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
- V1HI V2HI V4HI V8HI])
-(define_insn "*movsi<mode>_zero_extend_B"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (zero_extend:SI (vec_select:<non_vec>
- (match_operand:VLGV_SI 1 "register_operand" "v")
- (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
- "TARGET_VX"
-{
- operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
- return "vlgv<bhfgq>\t%0,%v1,%Y2";
-}
- [(set_attr "op_type" "VRS")
- (set_attr "mnemonic" "vlgv<bhfgq>")])
-
; vec_load_lanes?
; vec_store_lanes?
@@ -763,6 +715,42 @@
DONE;
})
+; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e.,
+; an implicit zero extend is done.
+
+(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
+ V1HI V2HI V4HI V8HI
+ V1SI V2SI V4SI])
+(define_insn "*vec_extract<mode>_zero_extend"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (vec_select:<non_vec>
+ (match_operand:VLGV_DI 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))]
+ "TARGET_VX"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+ [(set_attr "op_type" "VRS")
+ (set_attr "mnemonic" "vlgv<bhfgq>")])
+
+(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
+ V1HI V2HI V4HI V8HI])
+(define_insn "*vec_extract<mode>_zero_extend"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (vec_select:<non_vec>
+ (match_operand:VLGV_SI 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))]
+ "TARGET_VX"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+ [(set_attr "op_type" "VRS")
+ (set_attr "mnemonic" "vlgv<bhfgq>")])
+
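Relative to the deleted *mov..._zero_extend_B patterns, the rewritten ones also accept a register element index (the "a" part of the "an" constraint). A hedged sketch of source that could use a single vlgv with the implicit zero extend (GCC vector-extension syntax, illustrative only):

/* Hedged example: variable-index element extraction widening to DImode.  */
typedef unsigned int v4si __attribute__ ((vector_size (16)));

unsigned long
get_elem (v4si v, int i)
{
  return v[i & 3];	/* vlgvf with a register index, zero-extended result  */
}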
(define_insn "*vec_vllezlf<mode>"
[(set (match_operand:V_HW_4 0 "register_operand" "=v")
(vec_concat:V_HW_4
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 77c9571..727ec1e 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -130,7 +130,7 @@
(and (match_code "mem")
(match_test "smalloffset_mem_p (op)")))
-(define_memory_constraint "T"
+(define_special_memory_constraint "T"
"Memory in a literal pool (addressable with an L32R instruction)."
(and (match_code "mem")
(match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 9aeaba6..20160a4 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -189,6 +189,9 @@
(define_predicate "ubranch_operator"
(match_code "ltu,geu"))
+(define_predicate "alt_ubranch_operator"
+ (match_code "gtu,leu"))
+
(define_predicate "boolean_operator"
(match_code "eq,ne"))
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 1f5dcf5..98e75c6 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
extern bool xtensa_split1_finished_p (void);
extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 02554c5..f3b89de 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2645,6 +2645,94 @@ xtensa_split_DI_reg_imm (rtx *operands)
}
+/* Return the asm output string for the bswapsi2_internal insn pattern.
+   It does this by scanning backwards within the BB from the specified insn;
+   if another bswapsi2_internal is found, the instruction that sets SAR to 8
+   is omitted.  If none is found, or if a CALL, JUMP, ASM, or other insn
+   that clobbers SAR is found first, an instruction setting SAR to 8 is
+   prepended as usual.  */
+
+static int
+xtensa_bswapsi2_output_1 (rtx_insn *insn)
+{
+ int icode;
+ rtx pat;
+ const char *iname;
+
+  /* CALL insns do not preserve SAR.
+     JUMP insns only appear at the end of a BB, so they do not need to be
+     considered when scanning backwards.  */
+ if (CALL_P (insn))
+ return -1;
+
+ switch (icode = INSN_CODE (insn))
+ {
+    /* Rotate insns clobber SAR.  */
+ case CODE_FOR_rotlsi3:
+ case CODE_FOR_rotrsi3:
+ return -1;
+    /* Simple shift insns clobber SAR if the shift amount is not an immediate.  */
+ case CODE_FOR_ashlsi3_internal:
+ case CODE_FOR_ashrsi3:
+ case CODE_FOR_lshrsi3:
+ if (! CONST_INT_P (XEXP (SET_SRC (PATTERN (insn)), 1)))
+ return -1;
+ break;
+    /* This insn always sets SAR to 8.  */
+ case CODE_FOR_bswapsi2_internal:
+ return 1;
+ default:
+ break;
+ }
+
+ /* "*shift_per_byte" and "*shlrd_*" complex shift insns clobber SAR. */
+ if (icode >= CODE_FOR_nothing
+ && (! strcmp (iname = insn_data[icode].name, "*shift_per_byte")
+ || ! strncmp (iname, "*shlrd_", 7)))
+ return -1;
+
+  /* Asm statements may also clobber SAR, so assume that they do.  */
+ if (NONJUMP_INSN_P (insn))
+ switch (GET_CODE (pat = PATTERN (insn)))
+ {
+ case SET:
+ return GET_CODE (SET_SRC (pat)) == ASM_OPERANDS ? -1 : 0;
+ case PARALLEL:
+ return (GET_CODE (pat = XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (SET_SRC (pat)) == ASM_OPERANDS)
+ || GET_CODE (pat) == ASM_OPERANDS
+ || GET_CODE (pat) == ASM_INPUT ? -1 : 0;
+ case ASM_OPERANDS:
+ return -1;
+ default:
+ break;
+ }
+
+  /* All other insns do not affect SAR.  */
+ return 0;
+}
+
+char *
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+ static char result[128];
+ int i;
+
+ strcpy (result, "ssai\t8\n\t");
+ while ((insn = prev_nonnote_nondebug_insn_bb (insn)))
+ if ((i = xtensa_bswapsi2_output_1 (insn)) < 0)
+ break;
+ else if (i > 0)
+ {
+ result[0] = '\0';
+ break;
+ }
+ strcat (result, output);
+
+ return result;
+}
+
+
/* Try to split an integer value into what are suitable for two consecutive
immediate addition instructions, ADDI or ADDMI. */
@@ -4702,25 +4790,49 @@ static bool
xtensa_is_insn_L32R_p (const rtx_insn *insn)
{
rtx pat, dest, src;
+ machine_mode mode;
- /* "PATTERN (insn)" can be used without checking, see insn_cost()
- in gcc/rtlanal.cc. */
+  /* RTX insns that are not "(set (reg) ...)" cannot become L32R instructions:
+     - PATTERN() may be applied to the insn without validation.
+       See insn_cost() in gcc/rtlanal.cc.
+     - register_operand() is used instead of REG() so that things which do not
+       look like REGs yet, but will eventually become REGs, are recognized as
+       well.  */
if (GET_CODE (pat = PATTERN (insn)) != SET
|| ! register_operand (dest = SET_DEST (pat), VOIDmode))
return false;
+ /* If the source is a reference to a literal pool entry, then the insn
+ obviously corresponds to an L32R instruction. */
if (constantpool_mem_p (src = SET_SRC (pat)))
return true;
- /* Return true if:
- - CONST16 instruction is not configured, and
- - the source is some constant, and also
- - negation of "the source is integer and fits into the immediate
- field". */
- return (!TARGET_CONST16
- && CONSTANT_P (src)
- && ! ((GET_MODE (dest) == SImode || GET_MODE (dest) == HImode)
- && CONST_INT_P (src) && xtensa_simm12b (INTVAL (src))));
+ /* Similarly, an insn whose source is not a constant obviously does not
+ correspond to L32R. */
+ if (! CONSTANT_P (src))
+ return false;
+
+ /* If the source is a CONST_INT whose value fits into signed 12 bits, then
+ the insn corresponds to a MOVI instruction (rather than an L32R one),
+ regardless of the configuration of TARGET_CONST16 or
+ TARGET_AUTOLITPOOLS. Note that the destination register can be non-
+ SImode. */
+ if (((mode = GET_MODE (dest)) == SImode
+ || mode == HImode || mode == SFmode)
+ && CONST_INT_P (src) && xtensa_simm12b (INTVAL (src)))
+ return false;
+
+ /* If TARGET_CONST16 is configured, constants of the remaining forms
+ correspond to pairs of CONST16 instructions, not L32R. */
+ if (TARGET_CONST16)
+ return false;
+
+ /* The last remaining form of constant is one of the following:
+ - CONST_INTs with large values
+ - floating-point constants
+ - symbolic constants
+     all of which are handled by a relaxed MOVI instruction that is later
+     converted to an L32R instruction by the assembler.  */
+ return true;
}
/* Compute a relative costs of RTL insns. This is necessary in order to
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 629dfdd..52ffb16 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -88,6 +88,7 @@
;; This mode iterator allows the HI and QI patterns to be defined from
;; the same template.
(define_mode_iterator HQI [HI QI])
+(define_mode_attr mode_bits [(HI "16") (QI "8")])
;; This mode iterator allows the SI and HI patterns to be defined from
;; the same template.
@@ -176,19 +177,18 @@
;; Addition.
(define_insn "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
- (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
- (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
- ""
- "@
- add.n\t%0, %1, %2
- addi.n\t%0, %1, %d2
- add\t%0, %1, %2
- addi\t%0, %1, %d2
- addmi\t%0, %1, %x2"
- [(set_attr "type" "arith,arith,arith,arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "2,2,3,3,3")])
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "add_operand")))]
+ ""
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [D, d, d; arith, 2] add.n\t%0, %1, %2
+ [D, d, O; arith, 2] addi.n\t%0, %1, %d2
+ [a, r, r; arith, 3] add\t%0, %1, %2
+ [a, r, J; arith, 3] addi\t%0, %1, %d2
+ [a, r, N; arith, 3] addmi\t%0, %1, %x2
+ }
+ [(set_attr "mode" "SI")])
(define_insn "*addsubx"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -392,18 +392,15 @@
(set_attr "length" "3")])
(define_insn "<u>mulhisi3"
- [(set (match_operand:SI 0 "register_operand" "=C,A")
- (mult:SI (any_extend:SI
- (match_operand:HI 1 "register_operand" "%r,r"))
- (any_extend:SI
- (match_operand:HI 2 "register_operand" "r,r"))))]
+ [(set (match_operand:SI 0 "register_operand")
+ (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand"))
+ (any_extend:SI (match_operand:HI 2 "register_operand"))))]
"TARGET_MUL16 || TARGET_MAC16"
- "@
- mul16<su>\t%0, %1, %2
- <u>mul.aa.ll\t%1, %2"
- [(set_attr "type" "mul16,mac16")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [C, r, r; mul16, 3] mul16<su>\t%0, %1, %2
+ [A, r, r; mac16, 3] <u>mul.aa.ll\t%1, %2
+ }
+ [(set_attr "mode" "SI")])
(define_insn "muladdhisi"
[(set (match_operand:SI 0 "register_operand" "=A")
@@ -652,36 +649,15 @@
})
(define_insn "bswapsi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,&a")
- (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
- (clobber (match_scratch:SI 2 "=&a,X"))]
+ [(set (match_operand:SI 0 "register_operand")
+ (bswap:SI (match_operand:SI 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
"!optimize_debug && optimize > 1 && !optimize_size"
-{
- rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
- const char *init = "ssai\t8\;";
- static char result[128];
- if (prev_insn && NONJUMP_INSN_P (prev_insn))
- {
- rtx x = PATTERN (prev_insn);
- if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == SET
- && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
- {
- x = XEXP (XVECEXP (x, 0, 0), 1);
- if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
- init = "";
- }
- }
- sprintf (result,
- (which_alternative == 0)
- ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2"
- : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0",
- init);
- return result;
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "15,15")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1, 16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+ [&a, r, X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+ }
+ [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand" "")
@@ -742,16 +718,15 @@
;; Logical instructions.
(define_insn "andsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (and:SI (match_operand:SI 1 "register_operand" "%r,r")
- (match_operand:SI 2 "mask_operand" "P,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (and:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "mask_operand")))]
""
- "@
- extui\t%0, %1, 0, %K2
- and\t%0, %1, %2"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [a, r, P; arith, 3] extui\t%0, %1, 0, %K2
+ [a, r, r; arith, 3] and\t%0, %1, %2
+ }
+ [(set_attr "mode" "SI")])
(define_insn_and_split "*andsi3_bitcmpl"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -944,27 +919,15 @@
;; Zero-extend instructions.
-(define_insn "zero_extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))]
- ""
- "@
- extui\t%0, %1, 0, 16
- %v1l16ui\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
-
-(define_insn "zero_extendqisi2"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))]
+(define_insn "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "register_operand")
+ (zero_extend:SI (match_operand:HQI 1 "nonimmed_operand")))]
""
- "@
- extui\t%0, %1, 0, 8
- %v1l8ui\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [a, r; arith, 3] extui\t%0, %1, 0, <mode_bits>
+ [a, U; load , 3] %v1l<mode_bits>ui\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
;; Sign-extend instructions.
@@ -982,15 +945,14 @@
})
(define_insn "extendhisi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=B,a")
- (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:HI 1 "sext_operand")))]
""
- "@
- sext\t%0, %1, 15
- %v1l16si\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [B, r; arith, 3] sext\t%0, %1, 15
+ [a, U; load , 3] %v1l16si\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_expand "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "")
@@ -1327,29 +1289,28 @@
})
(define_insn "movsi_internal"
- [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,a,W,a,a,U,*a,*A")
- (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,Y,i,T,U,r,*A,*r"))]
+ [(set (match_operand:SI 0 "nonimmed_operand")
+ (match_operand:SI 1 "move_operand"))]
"xtensa_valid_move (SImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov.n\t%0, %1
- %v1l32i.n\t%0, %1
- %v0s32i.n\t%1, %0
- %v0s32i.n\t%1, %0
- mov\t%0, %1
- movsp\t%0, %1
- movi\t%0, %x1
- movi\t%0, %1
- const16\t%0, %t1\;const16\t%0, %b1
- %v1l32r\t%0, %1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,load,store,store,move,move,move,load,move,load,load,store,rsr,wsr")
- (set_attr "mode" "SI")
- (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, D; move , 2] mov.n\t%0, %1
+ [ D, d; move , 2] ^
+ [ D, R; load , 2] %v1l32i.n\t%0, %1
+ [ R, D; store, 2] %v0s32i.n\t%1, %0
+ [ R, d; store, 2] ^
+ [ a, r; move , 3] mov\t%0, %1
+ [ q, r; move , 3] movsp\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, Y; load , 3] movi\t%0, %1
+ [ W, i; move , 6] const16\t%0, %t1\;const16\t%0, %b1
+ [ a, T; load , 3] %v1l32r\t%0, %1
+ [ a, U; load , 3] %v1l32i\t%0, %1
+ [ U, r; store, 3] %v0s32i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "SI")])
(define_split
[(set (match_operand:SHI 0 "register_operand")
@@ -1399,23 +1360,22 @@
})
(define_insn "movhi_internal"
- [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,a,a,U,*a,*A")
- (match_operand:HI 1 "move_operand" "M,d,r,I,Y,T,U,r,*A,*r"))]
+ [(set (match_operand:HI 0 "nonimmed_operand")
+ (match_operand:HI 1 "move_operand"))]
"xtensa_valid_move (HImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov\t%0, %1
- movi\t%0, %x1
- movi\t%0, %1
- %v1l32r\t%0, %1
- %v1l16ui\t%0, %1
- %v0s16i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,load,load,load,store,rsr,wsr")
- (set_attr "mode" "HI")
- (set_attr "length" "2,2,3,3,3,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, d; move , 2] mov.n\t%0, %1
+ [ a, r; move , 3] mov\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, Y; load , 3] movi\t%0, %1
+ [ a, T; load , 3] %v1l32r\t%0, %1
+ [ a, U; load , 3] %v1l16ui\t%0, %1
+ [ U, r; store, 3] %v0s16i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "HI")])
;; 8-bit Integer moves
@@ -1429,21 +1389,20 @@
})
(define_insn "movqi_internal"
- [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
- (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ [(set (match_operand:QI 0 "nonimmed_operand")
+ (match_operand:QI 1 "move_operand"))]
"xtensa_valid_move (QImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov\t%0, %1
- movi\t%0, %x1
- %v1l8ui\t%0, %1
- %v0s8i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
- (set_attr "mode" "QI")
- (set_attr "length" "2,2,3,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, d; move , 2] mov.n\t%0, %1
+ [ a, r; move , 3] mov\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, U; load , 3] %v1l8ui\t%0, %1
+ [ U, r; store, 3] %v0s8i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "QI")])
;; Sub-word reloads from the constant pool.
@@ -1501,30 +1460,29 @@
})
(define_insn "movsf_internal"
- [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,a,D,R,a,f,a,a,W,a,U")
- (match_operand:SF 1 "move_operand" "f,^U,f,d,T,R,d,r,r,f,Y,iF,U,r"))]
+ [(set (match_operand:SF 0 "nonimmed_operand")
+ (match_operand:SF 1 "move_operand"))]
"((register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& !(FP_REG_P (xt_true_regnum (operands[0]))
&& (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))"
- "@
- mov.s\t%0, %1
- %v1lsi\t%0, %1
- %v0ssi\t%1, %0
- mov.n\t%0, %1
- %v1l32r\t%0, %1
- %v1l32i.n\t%0, %1
- %v0s32i.n\t%1, %0
- mov\t%0, %1
- wfr\t%0, %1
- rfr\t%0, %1
- movi\t%0, %y1
- const16\t%0, %t1\;const16\t%0, %b1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0"
- [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,load,move,load,store")
- (set_attr "mode" "SF")
- (set_attr "length" "3,3,3,2,3,2,2,3,3,3,3,6,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [f, f; farith, 3] mov.s\t%0, %1
+ [f, ^U; fload , 3] %v1lsi\t%0, %1
+ [U, f; fstore, 3] %v0ssi\t%1, %0
+ [D, d; move , 2] mov.n\t%0, %1
+ [a, T; load , 3] %v1l32r\t%0, %1
+ [D, R; load , 2] %v1l32i.n\t%0, %1
+ [R, d; store , 2] %v0s32i.n\t%1, %0
+ [a, r; move , 3] mov\t%0, %1
+ [f, r; farith, 3] wfr\t%0, %1
+ [a, f; farith, 3] rfr\t%0, %1
+ [a, Y; load , 3] movi\t%0, %y1
+ [W, iF; move , 6] const16\t%0, %t1\;const16\t%0, %b1
+ [a, U; load , 3] %v1l32i\t%0, %1
+ [U, r; store , 3] %v0s32i\t%1, %0
+ }
+ [(set_attr "mode" "SF")])
(define_insn "*lsiu"
[(set (match_operand:SF 0 "register_operand" "=f")
@@ -1692,16 +1650,15 @@
})
(define_insn "ashlsi3_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (ashift:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- slli\t%0, %1, %R2
- ssl\t%2\;sll\t%0, %1"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] slli\t%0, %1, %R2
+ [a, r, r; arith, 6] ssl\t%2\;sll\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_split
[(set (match_operand:SI 0 "register_operand")
@@ -1713,35 +1670,26 @@
(match_dup 1)))])
(define_insn "ashrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- srai\t%0, %1, %R2
- ssr\t%2\;sra\t%0, %1"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] srai\t%0, %1, %R2
+ [a, r, r; arith, 6] ssr\t%2\;sra\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "lshrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
-{
- if (which_alternative == 0)
- {
- if ((INTVAL (operands[2]) & 0x1f) < 16)
- return "srli\t%0, %1, %R2";
- else
- return "extui\t%0, %1, %R2, %L2";
- }
- return "ssr\t%2\;srl\t%0, %1";
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] << (INTVAL (operands[2]) & 0x1f) < 16 ? \"srli\t%0, %1, %R2\" : \"extui\t%0, %1, %R2, %L2\";
+ [a, r, r; arith, 6] ssr\t%2\;srl\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "*shift_per_byte"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -1944,28 +1892,26 @@
(set_attr "length" "6")])
(define_insn "rotlsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (rotate:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (rotate:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- ssai\t%L2\;src\t%0, %1, %1
- ssl\t%2\;src\t%0, %1, %1"
- [(set_attr "type" "multi,multi")
- (set_attr "mode" "SI")
- (set_attr "length" "6,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; multi, 6] ssai\t%L2\;src\t%0, %1, %1
+ [a, r, r; multi, 6] ssl\t%2\;src\t%0, %1, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "rotrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- ssai\t%R2\;src\t%0, %1, %1
- ssr\t%2\;src\t%0, %1, %1"
- [(set_attr "type" "multi,multi")
- (set_attr "mode" "SI")
- (set_attr "length" "6,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; multi, 6] ssai\t%R2\;src\t%0, %1, %1
+ [a, r, r; multi, 6] ssr\t%2\;src\t%0, %1, %1
+ }
+ [(set_attr "mode" "SI")])
;; Comparisons.
@@ -2024,26 +1970,23 @@
[(match_operand:SI 0 "register_operand" "r")
(const_int -2147483648)])
(label_ref (match_operand 1 ""))
- (pc)))]
+ (pc)))
+ (clobber (match_scratch:SI 3 "=a"))]
"TARGET_ABS"
"#"
- "&& can_create_pseudo_p ()"
+ "&& 1"
[(set (match_dup 3)
(abs:SI (match_dup 0)))
(set (pc)
(if_then_else (match_op_dup 2
- [(zero_extract:SI (match_dup 3)
- (const_int 1)
- (match_dup 4))
+ [(match_dup 3)
(const_int 0)])
(label_ref (match_dup 1))
(pc)))]
{
- operands[3] = gen_reg_rtx (SImode);
- operands[4] = GEN_INT (BITS_BIG_ENDIAN ? 0 : 31);
- operands[2] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[2])),
- VOIDmode, XEXP (operands[2], 0),
- const0_rtx);
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (SImode);
+ PUT_CODE (operands[2], GET_CODE (operands[2]) == EQ ? LT : GE);
}
[(set_attr "type" "jump")
(set_attr "mode" "none")
@@ -2190,7 +2133,7 @@
(label_ref (match_dup 1))
(pc)))]
{
- operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (GET_MODE (operands[3]))) - 1);
+ operands[3] = GEN_INT (GET_MODE_MASK (GET_MODE (operands[3])));
})
(define_insn_and_split "*masktrue_const_pow2_minus_one"
@@ -3370,6 +3313,42 @@
(const_int 8)
(const_int 9))))])
+(define_insn_and_split "*eqne_in_range"
+ [(set (pc)
+ (if_then_else (match_operator 4 "alt_ubranch_operator"
+ [(plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "i"))
+ (match_operand:SI 2 "const_int_operand" "i")])
+ (label_ref (match_operand 3 ""))
+ (pc)))
+ (clobber (match_scratch:SI 5 "=&a"))]
+ "TARGET_MINMAX && TARGET_CLAMPS
+ && INTVAL (operands[1]) * 2 - INTVAL (operands[2]) == 1
+ && IN_RANGE (exact_log2 (INTVAL (operands[1])), 7, 22)"
+ "#"
+ "&& 1"
+ [(set (match_dup 5)
+ (smin:SI (smax:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))
+ (set (pc)
+ (if_then_else (match_op_dup 4
+ [(match_dup 0)
+ (match_dup 5)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ HOST_WIDE_INT v = INTVAL (operands[1]);
+ operands[1] = GEN_INT (-v);
+ operands[2] = GEN_INT (v - 1);
+ PUT_CODE (operands[4], GET_CODE (operands[4]) == GTU ? NE : EQ);
+ if (GET_CODE (operands[5]) == SCRATCH)
+ operands[5] = gen_reg_rtx (SImode);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "6")])
+
(define_split
[(clobber (match_operand 0 "register_operand"))]
"HARD_REGISTER_P (operands[0])