Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-cc-fusion.cc | 297
-rw-r--r--  gcc/config/aarch64/aarch64-passes.def | 1
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 5
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 22
-rw-r--r--  gcc/config/aarch64/aarch64-sme.md | 3
-rw-r--r--  gcc/config/aarch64/aarch64.cc | 245
-rw-r--r--  gcc/config/aarch64/aarch64.h | 5
-rw-r--r--  gcc/config/aarch64/aarch64.md | 220
-rw-r--r--  gcc/config/aarch64/constraints.md | 10
-rw-r--r--  gcc/config/aarch64/iterators.md | 25
-rw-r--r--  gcc/config/aarch64/predicates.md | 23
-rw-r--r--  gcc/config/aarch64/t-aarch64 | 6
-rw-r--r--  gcc/config/arc/arc.md | 6
-rw-r--r--  gcc/config/arm/arm.md | 4
-rw-r--r--  gcc/config/avr/specs.h | 2
-rw-r--r--  gcc/config/cris/cris.h | 2
-rw-r--r--  gcc/config/darwin-sections.def | 7
-rw-r--r--  gcc/config/darwin.cc | 67
-rw-r--r--  gcc/config/darwin.h | 30
-rw-r--r--  gcc/config/h8300/addsub.md | 2
-rw-r--r--  gcc/config/h8300/jumpcall.md | 12
-rw-r--r--  gcc/config/h8300/testcompare.md | 26
-rw-r--r--  gcc/config/i386/i386-expand.cc | 124
-rw-r--r--  gcc/config/i386/i386-features.cc | 1029
-rw-r--r--  gcc/config/i386/i386-options.cc | 10
-rw-r--r--  gcc/config/i386/i386-passes.def | 2
-rw-r--r--  gcc/config/i386/i386-protos.h | 5
-rw-r--r--  gcc/config/i386/i386.cc | 213
-rw-r--r--  gcc/config/i386/i386.h | 25
-rw-r--r--  gcc/config/i386/i386.md | 25
-rw-r--r--  gcc/config/i386/i386.opt | 4
-rw-r--r--  gcc/config/i386/predicates.md | 3
-rw-r--r--  gcc/config/i386/sse.md | 122
-rw-r--r--  gcc/config/i386/x86-tune-costs.h | 192
-rw-r--r--  gcc/config/loongarch/genopts/isa-evolution.in | 1
-rw-r--r--  gcc/config/loongarch/loongarch-def.cc | 4
-rw-r--r--  gcc/config/loongarch/loongarch-def.h | 10
-rw-r--r--  gcc/config/loongarch/loongarch-evolution.cc | 4
-rw-r--r--  gcc/config/loongarch/loongarch-evolution.h | 8
-rw-r--r--  gcc/config/loongarch/loongarch-str.h | 1
-rw-r--r--  gcc/config/loongarch/loongarch.cc | 81
-rw-r--r--  gcc/config/loongarch/loongarch.h | 4
-rw-r--r--  gcc/config/loongarch/loongarch.opt | 4
-rw-r--r--  gcc/config/loongarch/loongarch.opt.urls | 3
-rw-r--r--  gcc/config/loongarch/simd.md | 2
-rw-r--r--  gcc/config/loongarch/sync.md | 641
-rw-r--r--  gcc/config/mips/mips.h | 10
-rw-r--r--  gcc/config/mips/mips.opt | 4
-rw-r--r--  gcc/config/pru/pru.cc | 11
-rw-r--r--  gcc/config/pru/pru.h | 3
-rw-r--r--  gcc/config/pru/pru.md | 28
-rw-r--r--  gcc/config/pru/pru.opt | 8
-rw-r--r--  gcc/config/pru/pru.opt.urls | 6
-rw-r--r--  gcc/config/pru/t-multilib | 29
-rwxr-xr-x  gcc/config/riscv/arch-canonicalize | 22
-rw-r--r--  gcc/config/riscv/autovec-opt.md | 182
-rw-r--r--  gcc/config/riscv/constraints.md | 4
-rw-r--r--  gcc/config/riscv/gen-riscv-ext-opt.cc | 44
-rw-r--r--  gcc/config/riscv/predicates.md | 20
-rw-r--r--  gcc/config/riscv/riscv-avlprop.cc | 13
-rw-r--r--  gcc/config/riscv/riscv-cores.def | 8
-rw-r--r--  gcc/config/riscv/riscv-ext-mips.def | 13
-rw-r--r--  gcc/config/riscv/riscv-ext.opt | 2
-rw-r--r--  gcc/config/riscv/riscv-protos.h | 8
-rw-r--r--  gcc/config/riscv/riscv-subset.h | 13
-rw-r--r--  gcc/config/riscv/riscv-target-attr.cc | 102
-rw-r--r--  gcc/config/riscv/riscv-v.cc | 165
-rw-r--r--  gcc/config/riscv/riscv-vector-costs.cc | 2
-rw-r--r--  gcc/config/riscv/riscv.cc | 395
-rw-r--r--  gcc/config/riscv/riscv.h | 17
-rw-r--r--  gcc/config/riscv/riscv.md | 71
-rw-r--r--  gcc/config/riscv/sifive-p400.md | 20
-rw-r--r--  gcc/config/riscv/sifive-p600.md | 17
-rw-r--r--  gcc/config/riscv/sync.md | 35
-rw-r--r--  gcc/config/riscv/t-rtems | 9
-rw-r--r--  gcc/config/riscv/vector.md | 265
-rw-r--r--  gcc/config/riscv/xiangshan.md | 4
-rw-r--r--  gcc/config/rl78/rl78.opt.urls | 2
-rw-r--r--  gcc/config/rs6000/rs6000.cc | 4
-rw-r--r--  gcc/config/rs6000/rs6000.md | 40
-rw-r--r--  gcc/config/rx/rx.cc | 49
-rw-r--r--  gcc/config/s390/s390.cc | 4
-rw-r--r--  gcc/config/s390/s390.md | 46
-rw-r--r--  gcc/config/s390/vector.md | 84
-rw-r--r--  gcc/config/xtensa/constraints.md | 2
-rw-r--r--  gcc/config/xtensa/predicates.md | 3
-rw-r--r--  gcc/config/xtensa/xtensa-protos.h | 1
-rw-r--r--  gcc/config/xtensa/xtensa.cc | 88
-rw-r--r--  gcc/config/xtensa/xtensa.md | 429
89 files changed, 4157 insertions, 1662 deletions
diff --git a/gcc/config/aarch64/aarch64-cc-fusion.cc b/gcc/config/aarch64/aarch64-cc-fusion.cc
deleted file mode 100644
index cea54de..0000000
--- a/gcc/config/aarch64/aarch64-cc-fusion.cc
+++ /dev/null
@@ -1,297 +0,0 @@
-// Pass to fuse CC operations with other instructions.
-// Copyright (C) 2021-2025 Free Software Foundation, Inc.
-//
-// This file is part of GCC.
-//
-// GCC is free software; you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3, or (at your option) any later
-// version.
-//
-// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-// for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with GCC; see the file COPYING3. If not see
-// <http://www.gnu.org/licenses/>.
-
-// This pass looks for sequences of the form:
-//
-// A: (set (reg R1) X1)
-// B: ...instructions that might change the value of X1...
-// C: (set (reg CC) X2) // X2 uses R1
-//
-// and tries to change them to:
-//
-// C': [(set (reg CC) X2')
-// (set (reg R1) X1)]
-// B: ...instructions that might change the value of X1...
-//
-// where X2' is the result of replacing R1 with X1 in X2.
-//
-// This sequence occurs in SVE code in two important cases:
-//
-// (a) Sometimes, to deal correctly with overflow, we need to increment
-// an IV after a WHILELO rather than before it. In this case:
-// - A is a WHILELO,
-// - B includes an IV increment and
-// - C is a separate PTEST.
-//
-// (b) ACLE code of the form:
-//
-// svbool_t ok = svrdffr ();
-// if (svptest_last (pg, ok))
-// ...
-//
-// must, for performance reasons, be code-generated as:
-//
-// RDFFRS Pok.B, Pg/Z
-// ...branch on flags result...
-//
-// without a separate PTEST of Pok. In this case:
-// - A is an aarch64_rdffr
-// - B includes an aarch64_update_ffrt
-// - C is a separate PTEST
-//
-// Combine can handle this optimization if B doesn't exist and if A and
-// C are in the same BB. This pass instead handles cases where B does
-// exist and cases where A and C are in different BBs of the same EBB.
-
-#define IN_TARGET_CODE 1
-
-#define INCLUDE_ALGORITHM
-#define INCLUDE_FUNCTIONAL
-#define INCLUDE_ARRAY
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "rtl.h"
-#include "df.h"
-#include "rtl-ssa.h"
-#include "tree-pass.h"
-
-using namespace rtl_ssa;
-
-namespace {
-const pass_data pass_data_cc_fusion =
-{
- RTL_PASS, // type
- "cc_fusion", // name
- OPTGROUP_NONE, // optinfo_flags
- TV_NONE, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- TODO_df_finish, // todo_flags_finish
-};
-
-// Class that represents one run of the pass.
-class cc_fusion
-{
-public:
- cc_fusion () : m_parallel () {}
- void execute ();
-
-private:
- rtx optimizable_set (const insn_info *);
- bool parallelize_insns (def_info *, rtx, def_info *, rtx);
- void optimize_cc_setter (def_info *, rtx);
-
- // A spare PARALLEL rtx, or null if none.
- rtx m_parallel;
-};
-
-// See whether INSN is a single_set that we can optimize. Return the
-// set if so, otherwise return null.
-rtx
-cc_fusion::optimizable_set (const insn_info *insn)
-{
- if (!insn->can_be_optimized ()
- || insn->is_asm ()
- || insn->has_volatile_refs ()
- || insn->has_pre_post_modify ())
- return NULL_RTX;
-
- return single_set (insn->rtl ());
-}
-
-// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
-// a single_set that sets (only) OTHER_DEF. CC_SET is known to set the
-// CC register and the instruction that contains CC_SET is known to use
-// OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
-bool
-cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set,
- def_info *other_def, rtx other_set)
-{
- auto attempt = crtl->ssa->new_change_attempt ();
-
- insn_info *cc_insn = cc_def->insn ();
- insn_info *other_insn = other_def->insn ();
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
- other_insn->uid (), cc_insn->uid ());
-
- // Try to substitute OTHER_SET into CC_INSN.
- insn_change_watermark rtl_watermark;
- rtx_insn *cc_rtl = cc_insn->rtl ();
- insn_propagation prop (cc_rtl, SET_DEST (other_set),
- SET_SRC (other_set));
- if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
- || prop.num_replacements == 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
- other_def->regno ());
- return false;
- }
-
- // Restrict the uses to those outside notes.
- use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
- use_array other_set_uses = remove_note_accesses (attempt,
- other_insn->uses ());
-
- // Remove the use of the substituted value.
- access_array_builder uses_builder (attempt);
- uses_builder.reserve (cc_uses.size ());
- for (use_info *use : cc_uses)
- if (use->def () != other_def)
- uses_builder.quick_push (use);
- cc_uses = use_array (uses_builder.finish ());
-
- // Get the list of uses for the new instruction.
- insn_change cc_change (cc_insn);
- cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
- if (!cc_change.new_uses.is_valid ())
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- cannot merge uses\n");
- return false;
- }
-
- // The instruction initially defines just two registers. recog can add
- // extra clobbers if necessary.
- auto_vec<access_info *, 2> new_defs;
- new_defs.quick_push (cc_def);
- new_defs.quick_push (other_def);
- sort_accesses (new_defs);
- cc_change.new_defs = def_array (access_array (new_defs));
-
- // Make sure there is somewhere that the new instruction could live.
- auto other_change = insn_change::delete_insn (other_insn);
- insn_change *changes[] = { &other_change, &cc_change };
- cc_change.move_range = cc_insn->ebb ()->insn_range ();
- if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
- return false;
- }
-
- // Tentatively install the new pattern. By convention, the CC set
- // must be first.
- if (m_parallel)
- {
- XVECEXP (m_parallel, 0, 0) = cc_set;
- XVECEXP (m_parallel, 0, 1) = other_set;
- }
- else
- {
- rtvec vec = gen_rtvec (2, cc_set, other_set);
- m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
- }
- validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
-
- // These routines report failures themselves.
- if (!recog (attempt, cc_change, ignore_changing_insns (changes))
- || !changes_are_worthwhile (changes)
- || !crtl->ssa->verify_insn_changes (changes))
- return false;
-
- remove_reg_equal_equiv_notes (cc_rtl);
- confirm_change_group ();
- crtl->ssa->change_insns (changes);
- m_parallel = NULL_RTX;
- return true;
-}
-
-// Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
-// a definition of the CC register by CC_SET.
-void
-cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set)
-{
- // Search the registers used by the CC setter for an easily-substitutable
- // def-use chain.
- for (use_info *other_use : cc_def->insn ()->uses ())
- if (def_info *other_def = other_use->def ())
- if (other_use->regno () != CC_REGNUM
- && other_def->ebb () == cc_def->ebb ())
- if (rtx other_set = optimizable_set (other_def->insn ()))
- {
- rtx dest = SET_DEST (other_set);
- if (REG_P (dest)
- && REGNO (dest) == other_def->regno ()
- && REG_NREGS (dest) == 1
- && parallelize_insns (cc_def, cc_set, other_def, other_set))
- return;
- }
-}
-
-// Run the pass on the current function.
-void
-cc_fusion::execute ()
-{
- // Initialization.
- calculate_dominance_info (CDI_DOMINATORS);
- df_analyze ();
- crtl->ssa = new rtl_ssa::function_info (cfun);
-
- // Walk through all instructions that set CC. Look for a PTEST instruction
- // that we can optimize.
- //
- // ??? The PTEST test isn't needed for correctness, but it ensures that the
-  // pass has no effect on non-SVE code.
- for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM))
- if (rtx cc_set = optimizable_set (def->insn ()))
- if (REG_P (SET_DEST (cc_set))
- && REGNO (SET_DEST (cc_set)) == CC_REGNUM
- && GET_CODE (SET_SRC (cc_set)) == UNSPEC
- && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST)
- optimize_cc_setter (def, cc_set);
-
- // Finalization.
- crtl->ssa->perform_pending_updates ();
- free_dominance_info (CDI_DOMINATORS);
-}
-
-class pass_cc_fusion : public rtl_opt_pass
-{
-public:
- pass_cc_fusion (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_cc_fusion, ctxt)
- {}
-
- // opt_pass methods:
- virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; }
- virtual unsigned int execute (function *);
-};
-
-unsigned int
-pass_cc_fusion::execute (function *)
-{
- cc_fusion ().execute ();
- return 0;
-}
-
-} // end namespace
-
-// Create a new CC fusion pass instance.
-
-rtl_opt_pass *
-make_pass_cc_fusion (gcc::context *ctxt)
-{
- return new pass_cc_fusion (ctxt);
-}
diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def
index 9cf9d3e..6a53ff3 100644
--- a/gcc/config/aarch64/aarch64-passes.def
+++ b/gcc/config/aarch64/aarch64-passes.def
@@ -24,6 +24,5 @@ INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_late_track_speculation);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
-INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
INSERT_PASS_BEFORE (pass_early_remat, 1, pass_ldp_fusion);
INSERT_PASS_BEFORE (pass_peephole2, 1, pass_ldp_fusion);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 36bd885..56efcf2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1098,6 +1098,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
aarch64_addr_query_type = ADDR_QUERY_M);
machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+rtx aarch64_gen_compare_split_imm24 (rtx, rtx, rtx);
bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
rtx aarch64_load_tp (rtx);
@@ -1236,7 +1237,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_late_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
-rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
rtl_opt_pass *make_pass_ldp_fusion (gcc::context *);
@@ -1281,4 +1281,7 @@ extern bool aarch64_gcs_enabled ();
extern unsigned aarch64_data_alignment (const_tree exp, unsigned align);
extern unsigned aarch64_stack_alignment (const_tree exp, unsigned align);
+extern rtx aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
+ rtx_code_label *label);
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d..c111dc2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6731,7 +6731,7 @@
(SAT_TRUNC:<VNARROWQ>
(<TRUNC_SHIFT>:SD_HSDI
(match_operand:SD_HSDI 1 "register_operand" "w")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
"<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -6753,7 +6753,7 @@
(ALL_TRUNC:<VNARROWQ>
(<TRUNC_SHIFT>:VQN
(match_operand:VQN 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
@@ -6784,7 +6784,7 @@
(<TRUNCEXTEND>:<DWI>
(match_operand:SD_HSDI 1 "register_operand" "w"))
(match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD
&& aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
"<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
@@ -6799,7 +6799,7 @@
(<TRUNCEXTEND>:<V2XWIDE>
(match_operand:SD_HSDI 1 "register_operand"))
(match_dup 3))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
/* Use this expander to create the rounding constant vector, which is
@@ -6819,7 +6819,7 @@
(<TRUNCEXTEND>:<V2XWIDE>
(match_operand:VQN 1 "register_operand"))
(match_dup 3))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
if (<CODE> == TRUNCATE
@@ -6861,7 +6861,7 @@
(smax:SD_HSDI
(ashiftrt:SD_HSDI
(match_operand:SD_HSDI 1 "register_operand" "w")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(const_int 0))
(const_int <half_mask>)))]
"TARGET_SIMD"
@@ -6872,7 +6872,7 @@
(define_expand "aarch64_sqshrun_n<mode>"
[(match_operand:<VNARROWQ> 0 "register_operand")
(match_operand:SD_HSDI 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
rtx dst = gen_reg_rtx (<MODE>mode);
@@ -6890,7 +6890,7 @@
(smax:VQN
(ashiftrt:VQN
(match_operand:VQN 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(match_dup 3))
(match_dup 4))))]
"TARGET_SIMD"
@@ -6932,7 +6932,7 @@
(sign_extend:<DWI>
(match_operand:SD_HSDI 1 "register_operand" "w"))
(match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(const_int 0))
(const_int <half_mask>)))]
"TARGET_SIMD
@@ -6944,7 +6944,7 @@
(define_expand "aarch64_sqrshrun_n<mode>"
[(match_operand:<VNARROWQ> 0 "register_operand")
(match_operand:SD_HSDI 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
@@ -6967,7 +6967,7 @@
(sign_extend:<V2XWIDE>
(match_operand:VQN 1 "register_operand"))
(match_dup 3))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(match_dup 4))
(match_dup 5))))]
"TARGET_SIMD"
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index 6b1a747..0123ea0 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -400,7 +400,8 @@
auto label = gen_label_rtx ();
auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
- auto jump = emit_likely_jump_insn (gen_aarch64_cbznedi1 (tpidr2, label));
+ auto pat = aarch64_gen_compare_zero_and_branch (NE, tpidr2, label);
+ auto jump = emit_likely_jump_insn (pat);
JUMP_LABEL (jump) = label;
aarch64_restore_za (operands[0]);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2dbaf4a..ef9c165 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -975,19 +975,24 @@ aarch64_cb_rhs (rtx_code op_code, rtx rhs)
{
case EQ:
case NE:
- case GT:
- case GTU:
case LT:
case LTU:
+ case GE:
+ case GEU:
+ /* EQ/NE range is 0 .. 63.
+ LT/LTU range is 0 .. 63.
+ GE/GEU range is 1 .. 64 => GT x - 1, but also supports 0 via XZR.
+ So the intersection is 0 .. 63. */
return IN_RANGE (rhs_val, 0, 63);
- case GE: /* CBGE: signed greater than or equal */
- case GEU: /* CBHS: unsigned greater than or equal */
- return IN_RANGE (rhs_val, 1, 64);
-
- case LE: /* CBLE: signed less than or equal */
- case LEU: /* CBLS: unsigned less than or equal */
- return IN_RANGE (rhs_val, -1, 62);
+ case GT:
+ case GTU:
+ case LE:
+ case LEU:
+ /* GT/GTU range is 0 .. 63
+ LE/LEU range is -1 .. 62 => LT x + 1.
+ So the intersection is 0 .. 62. */
+ return IN_RANGE (rhs_val, 0, 62);
default:
return false;
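
The ranges above pair each comparison code with its inverse (EQ/NE, LT/GE, LTU/GEU, GT/LE, GTU/LEU) so that a branch can later be reversed without needing a different immediate. A minimal stand-alone paraphrase of the check, with illustrative names (this is not GCC code):

    #include <cassert>

    enum cmp_code { EQ, NE, LT, LTU, GE, GEU, GT, GTU, LE, LEU };

    static bool cb_imm_ok (cmp_code code, long imm)
    {
      switch (code)
        {
        // EQ/NE and LT/LTU encode 0 .. 63 directly; GE/GEU are emitted as
        // GT (imm - 1), i.e. 1 .. 64, with 0 available via XZR, so the range
        // shared by each code and its inverse is 0 .. 63.
        case EQ: case NE: case LT: case LTU: case GE: case GEU:
          return imm >= 0 && imm <= 63;
        // GT/GTU encode 0 .. 63 directly; LE/LEU are emitted as LT (imm + 1),
        // i.e. -1 .. 62, so the shared range is 0 .. 62.
        case GT: case GTU: case LE: case LEU:
          return imm >= 0 && imm <= 62;
        }
      return false;
    }

    int main ()
    {
      assert (cb_imm_ok (GE, 0) && cb_imm_ok (GE, 63) && !cb_imm_ok (GE, 64));
      assert (cb_imm_ok (LE, 62) && !cb_imm_ok (LE, 63) && !cb_imm_ok (LE, -1));
      return 0;
    }
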
@@ -2882,10 +2887,47 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
return aarch64_gen_compare_reg (code, x, y);
}
+/* Split IMM into two 12-bit halves, producing an EQ/NE comparison vs X.
+ TMP may be a scratch. This optimizes a sequence from
+ mov x0, #imm1
+ movk x0, #imm2, lsl 16 -- x0 contains CST
+ cmp x1, x0
+ into the shorter:
+ sub tmp, x1, #(CST & 0xfff000)
+ subs tmp, tmp, #(CST & 0x000fff)
+*/
+rtx
+aarch64_gen_compare_split_imm24 (rtx x, rtx imm, rtx tmp)
+{
+ HOST_WIDE_INT lo_imm = UINTVAL (imm) & 0xfff;
+ HOST_WIDE_INT hi_imm = UINTVAL (imm) & 0xfff000;
+ enum machine_mode mode = GET_MODE (x);
+
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (mode);
+
+ emit_insn (gen_add3_insn (tmp, x, GEN_INT (-hi_imm)));
+ /* TODO: We don't need the gpr result of the second insn. */
+ switch (mode)
+ {
+    case E_SImode:
+ tmp = gen_addsi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+ break;
+    case E_DImode:
+ tmp = gen_adddi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+ break;
+ default:
+ abort ();
+ }
+ emit_insn (tmp);
+
+ return gen_rtx_REG (CC_NZmode, CC_REGNUM);
+}
+
/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
Return the jump instruction. */
-static rtx
+rtx
aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
rtx_code_label *label)
{
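
A stand-alone sketch of the identity aarch64_gen_compare_split_imm24 relies on, using an arbitrary 24-bit constant purely as an illustration: subtracting the high and low 12-bit halves in turn yields zero, and therefore sets the Z flag on the final SUBS, exactly when the input equals the full constant.

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main ()
    {
      const uint64_t cst = 0x123456;        // arbitrary 24-bit constant
      const uint64_t hi  = cst & 0xfff000;  // immediate of the first SUB
      const uint64_t lo  = cst & 0x000fff;  // immediate of the final SUBS

      for (uint64_t x : { cst, cst + 1, cst - 1, uint64_t (0) })
        {
          // sub  tmp, x, #hi
          // subs tmp, tmp, #lo   ; EQ/NE only consume the resulting Z flag
          uint64_t tmp = x - hi;
          tmp -= lo;
          assert ((tmp == 0) == (x == cst));
        }
      return 0;
    }
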
@@ -14380,41 +14422,57 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
{
/* Conditional branch. */
- if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+ enum machine_mode cmpmode = GET_MODE (inner);
+ if (GET_MODE_CLASS (cmpmode) == MODE_CC)
return true;
- else
+
+ if (comparator == const0_rtx)
{
- if (cmpcode == NE || cmpcode == EQ)
+ switch (cmpcode)
{
- if (comparator == const0_rtx)
- {
- /* TBZ/TBNZ/CBZ/CBNZ. */
- if (GET_CODE (inner) == ZERO_EXTRACT)
- /* TBZ/TBNZ. */
- *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
- ZERO_EXTRACT, 0, speed);
- else
- /* CBZ/CBNZ. */
- *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
-
- return true;
- }
- if (register_operand (inner, VOIDmode)
- && aarch64_imm24 (comparator, VOIDmode))
+ case NE:
+ case EQ:
+ if (cmpmode != SImode && cmpmode != DImode)
+ break;
+ if (GET_CODE (inner) == ZERO_EXTRACT)
{
- /* SUB and SUBS. */
- *cost += COSTS_N_INSNS (2);
- if (speed)
- *cost += extra_cost->alu.arith * 2;
+ /* TBZ/TBNZ. */
+ *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
+ ZERO_EXTRACT, 0, speed);
return true;
}
+ /* FALLTHRU */
+
+ case LT:
+ case GE:
+ /* CBZ/CBNZ/TBZ/TBNZ. */
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ return true;
+
+ default:
+ break;
}
- else if (cmpcode == LT || cmpcode == GE)
- {
- /* TBZ/TBNZ. */
- if (comparator == const0_rtx)
- return true;
- }
+ }
+
+ if ((cmpcode == NE || cmpcode == EQ)
+ && (cmpmode == SImode || cmpmode == DImode)
+ && aarch64_split_imm24 (comparator, cmpmode))
+ {
+ /* SUB and SUBS. */
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ *cost += COSTS_N_INSNS (2);
+ if (speed)
+ *cost += extra_cost->alu.arith * 2;
+ return true;
+ }
+
+ if (TARGET_CMPBR)
+ {
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ if ((cmpmode != SImode && cmpmode != DImode)
+ || !aarch64_cb_rhs (cmpcode, comparator))
+ *cost += rtx_cost (comparator, cmpmode, cmpcode, 1, speed);
+ return true;
}
}
else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
@@ -16999,6 +17057,14 @@ private:
or vector loop. There is one entry for each tuning option of
interest. */
auto_vec<aarch64_vec_op_count, 2> m_ops;
+
+  /* When doing inner-loop vectorization the constraints on the data-refs in
+     the outer loop could limit the inner-loop references, i.e. the outer loop
+     can force the inner loop to do a load and splat, which will result in the
+     loop being entirely scalar as all lanes work on a duplicate.  Currently we
+     don't support unrolling of the inner loop independently from the outer
+     loop during outer-loop vectorization, which tends to lead to pipeline
+     bubbles.  */
+ bool m_loop_fully_scalar_dup = false;
};
aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
@@ -17320,13 +17386,14 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
static bool
aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
- unsigned int vec_flags)
+ slp_tree node, unsigned int vec_flags)
{
gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
if (!assign
+ || !node
|| gimple_assign_rhs_code (assign) != BIT_AND_EXPR
- || !STMT_VINFO_VECTYPE (stmt_info)
- || !VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_info)))
+ || !SLP_TREE_VECTYPE (node)
+ || !VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
return false;
for (int i = 1; i < 3; ++i)
@@ -17361,10 +17428,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
instructions. */
static unsigned int
aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
+ slp_tree node,
stmt_vec_info stmt_info,
const sve_vec_cost *sve_costs)
{
- switch (vect_reduc_type (vinfo, stmt_info))
+ switch (vect_reduc_type (vinfo, node))
{
case EXTRACT_LAST_REDUCTION:
return sve_costs->clast_cost;
@@ -17404,7 +17472,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
- If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the
SVE implementation. */
static unsigned int
-aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
+aarch64_in_loop_reduction_latency (vec_info *vinfo,
+ slp_tree node,
+ stmt_vec_info stmt_info,
unsigned int vec_flags)
{
const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs;
@@ -17417,7 +17487,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
if (sve_costs)
{
unsigned int latency
- = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+ = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+ stmt_info, sve_costs);
if (latency)
return latency;
}
@@ -17493,7 +17564,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
if (kind == scalar_load
&& node
&& sve_costs
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
unsigned int nunits = vect_nunits_for_cost (vectype);
/* Test for VNx2 modes, which have 64-bit containers. */
@@ -17507,7 +17578,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
if (kind == scalar_store
&& node
&& sve_costs
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return sve_costs->scatter_store_elt_cost;
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
@@ -17516,7 +17587,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
&& sve_costs)
{
unsigned int latency
- = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+ = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+ stmt_info, sve_costs);
if (latency)
return latency;
}
@@ -17665,7 +17737,7 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
/* For vector boolean ANDs with a compare operand we just need
one insn. */
- if (aarch64_bool_compound_p (vinfo, stmt_info, vec_flags))
+ if (aarch64_bool_compound_p (vinfo, stmt_info, node, vec_flags))
return 0;
}
@@ -17698,13 +17770,12 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
with the single accumulator being read and written multiple times. */
static bool
-aarch64_force_single_cycle (vec_info *vinfo, stmt_vec_info stmt_info)
+aarch64_force_single_cycle (vec_info *vinfo, slp_tree node)
{
- if (!STMT_VINFO_REDUC_DEF (stmt_info))
+ auto reduc_info = info_for_reduction (as_a <loop_vec_info> (vinfo), node);
+ if (!reduc_info)
return false;
-
- auto reduc_info = info_for_reduction (vinfo, stmt_info);
- return STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
+ return VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info);
}
/* COUNT, KIND and STMT_INFO are the same as for vector_costs::add_stmt_cost
@@ -17728,8 +17799,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
&& vect_is_reduction (stmt_info))
{
unsigned int base
- = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags);
- if (aarch64_force_single_cycle (m_vinfo, stmt_info))
+ = aarch64_in_loop_reduction_latency (m_vinfo, node,
+ stmt_info, m_vec_flags);
+ if (m_costing_for_scalar
+ || aarch64_force_single_cycle (m_vinfo, node))
/* ??? Ideally we'd use a tree to reduce the copies down to 1 vector,
and then accumulate that, but at the moment the loop-carried
dependency includes all copies. */
@@ -17746,7 +17819,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
/* Assume that bool AND with compare operands will become a single
operation. */
- if (aarch64_bool_compound_p (m_vinfo, stmt_info, m_vec_flags))
+ if (aarch64_bool_compound_p (m_vinfo, stmt_info, node, m_vec_flags))
return;
}
@@ -17763,7 +17836,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& kind == vec_to_scalar
&& (m_vec_flags & VEC_ADVSIMD)
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
auto dr = STMT_VINFO_DATA_REF (stmt_info);
tree dr_ref = DR_REF (dr);
@@ -17842,7 +17915,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
have only accounted for one. */
if (stmt_info
&& (kind == vector_stmt || kind == vec_to_scalar)
- && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
+ && vect_reduc_type (m_vinfo, node) == COND_REDUCTION)
ops->general_ops += count;
/* Count the predicate operations needed by an SVE comparison. */
@@ -17878,7 +17951,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& sve_issue
&& (kind == scalar_load || kind == scalar_store)
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
unsigned int pairs = CEIL (count, 2);
ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -17987,6 +18060,17 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
tree vectype, int misalign,
vect_cost_model_location where)
{
+ /* When costing for scalars, vectype will be NULL; so look up the type via
+ stmt_info's statement. */
+ if (m_costing_for_scalar && stmt_info)
+ {
+ gcc_assert (!vectype);
+ /* This won't work for e.g. gconds or other statements without a lhs,
+ but those only work on GPR anyway and this is the best we can do. */
+ if (tree lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)))
+ vectype = TREE_TYPE (lhs);
+ }
+
fractional_cost stmt_cost
= aarch64_builtin_vectorization_cost (kind, vectype, misalign);
@@ -18002,6 +18086,28 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
analyze_loop_vinfo (loop_vinfo);
m_analyzed_vinfo = true;
+ if (in_inner_loop_p)
+ m_loop_fully_scalar_dup = true;
+ }
+
+ /* Detect whether the loop is working on fully duplicated lanes. This would
+ only be possible with inner loop vectorization since otherwise we wouldn't
+ try to vectorize. */
+ if (in_inner_loop_p
+ && node
+ && m_loop_fully_scalar_dup
+ && SLP_TREE_LANES (node) == 1
+ && !SLP_TREE_CHILDREN (node).exists ())
+ {
+ /* Check if load is a duplicate. */
+ if (gimple_vuse (stmt_info->stmt)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_INVARIANT)
+ ;
+ else if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (node) == vect_external_def)
+ ;
+ else
+ m_loop_fully_scalar_dup = false;
}
/* Apply the heuristic described above m_stp_sequence_cost. */
@@ -18036,7 +18142,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
&& node
&& vectype
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
if (sve_costs)
@@ -18368,8 +18474,19 @@ adjust_body_cost (loop_vec_info loop_vinfo,
if (m_vec_flags & VEC_ANY_SVE)
threshold = CEIL (threshold, aarch64_estimated_sve_vq ());
- if (m_num_vector_iterations >= 1
- && m_num_vector_iterations < threshold)
+ /* Increase the cost of the vector code if it looks like the vector code has
+ limited throughput due to outer-loop vectorization. */
+ if (m_loop_fully_scalar_dup)
+ {
+ body_cost *= estimated_vf;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Increasing body cost to %d because vector code has"
+			     " low throughput per iteration due to splats\n",
+ body_cost);
+ }
+ else if (m_num_vector_iterations >= 1
+ && m_num_vector_iterations < threshold)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
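
A compressed paraphrase of the new heuristic, with illustrative helper names (not GCC code): if every single-lane leaf node costed in the inner loop is either an invariant (splatted) load or an external/constant definition, all vector lanes compute the same value, so the body cost is scaled by the estimated vectorization factor and the cost model backs away from such outer-loop vectorization.

    #include <vector>

    // One entry per single-lane leaf SLP node costed in the inner loop.
    struct leaf_node
    {
      bool invariant_load;        // load-and-splat (VMAT_INVARIANT)
      bool external_or_constant;  // vect_external_def / vect_constant_def
    };

    // The heuristic stays true only while every leaf is a duplicate across lanes.
    static bool fully_scalar_dup (const std::vector<leaf_node> &leaves)
    {
      for (const leaf_node &n : leaves)
        if (!n.invariant_load && !n.external_or_constant)
          return false;           // a lane-varying operand disables it
      return true;
    }

    // Mirrors the body-cost adjustment above.
    static unsigned adjusted_body_cost (unsigned body_cost, unsigned estimated_vf,
                                        bool scalar_dup)
    {
      return scalar_dup ? body_cost * estimated_vf : body_cost;
    }

    int main ()
    {
      std::vector<leaf_node> leaves = { { true, false }, { false, true } };
      return adjusted_body_cost (10, 4, fully_scalar_dup (leaves)) == 40 ? 0 : 1;
    }
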
@@ -31808,7 +31925,7 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
/* Expand the spaceship optab for floating-point operands.
- If the result is compared against (-1, 0, 1 , 2), expand into
+ If the result is compared against (-1, 0, 1, -128), expand into
fcmpe + conditional branch insns.
Otherwise (the result is just stored as an integer), expand into
@@ -31847,7 +31964,7 @@ aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
emit_jump (end_label);
emit_label (un_label);
- emit_move_insn (dest, const2_rtx);
+ emit_move_insn (dest, GEN_INT (-128));
emit_jump (end_label);
emit_label (gt_label);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 096c853..2b3610c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -410,8 +410,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
/* CSSC instructions are enabled through +cssc. */
#define TARGET_CSSC AARCH64_HAVE_ISA (CSSC)
-/* CB<cc> instructions are enabled through +cmpbr. */
-#define TARGET_CMPBR AARCH64_HAVE_ISA (CMPBR)
+/* CB<cc> instructions are enabled through +cmpbr,
+ but are incompatible with -mtrack-speculation. */
+#define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && !aarch64_track_speculation)
/* Make sure this is always defined so we don't have to check for ifdefs
but rather use normal ifs. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index dc2be81..6e215c4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -441,6 +441,16 @@
; must not operate on inactive inputs if doing so could induce a fault.
(SVE_STRICT_GP 1)])
+;; These constants are used as a const_int in MTE instructions
+(define_constants
+ [; 0xf0ff...
+ ; Tag mask for the 4-bit tag stored in the top 8 bits of a pointer.
+ (MEMTAG_TAG_MASK -1080863910568919041)
+
+ ; 0x00ff...
+   ; Mask for the 56-bit address used by the subp instruction.
+ (MEMTAG_ADDR_MASK 72057594037927935)])
+
(include "constraints.md")
(include "predicates.md")
(include "iterators.md")
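
The decimal const_int values behind the two new constants are easier to audit in hex; a quick stand-alone check (illustrative only):

    #include <cstdint>

    // MEMTAG_TAG_MASK clears the 4-bit MTE tag field (bits 59:56) so a freshly
    // generated tag can be inserted by ORing in (tag << 56).
    static_assert ((uint64_t) -1080863910568919041LL == 0xf0ffffffffffffffULL,
                   "MEMTAG_TAG_MASK");

    // MEMTAG_ADDR_MASK keeps only the 56 address bits consumed by SUBP.
    static_assert (72057594037927935ULL == 0x00ffffffffffffffULL,
                   "MEMTAG_ADDR_MASK");

    int main () { return 0; }
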
@@ -725,8 +735,8 @@
(BRANCH_LEN_N_32KiB -32768)
;; +/- 1KiB. Used by CBB<cond>, CBH<cond>, CB<cond>.
- (BRANCH_LEN_P_1Kib 1020)
- (BRANCH_LEN_N_1Kib -1024)
+ (BRANCH_LEN_P_1KiB 1020)
+ (BRANCH_LEN_N_1KiB -1024)
]
)
@@ -804,7 +814,7 @@
)
;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
-(define_insn "aarch64_cbz<optab><mode>1"
+(define_insn "*aarch64_cbz<optab><mode>"
[(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1))
@@ -838,27 +848,13 @@
[(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1))
- (pc)))
- (clobber (reg:CC CC_REGNUM))]
+ (pc)))]
"!aarch64_track_speculation"
{
- if (get_attr_length (insn) == 8)
- {
- if (get_attr_far_branch (insn) == FAR_BRANCH_YES)
- return aarch64_gen_far_branch (operands, 1, "Ltb",
- "<inv_tb>\\t%<w>0, <sizem1>, ");
- else
- {
- char buf[64];
- uint64_t val = ((uint64_t) 1)
- << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
- sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
- output_asm_insn (buf, operands);
- return "<bcond>\t%l1";
- }
- }
- else
+ if (get_attr_length (insn) == 4)
return "<tbz>\t%<w>0, <sizem1>, %l1";
+ return aarch64_gen_far_branch (operands, 1, "Ltb",
+ "<inv_tb>\\t%<w>0, <sizem1>, ");
}
[(set_attr "type" "branch")
(set (attr "length")
@@ -870,44 +866,44 @@
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 1) (pc))
- (const_int BRANCH_LEN_N_1MiB))
+ (const_int BRANCH_LEN_N_32KiB))
(lt (minus (match_dup 1) (pc))
- (const_int BRANCH_LEN_P_1MiB)))
+ (const_int BRANCH_LEN_P_32KiB)))
(const_string "no")
(const_string "yes")))]
)
;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction.
;; The immediate range depends on the comparison code.
-;; Comparisons against immediates outside this range fall back to
-;; CMP + B<cond>.
-(define_insn "aarch64_cb<INT_CMP:code><GPI:mode>"
- [(set (pc) (if_then_else (INT_CMP
- (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "nonmemory_operand"
- "r<INT_CMP:cmpbr_imm_constraint>"))
- (label_ref (match_operand 2))
- (pc)))]
- "TARGET_CMPBR && aarch64_cb_rhs (<INT_CMP:CODE>, operands[1])"
+(define_insn "*aarch64_cb<code><mode>"
+ [(set (pc) (if_then_else
+ (INT_CMP
+ (match_operand:GPI 0 "register_operand" "r")
+ (match_operand:GPI 1
+ "aarch64_reg_<cmpbr_imm_constraint>_operand"
+ "r<cmpbr_imm_constraint>"))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "TARGET_CMPBR"
{
- return (get_attr_far_branch (insn) == FAR_BRANCH_NO)
- ? "cb<INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2"
- : aarch64_gen_far_branch (operands, 2, "L",
- "cb<INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, ");
+ if (get_attr_length (insn) == 4)
+ return "cb<cmp_op>\t%<w>0, %<w>1, %l2";
+ return aarch64_gen_far_branch (operands, 2, "L",
+ "cb<inv_cmp_op>\t%<w>0, %<w>1, ");
}
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_string "no")
(const_string "yes")))]
)
@@ -929,16 +925,16 @@
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_N_1Kib))
+ (const_int BRANCH_LEN_N_1KiB))
(lt (minus (match_dup 2) (pc))
- (const_int BRANCH_LEN_P_1Kib)))
+ (const_int BRANCH_LEN_P_1KiB)))
(const_string "no")
(const_string "yes")))]
)
@@ -978,37 +974,24 @@
(const_string "yes")))]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; b<ne,eq> .Label
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; b<ne,eq> .Label
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>"
- [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "aarch64_imm24" "n"))
- (label_ref:P (match_operand 2))
- (pc)))]
- "!aarch64_move_imm (INTVAL (operands[1]), <GPI:MODE>mode)
- && !aarch64_plus_operand (operands[1], <GPI:MODE>mode)
- && !reload_completed"
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "aarch64_split_imm24" "n")])
+ (label_ref (match_operand 3))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<GPI:MODE>mode);
- emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
- emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode,
- cc_reg, const0_rtx);
- emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2]));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[1], operands[2],
+ operands[4]);
+ emit_jump_insn (gen_aarch64_bcond (operands[0], cc_reg, operands[3]));
DONE;
}
)
@@ -1413,16 +1396,16 @@
/* Save GCS with code like
mov x16, 1
chkfeat x16
- tbnz x16, 0, .L_done
+ cbnz x16, .L_done
mrs tmp, gcspr_el0
str tmp, [%0, 8]
.L_done: */
- rtx done_label = gen_label_rtx ();
+ auto done_label = gen_label_rtx ();
rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
emit_move_insn (r16, const1_rtx);
emit_insn (gen_aarch64_chkfeat ());
- emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+ emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label));
rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
rtx gcs = gen_reg_rtx (Pmode);
emit_insn (gen_aarch64_load_gcspr (gcs));
@@ -1445,7 +1428,7 @@
/* Restore GCS with code like
mov x16, 1
chkfeat x16
- tbnz x16, 0, .L_done
+ cbnz x16, .L_done
ldr tmp1, [%1, 8]
mrs tmp2, gcspr_el0
subs tmp2, tmp1, tmp2
@@ -1456,12 +1439,12 @@
b.ne .L_loop
.L_done: */
- rtx loop_label = gen_label_rtx ();
- rtx done_label = gen_label_rtx ();
+ auto loop_label = gen_label_rtx ();
+ auto done_label = gen_label_rtx ();
rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
emit_move_insn (r16, const1_rtx);
emit_insn (gen_aarch64_chkfeat ());
- emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+ emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label));
rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
rtx gcs_old = gen_reg_rtx (Pmode);
emit_move_insn (gcs_old, gcs_slot);
@@ -4524,7 +4507,7 @@
[(set_attr "type" "fcmp<stype>")]
)
-(define_insn "*cmp_swp_<shift>_reg<mode>"
+(define_insn "cmp_swp_<shift>_reg<mode>"
[(set (reg:CC_SWP CC_REGNUM)
(compare:CC_SWP (ASHIFT:GPI
(match_operand:GPI 0 "register_operand" "r")
@@ -4651,39 +4634,24 @@
[(set_attr "type" "csel")]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; cset x2, <ne,eq>
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; cset x2, <ne, eq>.
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*compare_cstore<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (EQL:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "aarch64_imm24" "n")))
- (clobber (reg:CC CC_REGNUM))]
- "!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
- && !aarch64_plus_operand (operands[2], <MODE>mode)
- && !reload_completed"
+ (match_operator:GPI 1 "aarch64_equality_operator"
+ [(match_operand:GPI 2 "register_operand" "r")
+ (match_operand:GPI 3 "aarch64_split_imm24" "n")]))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (-hi_imm)));
- emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
- emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp_rtx, cc_reg));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[2], operands[3],
+ operands[4]);
+ emit_insn (gen_aarch64_cstore<mode> (operands[0], operands[1], cc_reg));
DONE;
}
- [(set_attr "type" "csel")]
)
;; zero_extend version of the above
@@ -4813,15 +4781,21 @@
(match_operand:ALLI 3 "register_operand")))]
""
{
- rtx ccreg;
enum rtx_code code = GET_CODE (operands[1]);
-
if (code == UNEQ || code == LTGT)
FAIL;
- ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
- XEXP (operands[1], 1));
- operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ rtx ccreg = XEXP (operands[1], 0);
+ enum machine_mode ccmode = GET_MODE (ccreg);
+ if (GET_MODE_CLASS (ccmode) == MODE_CC)
+ gcc_assert (XEXP (operands[1], 1) == const0_rtx);
+ else if (ccmode == QImode || ccmode == HImode)
+ FAIL;
+ else
+ {
+ ccreg = aarch64_gen_compare_reg (code, ccreg, XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }
}
)
@@ -7716,6 +7690,22 @@
}
)
+(define_expand "isinf<mode>2"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")]
+ "TARGET_FLOAT"
+{
+ rtx op = force_lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_move_insn (tmp, GEN_INT (HOST_WIDE_INT_M1U << (<mantissa_bits> + 1)));
+ rtx cc_reg = gen_rtx_REG (CC_SWPmode, CC_REGNUM);
+ emit_insn (gen_cmp_swp_lsl_reg<v_int_equiv> (op, GEN_INT (1), tmp));
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ emit_insn (gen_aarch64_cstoresi (operands[0], cmp, cc_reg));
+ DONE;
+}
+)
+
;; -------------------------------------------------------------------
;; Reload support
;; -------------------------------------------------------------------
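
The isinf<mode>2 expander above is a pure integer bit test: shift the FP bit pattern left by one to discard the sign, then compare against a mask that is all-ones above the mantissa. A stand-alone sketch of the same test for SFmode (mantissa_bits = 23), with illustrative names:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static bool isinf_bits (float f)
    {
      uint32_t bits;
      std::memcpy (&bits, &f, sizeof bits);
      // -1 << (mantissa_bits + 1), truncated to SImode: 0xff000000 for SFmode.
      const uint32_t mask = 0xffffffffu << (23 + 1);
      // Shifting left by one drops the sign bit; the result equals the mask
      // exactly when the exponent is all-ones and the mantissa is zero,
      // i.e. for +/-infinity (a NaN keeps a non-zero mantissa bit).
      return (bits << 1) == mask;
    }

    int main ()
    {
      assert (isinf_bits (HUGE_VALF) && isinf_bits (-HUGE_VALF));
      assert (!isinf_bits (0.0f) && !isinf_bits (1.0f) && !isinf_bits (NAN));
      return 0;
    }
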
@@ -8566,7 +8556,7 @@
[(set (match_operand:DI 0 "register_operand" "=rk")
(ior:DI
(and:DI (match_operand:DI 1 "register_operand" "rk")
- (const_int -1080863910568919041)) ;; 0xf0ff...
+ (const_int MEMTAG_TAG_MASK))
(ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")]
UNSPEC_GEN_TAG_RND)
(const_int 56))))]
@@ -8609,9 +8599,9 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI
(and:DI (match_operand:DI 1 "register_operand" "rk")
- (const_int 72057594037927935)) ;; 0x00ff...
+ (const_int MEMTAG_ADDR_MASK))
(and:DI (match_operand:DI 2 "register_operand" "rk")
- (const_int 72057594037927935))))] ;; 0x00ff...
+ (const_int MEMTAG_ADDR_MASK))))]
"TARGET_MEMTAG"
"subp\\t%0, %1, %2"
[(set_attr "type" "memtag")]
@@ -8621,7 +8611,7 @@
(define_insn "ldg"
[(set (match_operand:DI 0 "register_operand" "+r")
(ior:DI
- (and:DI (match_dup 0) (const_int -1080863910568919041)) ;; 0xf0ff...
+ (and:DI (match_dup 0) (const_int MEMTAG_TAG_MASK))
(ashift:DI
(mem:QI (unspec:DI
[(and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index dc1925d..7b9e558 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -312,15 +312,9 @@
(define_constraint "Uc1"
"@internal
- A constraint that matches the integers 1...64."
+ A constraint that matches the integers 0...62."
(and (match_code "const_int")
- (match_test "IN_RANGE (ival, 1, 64)")))
-
-(define_constraint "Uc2"
- "@internal
- A constraint that matches the integers -1...62."
- (and (match_code "const_int")
- (match_test "IN_RANGE (ival, -1, 62)")))
+ (match_test "IN_RANGE (ival, 0, 62)")))
(define_constraint "Up3"
"@internal
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 68b080d..7a6ea0d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1340,6 +1340,8 @@
(define_mode_attr half_mask [(HI "255") (SI "65535") (DI "4294967295")])
+(define_mode_attr mantissa_bits [(SF "23") (DF "52")])
+
;; For constraints used in scalar immediate vector moves
(define_mode_attr hq [(HI "h") (QI "q")])
@@ -2203,7 +2205,8 @@
(SI "si")])
;; Like ve_mode but for the half-width modes.
-(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si")])
+(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si") (DI "si")
+ (SI "hi") (HI "qi")])
;; Vm for lane instructions is restricted to FP_LO_REGS.
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
@@ -2986,19 +2989,15 @@
(define_code_iterator INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
+;; Inverse comparisons must have the same constraint so that
+;; branches can be redirected during late compilation.
(define_code_attr cmpbr_imm_constraint [
- (eq "Uc0")
- (ne "Uc0")
- (gt "Uc0")
- (gtu "Uc0")
- (lt "Uc0")
- (ltu "Uc0")
-
- (ge "Uc1")
- (geu "Uc1")
-
- (le "Uc2")
- (leu "Uc2")
+ (eq "Uc0") (ne "Uc0")
+ (lt "Uc0") (ge "Uc0")
+ (ltu "Uc0") (geu "Uc0")
+
+ (gt "Uc1") (le "Uc1")
+ (gtu "Uc1") (leu "Uc1")
])
(define_code_attr fix_trunc_optab [(fix "fix_trunc")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 4d5d57f..42304ce 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -286,10 +286,15 @@
(and (match_code "const_int")
(match_test "UINTVAL (op) <= 7")))
-;; An immediate that fits into 24 bits.
-(define_predicate "aarch64_imm24"
- (and (match_code "const_int")
- (match_test "IN_RANGE (UINTVAL (op), 0, 0xffffff)")))
+;; An immediate that fits into 24 bits, but needs splitting.
+(define_predicate "aarch64_split_imm24"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT i = UINTVAL (op);
+ return (IN_RANGE (i, 0, 0xffffff)
+ && !aarch64_move_imm (i, mode)
+ && !aarch64_uimm12_shift (i));
+})
(define_predicate "aarch64_mem_pair_offset"
(and (match_code "const_int")
@@ -1084,3 +1089,13 @@
(define_special_predicate "aarch64_ptrue_all_operand"
(and (match_code "const_vector")
(match_test "aarch64_ptrue_all_mode (op) == mode")))
+
+(define_predicate "aarch64_reg_Uc0_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Uc0 (op)"))))
+
+(define_predicate "aarch64_reg_Uc1_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Uc1 (op)"))))
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index 38a8c06..63ca8e9 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -190,12 +190,6 @@ aarch-bti-insert.o: $(srcdir)/config/arm/aarch-bti-insert.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/arm/aarch-bti-insert.cc
-aarch64-cc-fusion.o: $(srcdir)/config/aarch64/aarch64-cc-fusion.cc \
- $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
- $(RTL_SSA_H) tree-pass.h
- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
- $(srcdir)/config/aarch64/aarch64-cc-fusion.cc
-
aarch64-early-ra.o: $(srcdir)/config/aarch64/aarch64-early-ra.cc \
$(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
$(RTL_SSA_H) tree-pass.h
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index d119464..8f7e537 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -66,9 +66,9 @@
;; I signed 12-bit immediate (for ARCompact)
;; K unsigned 3-bit immediate (for ARCompact)
;; L unsigned 6-bit immediate (for ARCompact)
-;; M unsinged 5-bit immediate (for ARCompact)
-;; O unsinged 7-bit immediate (for ARCompact)
-;; P unsinged 8-bit immediate (for ARCompact)
+;; M unsigned 5-bit immediate (for ARCompact)
+;; O unsigned 7-bit immediate (for ARCompact)
+;; P unsigned 8-bit immediate (for ARCompact)
;; N constant '1' (for ARCompact)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 537a3e2..422ae54 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -13026,7 +13026,7 @@
"arm_coproc_builtin_available (VUNSPEC_<MCRR>)"
{
arm_const_bounds (operands[0], 0, 16);
- arm_const_bounds (operands[1], 0, 8);
+ arm_const_bounds (operands[1], 0, 16);
arm_const_bounds (operands[3], 0, (1 << 5));
return "<mcrr>\\tp%c0, %1, %Q2, %R2, CR%c3";
}
@@ -13041,7 +13041,7 @@
"arm_coproc_builtin_available (VUNSPEC_<MRRC>)"
{
arm_const_bounds (operands[1], 0, 16);
- arm_const_bounds (operands[2], 0, 8);
+ arm_const_bounds (operands[2], 0, 16);
arm_const_bounds (operands[3], 0, (1 << 5));
return "<mrrc>\\tp%c1, %2, %Q0, %R0, CR%c3";
}
diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h
index ff269bf..c95c758 100644
--- a/gcc/config/avr/specs.h
+++ b/gcc/config/avr/specs.h
@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see
"%(asm_errata_skip) "
#define LINK_RELAX_SPEC \
- "%{mrelax:--relax} "
+ "%{!r:%{mrelax:--relax}} "
#undef LINK_SPEC
#define LINK_SPEC \
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 1681c79..f356679 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -171,7 +171,7 @@ extern int cris_cpu_version;
/* For the cris-*-elf subtarget. */
#define CRIS_ASM_SUBTARGET_SPEC \
- "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
+ "--emulation=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
/* FIXME: We should propagate the -melf option to make the criself
"emulation" unless a linker script is provided (-T*), but I don't know
diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def
index 44adcc6..76587c2 100644
--- a/gcc/config/darwin-sections.def
+++ b/gcc/config/darwin-sections.def
@@ -215,3 +215,10 @@ DEF_SECTION (objc2_method_names_section, 0,
DEF_SECTION (objc2_method_types_section, 0,
".section __TEXT, __objc_methtype, cstring_literals", 1)
+
+/* ASAN sections. */
+
+DEF_SECTION (asan_string_section, 0, ".section __TEXT, __asan_cstring", 0)
+DEF_SECTION (asan_globals_section, 0, ".section __DATA, __asan_globals", 0)
+DEF_SECTION (asan_liveness_section, 0,
+ ".section __DATA,__asan_liveness,regular,live_support", 0)
diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index be2daed..75ac356 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs.h"
#include "flags.h"
#include "opts.h"
+#include "asan.h"
/* Fix and Continue.
@@ -1298,6 +1299,39 @@ darwin_encode_section_info (tree decl, rtx rtl, int first)
SYMBOL_FLAG_EXTERNAL. */
default_encode_section_info (decl, rtl, first);
+ if (CONSTANT_CLASS_P (decl))
+ {
+ bool is_str = TREE_CODE (decl) == STRING_CST;
+ rtx sym_ref = XEXP (rtl, 0);
+
+ /* Unless this is a string cst or we are in an anchored section we have
+ nothing more to do here. */
+ if (!is_str && !SYMBOL_REF_HAS_BLOCK_INFO_P (sym_ref))
+ return;
+
+ tree sym_decl = SYMBOL_REF_DECL (sym_ref);
+ const char *name = XSTR (sym_ref, 0);
+ gcc_checking_assert (strncmp ("*lC", name, 3) == 0);
+
+ char *buf;
+ if (is_str)
+ {
+ bool for_asan = (flag_sanitize & SANITIZE_ADDRESS)
+ && asan_protect_global (CONST_CAST_TREE (decl));
+ /* When we are generating code for sanitized strings, the string
+ internal symbols are made visible in the object. */
+ buf = xasprintf ("*%c.str.%s", for_asan ? 'l' : 'L', &name[3]);
+ }
+ else
+	/* Let's identify anchored constants with a different prefix, for the
+ sake of inspection only. */
+ buf = xasprintf ("*LaC%s", &name[3]);
+ if (sym_decl)
+ DECL_NAME (sym_decl) = get_identifier (buf);
+ XSTR (sym_ref, 0) = ggc_strdup (buf);
+ free (buf);
+ }
+
if (! VAR_OR_FUNCTION_DECL_P (decl))
return;
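
A small model of the renaming above, with hypothetical inputs (the helper is not GCC code): an internal constant symbol "*lC4" becomes "*l.str.4" when ASAN needs the string visible in the object, "*L.str.4" for an ordinary string, and "*LaC4" for an anchored non-string constant.

    #include <cstdio>
    #include <string>

    // NAME is the original "*lC<N>" assembler name, as in
    // darwin_encode_section_info.
    static std::string rename_constant (const std::string &name,
                                        bool is_string, bool for_asan)
    {
      const std::string tail = name.substr (3);  // drop the "*lC" prefix
      if (is_string)
        return std::string ("*") + (for_asan ? 'l' : 'L') + ".str." + tail;
      return "*LaC" + tail;                      // anchored, non-string constant
    }

    int main ()
    {
      std::printf ("%s\n", rename_constant ("*lC4", true, true).c_str ());   // *l.str.4
      std::printf ("%s\n", rename_constant ("*lC4", true, false).c_str ());  // *L.str.4
      std::printf ("%s\n", rename_constant ("*lC4", false, false).c_str ()); // *LaC4
      return 0;
    }
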
@@ -1683,6 +1717,17 @@ machopic_select_section (tree decl,
ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ;
+ /* Trump categorize_decl_for_section () for ASAN stuff - the Darwin
+ categorisations are special. */
+ if (flag_sanitize & SANITIZE_ADDRESS)
+ {
+ if (TREE_CODE (decl) == STRING_CST
+ && asan_protect_global (CONST_CAST_TREE (decl)))
+ {
+ return darwin_sections[asan_string_section];
+ }
+ }
+
switch (categorize_decl_for_section (decl, reloc))
{
case SECCAT_TEXT:
@@ -1699,7 +1744,12 @@ machopic_select_section (tree decl,
break;
case SECCAT_RODATA_MERGE_STR_INIT:
- base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align);
+ if ((flag_sanitize & SANITIZE_ADDRESS)
+ && asan_protect_global (CONST_CAST_TREE (decl)))
+ /* or !flag_merge_constants */
+ return darwin_sections[asan_string_section];
+ else
+ return darwin_mergeable_string_section (DECL_INITIAL (decl), align);
break;
case SECCAT_RODATA_MERGE_CONST:
@@ -3297,11 +3347,16 @@ darwin_use_anchors_for_symbol_p (const_rtx symbol)
{
if (DARWIN_SECTION_ANCHORS && flag_section_anchors)
{
- section *sect;
- /* If the section contains a zero-sized object it's ineligible. */
- sect = SYMBOL_REF_BLOCK (symbol)->sect;
- /* This should have the effect of disabling anchors for vars that follow
- any zero-sized one, in a given section. */
+ tree decl = SYMBOL_REF_DECL (symbol);
+      /* If the symbol would be linker-visible, then the linker could split
+	 the section at it, so we must disallow anchoring.  This is stricter
+	 than the default implementation.
+	 TODO: add other cases.  */
+ if (decl && DECL_P (decl)
+ && (TREE_PUBLIC (decl) || !DECL_ARTIFICIAL (decl)))
+ return false;
+
+ /* We mark sections containing unsuitable entries. */
+ section *sect = SYMBOL_REF_BLOCK (symbol)->sect;
if (sect->common.flags & SECTION_NO_ANCHOR)
return false;
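
As a hedged illustration of the encode_section_info and machopic_select_section changes above (user-level C; the label number is illustrative): with -fsanitize=address on Darwin, a protected literal such as the one below is emitted into the new __TEXT,__asan_cstring section, and its internal label is renamed from the usual *lC<N> form to the linker-visible l.str.<N> form so ASan can see it in the object.

const char *
greeting (void)
{
  return "hello, asan";   /* string constant protected by ASan */
}
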
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 9b9a3fe..c3e28e2 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -287,6 +287,19 @@ extern GTY(()) int darwin_ms_struct;
#define DARWIN_RDYNAMIC "%{rdynamic:%nrdynamic is not supported}"
#endif
+#if LD64_HAS_NO_DEDUPLICATE
+/* What we want is "when the optimization level is debug OR when it is
+ a compile & link job with implied O0 optimization". */
+#define DARWIN_LD_NO_DEDUPLICATE \
+ "%{O0|O1|O|Og: -no_deduplicate} \
+ %{!O*:\
+ %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.S|.i|.ii|.mi|.mii|\
+ .f|.for|.ftn|.fpp|.f90|.f95|.f03|.f08|.f77|.F|.F90|.F95|.F03|.F08|\
+ .d|.mod: -no_deduplicate }} "
+#else
+#define DARWIN_LD_NO_DEDUPLICATE ""
+#endif
+
#if LD64_HAS_MACOS_VERSION_MIN
# define DARWIN_PLATFORM_ID \
"%{mmacosx-version-min=*:-macos_version_min %*} "
@@ -403,10 +416,14 @@ extern GTY(()) int darwin_ms_struct;
%(linker)" \
DARWIN_LD_DEMANGLE \
LINK_PLUGIN_SPEC \
+ DARWIN_LD_NO_DEDUPLICATE \
"%{flto*:%<fcompare-debug*} \
%{flto} %{fno-lto} %{flto=*} \
- %l " \
+ %{static}%{!static:%{!dynamic:-dynamic}} \
+ %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
+ %{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
DARWIN_PLATFORM_ID \
+ " %l " \
LINK_COMPRESS_DEBUG_SPEC \
"%X %{s} %{t} %{Z} %{u*} \
%{e*} %{r} \
@@ -493,9 +510,8 @@ extern GTY(()) int darwin_ms_struct;
Note that options taking arguments may appear multiple times on a command
line with different arguments each time, so put a * after their names so
all of them get passed. */
-#define LINK_SPEC \
- "%{static}%{!static:%{!dynamic:-dynamic}} \
- %:remove-outfile(-ldl) \
+#define LINK_SPEC \
+ "%:remove-outfile(-ldl) \
%:remove-outfile(-lm) \
%:remove-outfile(-lpthread) \
%{fgnu-runtime: %{static|static-libgcc: \
@@ -511,9 +527,7 @@ extern GTY(()) int darwin_ms_struct;
%{static|static-libgm2:%:replace-outfile(-lm2iso libm2iso.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2min libm2min.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2log libm2log.a%s)}\
- %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)}\
- %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
- %{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
+ %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)} "\
LINK_SYSROOT_SPEC \
"%{!multiply_defined*:%{shared-libgcc: \
%:version-compare(< 10.5 mmacosx-version-min= -multiply_defined) \
@@ -1005,6 +1019,8 @@ extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS];
sprintf (LABEL, "*%s%ld", "lASAN", (long)(NUM));\
else if (strcmp ("LTRAMP", PREFIX) == 0) \
sprintf (LABEL, "*%s%ld", "lTRAMP", (long)(NUM));\
+ else if (strncmp ("LANCHOR", PREFIX, 7) == 0) \
+ sprintf (LABEL, "*%s%ld", "lANCHOR", (long)(NUM));\
else \
sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM)); \
} while (0)
diff --git a/gcc/config/h8300/addsub.md b/gcc/config/h8300/addsub.md
index 32eba9d..f153625 100644
--- a/gcc/config/h8300/addsub.md
+++ b/gcc/config/h8300/addsub.md
@@ -271,7 +271,7 @@
(match_operand:QHSI 2 "register_operand" "r"))
(match_dup 1)))
(set (match_operand:QHSI 0 "register_operand" "=r")
- (plus (match_dup 1) (match_dup 2)))
+ (plus:QHSI (match_dup 1) (match_dup 2)))
(clobber (reg:CC CC_REG))]
""
{
diff --git a/gcc/config/h8300/jumpcall.md b/gcc/config/h8300/jumpcall.md
index 4e63408..44847e4 100644
--- a/gcc/config/h8300/jumpcall.md
+++ b/gcc/config/h8300/jumpcall.md
@@ -156,7 +156,7 @@
"#"
"&& reload_completed"
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
+ (eq:CCZ (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
@@ -181,7 +181,7 @@
(lshiftrt:SI (match_dup 1) (const_int 16))))
(clobber (reg:CC CC_REG))])
(set (reg:CCZ CC_REG)
- (eq (zero_extract:SI (match_dup 4) (const_int 1) (match_dup 2))
+ (eq:CCZ (zero_extract:SI (match_dup 4) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
@@ -288,7 +288,7 @@
})
(define_insn "call_insn_<mode>"
- [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr"))
+ [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr"))
(match_operand:P 1 "general_operand" "g"))]
"!SIBLING_CALL_P (insn)"
{
@@ -326,7 +326,7 @@
(define_insn "call_value_insn_<mode>"
[(set (match_operand 0 "" "=r")
- (call (mem:QI (match_operand 1 "call_insn_operand" "Cr"))
+ (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr"))
(match_operand:P 2 "general_operand" "g")))]
"!SIBLING_CALL_P (insn)"
{
@@ -358,7 +358,7 @@
})
(define_insn "sibcall_insn_<mode>"
- [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr"))
+ [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr"))
(match_operand:P 1 "general_operand" "g"))]
"SIBLING_CALL_P (insn)"
{
@@ -396,7 +396,7 @@
(define_insn "sibcall_value_insn_<mode>"
[(set (match_operand 0 "" "=r")
- (call (mem:QI (match_operand 1 "call_insn_operand" "Cr"))
+ (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr"))
(match_operand:P 2 "general_operand" "g")))]
"SIBLING_CALL_P (insn)"
{
diff --git a/gcc/config/h8300/testcompare.md b/gcc/config/h8300/testcompare.md
index 694c9e6..3b43381 100644
--- a/gcc/config/h8300/testcompare.md
+++ b/gcc/config/h8300/testcompare.md
@@ -28,7 +28,7 @@
;;
(define_insn ""
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r")
+ (eq:CCZ (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r")
(const_int 1)
(match_operand 1 "const_int_operand" "n"))
(const_int 0)))]
@@ -54,7 +54,7 @@
(define_insn "*tsthi_upper"
[(set (reg:CCZN CC_REG)
- (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (compare:CCZN (and:HI (match_operand:HI 0 "register_operand" "r")
(const_int -256))
(const_int 0)))]
"reload_completed"
@@ -63,7 +63,7 @@
(define_insn "*tsthi_upper_z"
[(set (reg:CCZ CC_REG)
- (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (compare:CCZ (and:HI (match_operand:HI 0 "register_operand" "r")
(const_int -256))
(const_int 0)))]
"reload_completed"
@@ -72,7 +72,7 @@
(define_insn "*tstsi_upper"
[(set (reg:CCZN CC_REG)
- (compare (and:SI (match_operand:SI 0 "register_operand" "r")
+ (compare:CCZN (and:SI (match_operand:SI 0 "register_operand" "r")
(const_int -65536))
(const_int 0)))]
"reload_completed"
@@ -81,7 +81,7 @@
(define_insn "*cmp<mode>_c"
[(set (reg:CCC CC_REG)
- (ltu (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
+ (ltu:CCC (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
(match_operand:QHSI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{
@@ -97,7 +97,7 @@
(define_insn "*cmpqi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:QI 0 "h8300_dst_operand" "rQ")
(match_operand:QI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.b %X1,%X0"; }
@@ -105,7 +105,7 @@
(define_insn "*cmphi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:HI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:HI 0 "h8300_dst_operand" "rQ")
(match_operand:HI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.w %T1,%T0"; }
@@ -113,7 +113,7 @@
(define_insn "*cmpsi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:SI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:SI 0 "h8300_dst_operand" "rQ")
(match_operand:SI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.l %S1,%S0"; }
@@ -121,7 +121,7 @@
(define_insn "*cmpqi"
[(set (reg:CC CC_REG)
- (compare (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (compare:CC (match_operand:QI 0 "h8300_dst_operand" "rQ")
(match_operand:QI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
"cmp.b %X1,%X0"
@@ -129,7 +129,7 @@
(define_insn "*cmphi"
[(set (reg:CC CC_REG)
- (compare (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
+ (compare:CC (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
(match_operand:HI 1 "h8300_src_operand" "P3>X,rQi")))]
"reload_completed"
{
@@ -150,7 +150,7 @@
(define_insn "cmpsi"
[(set (reg:CC CC_REG)
- (compare (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
+ (compare:CC (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
(match_operand:SI 1 "h8300_src_operand" "P3>X,rQi")))]
"reload_completed"
{
@@ -176,7 +176,7 @@
(define_peephole2
[(match_scratch:QHSI 1 "r")
(set (reg:CC CC_REG)
- (compare (match_operand:QHSI 0 "memory_operand" "")
+ (compare:CC (match_operand:QHSI 0 "memory_operand" "")
(const_int 0)))]
"!mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))"
[(parallel [(set (reg:CCZN CC_REG) (compare:CCZN (match_dup 0) (const_int 0)))
@@ -187,7 +187,7 @@
(define_peephole2
[(match_scratch:QHSI 1 "r")
(set (reg:CC CC_REG)
- (compare (match_operand:QHSI 0 "memory_operand" "")
+ (compare:CC (match_operand:QHSI 0 "memory_operand" "")
(const_int 0)))]
"mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))"
[(parallel [(set (match_dup 1) (match_dup 0)) (clobber (reg:CC CC_REG))])
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 12cec61..3278f1f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3151,7 +3151,7 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
}
/* Expand floating point op0 <=> op1, i.e.
- dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : 2. */
+ dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : -128. */
void
ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
@@ -3264,7 +3264,7 @@ ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
if (l2)
{
emit_label (l2);
- emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2);
+ emit_move_insn (dest, op2 == const0_rtx ? GEN_INT (-128) : op2);
}
emit_label (lend);
}
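
A minimal sketch, in plain C and not part of the patch, of the value mapping the updated expander now encodes for op0 <=> op1 when op2 is const0_rtx; the unordered case yields -128 instead of the previous 2:

static int
fp_spaceship_values (double a, double b)
{
  if (a == b)
    return 0;		/* equal */
  if (a < b)
    return -1;		/* less */
  if (a > b)
    return 1;		/* greater */
  return -128;		/* unordered, e.g. a NaN operand */
}
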
@@ -8241,8 +8241,10 @@ expand_cpymem_epilogue (rtx destmem, rtx srcmem,
unsigned HOST_WIDE_INT countval = UINTVAL (count);
unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
unsigned int destalign = MEM_ALIGN (destmem);
+ cfun->machine->by_pieces_in_use = true;
move_by_pieces (destmem, srcmem, epilogue_size, destalign,
RETURN_BEGIN);
+ cfun->machine->by_pieces_in_use = false;
return;
}
if (max_size > 8)
@@ -8405,8 +8407,8 @@ expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
/* Callback routine for store_by_pieces. Return the RTL of a register
containing GET_MODE_SIZE (MODE) bytes in the RTL register op_p which
- is a word or a word vector register. If PREV_P isn't nullptr, it
- has the RTL info from the previous iteration. */
+ is an integer or a word vector register. If PREV_P isn't nullptr,
+ it has the RTL info from the previous iteration. */
static rtx
setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
@@ -8435,10 +8437,6 @@ setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
rtx op = (rtx) op_p;
machine_mode op_mode = GET_MODE (op);
- gcc_assert (op_mode == word_mode
- || (VECTOR_MODE_P (op_mode)
- && GET_MODE_INNER (op_mode) == word_mode));
-
if (VECTOR_MODE_P (mode))
{
gcc_assert (GET_MODE_INNER (mode) == QImode);
@@ -8460,16 +8458,17 @@ setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
return tmp;
}
- target = gen_reg_rtx (word_mode);
if (VECTOR_MODE_P (op_mode))
{
+ gcc_assert (GET_MODE_INNER (op_mode) == word_mode);
+ target = gen_reg_rtx (word_mode);
op = gen_rtx_SUBREG (word_mode, op, 0);
emit_move_insn (target, op);
}
else
target = op;
- if (mode == word_mode)
+ if (mode == GET_MODE (target))
return target;
rtx tmp = gen_reg_rtx (mode);
@@ -8490,9 +8489,11 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
unsigned HOST_WIDE_INT countval = UINTVAL (count);
unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
unsigned int destalign = MEM_ALIGN (destmem);
+ cfun->machine->by_pieces_in_use = true;
store_by_pieces (destmem, epilogue_size, setmem_epilogue_gen_val,
vec_value ? vec_value : value, destalign, true,
RETURN_BEGIN);
+ cfun->machine->by_pieces_in_use = false;
return;
}
if (max_size > 32)
@@ -27034,6 +27035,109 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
return target;
}
+/* GF2P8AFFINEQB matrices to implement shift and rotate.  */
+
+static const uint64_t matrix_ashift[8] =
+{
+ 0,
+ 0x0001020408102040, /* 1 l */
+ 0x0000010204081020, /* 2 l */
+ 0x0000000102040810, /* 3 l */
+ 0x0000000001020408, /* 4 l */
+ 0x0000000000010204, /* 5 l */
+ 0x0000000000000102, /* 6 l */
+ 0x0000000000000001 /* 7 l */
+};
+
+static const uint64_t matrix_lshiftrt[8] =
+{
+ 0,
+ 0x0204081020408000, /* 1 r */
+ 0x0408102040800000, /* 2 r */
+ 0x0810204080000000, /* 3 r */
+ 0x1020408000000000, /* 4 r */
+ 0x2040800000000000, /* 5 r */
+ 0x4080000000000000, /* 6 r */
+ 0x8000000000000000 /* 7 r */
+};
+
+static const uint64_t matrix_ashiftrt[8] =
+{
+ 0,
+ 0x0204081020408080, /* 1 r */
+ 0x0408102040808080, /* 2 r */
+ 0x0810204080808080, /* 3 r */
+ 0x1020408080808080, /* 4 r */
+ 0x2040808080808080, /* 5 r */
+ 0x4080808080808080, /* 6 r */
+ 0x8080808080808080 /* 7 r */
+};
+
+static const uint64_t matrix_rotate[8] =
+{
+ 0,
+ 0x8001020408102040, /* 1 rol8 */
+ 0x4080010204081020, /* 2 rol8 */
+ 0x2040800102040810, /* 3 rol8 */
+ 0x1020408001020408, /* 4 rol8 */
+ 0x0810204080010204, /* 5 rol8 */
+ 0x0408102040800102, /* 6 rol8 */
+ 0x0204081020408001 /* 7 rol8 */
+};
+
+static const uint64_t matrix_rotatert[8] =
+{
+ 0,
+ 0x0204081020408001, /* 1 ror8 */
+ 0x0408102040800102, /* 2 ror8 */
+ 0x0810204080010204, /* 3 ror8 */
+ 0x1020408001020408, /* 4 ror8 */
+ 0x2040800102040810, /* 5 ror8 */
+ 0x4080010204081020, /* 6 ror8 */
+ 0x8001020408102040 /* 7 ror8 */
+};
+
+/* Return an rtx that loads the 64-bit GF2P8AFFINE GF(2) matrix implementing
+   shift code CODE with shift count COUNT into a register of the same vector
+   mode as SRC.  */
+
+rtx
+ix86_vgf2p8affine_shift_matrix (rtx src, rtx count, enum rtx_code code)
+{
+ machine_mode mode = GET_MODE (src);
+ const uint64_t *matrix;
+ unsigned shift = INTVAL (count) & 7;
+ gcc_assert (shift > 0 && shift < 8);
+
+ switch (code)
+ {
+ case ASHIFT:
+ matrix = matrix_ashift;
+ break;
+ case ASHIFTRT:
+ matrix = matrix_ashiftrt;
+ break;
+ case LSHIFTRT:
+ matrix = matrix_lshiftrt;
+ break;
+ case ROTATE:
+ matrix = matrix_rotate;
+ break;
+ case ROTATERT:
+ matrix = matrix_rotatert;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ int nelts = GET_MODE_NUNITS (mode);
+ rtvec vec = rtvec_alloc (nelts);
+ uint64_t ma = matrix[shift];
+ for (int i = 0; i < nelts; i++)
+ RTVEC_ELT (vec, i) = gen_int_mode ((ma >> ((i % 8) * 8)) & 0xff, QImode);
+
+ return force_reg (mode, gen_rtx_CONST_VECTOR (mode, vec));
+}
+
/* Trunc a vector to a narrow vector, like v4di -> v4si. */
void
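
A standalone sketch (plain C; the helper name is hypothetical) of the byte replication performed at the end of ix86_vgf2p8affine_shift_matrix above: the eight rows of the selected 64-bit GF(2) matrix are repeated across every 8-byte lane of the vector constant before it is forced into a register.

#include <stdint.h>
#include <stddef.h>

static void
replicate_matrix_rows (uint64_t matrix, uint8_t *vec, size_t nbytes)
{
  /* nbytes would be 16, 32 or 64 for V16QI, V32QI and V64QI vectors.  */
  for (size_t i = 0; i < nbytes; i++)
    vec[i] = (matrix >> ((i % 8) * 8)) & 0xff;
}
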
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 9941e61..0608dd2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3089,10 +3089,13 @@ enum x86_cse_kind
{
X86_CSE_CONST0_VECTOR,
X86_CSE_CONSTM1_VECTOR,
- X86_CSE_VEC_DUP
+ X86_CSE_VEC_DUP,
+ X86_CSE_TLS_GD,
+ X86_CSE_TLS_LD_BASE,
+ X86_CSE_TLSDESC
};
-struct redundant_load
+struct redundant_pattern
{
/* Bitmap of basic blocks with broadcast instructions. */
auto_bitmap bbs;
@@ -3100,6 +3103,8 @@ struct redundant_load
auto_bitmap insns;
/* The broadcast inner scalar. */
rtx val;
+ /* The actual redundant source value for UNSPEC_TLSDESC. */
+ rtx tlsdesc_val;
/* The inner scalar mode. */
machine_mode mode;
/* The instruction which sets the inner scalar. Nullptr if the inner
@@ -3130,7 +3135,7 @@ struct redundant_load
static void
ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
- redundant_load *load = nullptr)
+ redundant_pattern *load = nullptr)
{
basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
/* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop
@@ -3639,6 +3644,8 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
integer constant. */
op = src;
+ if (mode != GET_MODE (reg))
+ op = gen_int_mode (INTVAL (src), mode);
*insn_p = nullptr;
}
else
@@ -3679,25 +3686,719 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
return op;
}
-/* At entry of the nearest common dominator for basic blocks with vector
- CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest
- vector set instruction for all CONST0_RTX and integer CONSTM1_RTX
- uses.
+/* Replace each CALL instruction in TLS_CALL_INSNS with a SET from SRC and
+   record the updated instruction in UPDATED_TLS_INSNS.  */
- NB: We want to generate only a single widest vector set to cover the
- whole function. The LCM algorithm isn't appropriate here since it
- may place a vector set inside the loop. */
+static void
+replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
+ auto_bitmap &updated_tls_insns)
+{
+ bitmap_iterator bi;
+ unsigned int id;
-static unsigned int
-remove_redundant_vector_load (void)
+ EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
+ allowed. */
+ if (!CALL_P (insn))
+ {
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
+ gcc_unreachable ();
+ }
+
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+ rtx set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ rtx dest = SET_DEST (set);
+
+ set = gen_rtx_SET (dest, src);
+ rtx_insn *set_insn = emit_insn_after (set, insn);
+ if (recog_memoized (set_insn) < 0)
+ gcc_unreachable ();
+
+ /* Put SET_INSN in UPDATED_TLS_INSNS. */
+ bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nReplace:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nwith:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Delete the CALL insn. */
+ delete_insn (insn);
+
+ df_insn_rescan (set_insn);
+ }
+}
+
+/* Return the basic block which dominates all basic blocks which set
+ hard register REGNO used in basic block BB. */
+
+static basic_block
+ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
+{
+ basic_block set_bb;
+ auto_bitmap set_bbs;
+
+ /* Get all BBs which set REGNO and dominate the current BB from all
+ DEFs of REGNO. */
+ for (df_ref def = DF_REG_DEF_CHAIN (regno);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ if (!DF_REF_IS_ARTIFICIAL (def)
+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
+ {
+ set_bb = DF_REF_BB (def);
+ if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
+ bitmap_set_bit (set_bbs, set_bb->index);
+ }
+
+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+ return bb;
+}
+
+/* Mark FLAGS register as live in DATA, a bitmap of live caller-saved
+ registers, if DEST is FLAGS register. */
+
+static void
+ix86_check_flags_reg (rtx dest, const_rtx, void *data)
+{
+ auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
+ if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
+ bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
+}
+
+/* Emit a TLS_SET instruction of KIND in basic block BB. Store the
+ insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P
+ for emit_insn_after. UPDATED_GNU_TLS_INSNS contains instructions
+ which replace the GNU TLS instructions. UPDATED_GNU2_TLS_INSNS
+ contains instructions which replace the GNU2 TLS instructions. */
+
+static rtx_insn *
+ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
+ rtx_insn **before_p, rtx_insn **after_p,
+ auto_bitmap &updated_gnu_tls_insns,
+ auto_bitmap &updated_gnu2_tls_insns)
+{
+ rtx_insn *tls_insn;
+
+ do
+ {
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ /* This must be the beginning basic block:
+
+ (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
+
+ or a basic block with only a label:
+
+ (code_label 78 11 77 3 14 (nil) [1 uses])
+ (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
+
+ or a basic block with only a debug marker:
+
+ (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
+ (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
+
+ */
+ gcc_assert (DEBUG_INSN_P (insn)
+ || (NOTE_P (insn)
+ && ((NOTE_KIND (insn)
+ == NOTE_INSN_FUNCTION_BEG)
+ || (NOTE_KIND (insn)
+ == NOTE_INSN_BASIC_BLOCK))));
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ /* TLS_GD and TLS_LD_BASE instructions are normal functions which
+ clobber caller-saved registers. TLSDESC instructions only
+ clobber FLAGS. If any registers clobbered by TLS instructions
+ are live in this basic block, we must insert TLS instructions
+ after all live registers clobbered are dead. */
+
+ auto_bitmap live_caller_saved_regs;
+ bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
+
+ if (bitmap_bit_p (in, FLAGS_REG))
+ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
+
+ unsigned int i;
+
+ /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
+ instructions. */
+ if (kind != X86_CSE_TLSDESC)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (call_used_regs[i]
+ && !fixed_regs[i]
+ && bitmap_bit_p (in, i))
+ bitmap_set_bit (live_caller_saved_regs, i);
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ if (insn == BB_HEAD (bb))
+ {
+ *before_p = insn;
+ tls_insn = emit_insn_before (tls_set, insn);
+ }
+ else
+ {
+ /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
+ beginning basic block:
+
+ (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
+
+ or after NOTE_INSN_BASIC_BLOCK in a basic block with
+ only a label:
+
+ (code_label 78 11 77 3 14 (nil) [1 uses])
+ (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
+
+ or after debug marker in a basic block with only a
+ debug marker:
+
+ (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
+ (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
+
+ */
+ insn = insn ? PREV_INSN (insn) : BB_END (bb);
+ *after_p = insn;
+ tls_insn = emit_insn_after (tls_set, insn);
+ }
+ return tls_insn;
+ }
+
+ bool repeat = false;
+
+ /* Search for REG_DEAD notes in this basic block. */
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ /* NB: Conditional jump is the only instruction which reads
+ flags register and changes control flow. We can never
+ place the TLS call after unconditional jump. */
+ if (JUMP_P (insn))
+ {
+ /* This must be a conditional jump. */
+ rtx label = JUMP_LABEL (insn);
+ if (label == nullptr
+ || ANY_RETURN_P (label)
+ || !(LABEL_P (label) || SYMBOL_REF_P (label)))
+ gcc_unreachable ();
+
+	    /* Place the call before all FLAGS_REG setting BBs since
+	       we can't place a call either before or after a conditional
+	       jump.  */
+ bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
+
+ /* Start over again. */
+ repeat = true;
+ break;
+ }
+
+ if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
+ {
+ /* Insert the __tls_get_addr call before INSN which
+ replaces a __tls_get_addr call. */
+ *before_p = insn;
+ tls_insn = emit_insn_before (tls_set, insn);
+ return tls_insn;
+ }
+
+ if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
+ {
+ /* Mark FLAGS register as dead since FLAGS register
+ would be clobbered by the GNU2 TLS instruction. */
+ bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
+ continue;
+ }
+
+ /* Check if FLAGS register is live. */
+ note_stores (insn, ix86_check_flags_reg,
+ &live_caller_saved_regs);
+
+ rtx link;
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_DEAD
+ && REG_P (XEXP (link, 0)))
+ {
+ /* Mark the live caller-saved register as dead. */
+ for (i = REGNO (XEXP (link, 0));
+ i < END_REGNO (XEXP (link, 0));
+ i++)
+ if (i < FIRST_PSEUDO_REGISTER)
+ bitmap_clear_bit (live_caller_saved_regs, i);
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ *after_p = insn;
+ tls_insn = emit_insn_after (tls_set, insn);
+ return tls_insn;
+ }
+ }
+ }
+
+ /* NB: Start over again for conditional jump. */
+ if (repeat)
+ continue;
+
+ gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
+
+ /* If any live caller-saved registers aren't dead at the end of
+ this basic block, get the basic block which dominates all
+ basic blocks which set the remaining live registers. */
+ auto_bitmap set_bbs;
+ bitmap_iterator bi;
+ unsigned int id;
+ EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
+ {
+ basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
+ bitmap_set_bit (set_bbs, set_bb->index);
+ }
+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+ }
+ while (true);
+}
+
+/* Generate a TLS call of KIND with VAL and copy the call result to DEST,
+   at the entry of the nearest common dominator of the basic blocks in BBS,
+   hoisted into the fake loop that contains the whole function, so that
+   there is only a single TLS CALL of KIND with VAL in the whole function.
+ UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
+ instructions. UPDATED_GNU2_TLS_INSNS contains instructions which
+ replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr,
+ insert it before the TLS call. */
+
+static void
+ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+ auto_bitmap &bbs,
+ auto_bitmap &updated_gnu_tls_insns,
+ auto_bitmap &updated_gnu2_tls_insns,
+ rtx tlsdesc_set = nullptr)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+
+ rtx rax = nullptr, rdi;
+ rtx eqv = nullptr;
+ rtx caddr;
+ rtx set;
+ rtx clob;
+ rtx symbol;
+ rtx tls;
+
+ switch (kind)
+ {
+ case X86_CSE_TLS_GD:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ symbol = XVECEXP (val, 0, 0);
+ tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
+
+ if (GET_MODE (symbol) != Pmode)
+ symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
+ eqv = symbol;
+ break;
+
+ case X86_CSE_TLS_LD_BASE:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
+
+ /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
+ to share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ break;
+
+ case X86_CSE_TLSDESC:
+ set = gen_rtx_SET (dest, val);
+ clob = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, FLAGS_REG));
+ tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Emit the TLS CALL insn. */
+ rtx_insn *before = nullptr;
+ rtx_insn *after = nullptr;
+ rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
+ &after,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns);
+
+ rtx_insn *tlsdesc_insn = nullptr;
+ if (tlsdesc_set)
+ {
+ rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
+ rtx src = copy_rtx (SET_SRC (tlsdesc_set));
+ tlsdesc_set = gen_rtx_SET (dest, src);
+ tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ RTL_CONST_CALL_P (tls_insn) = 1;
+
+ /* Indicate that this function can't jump to non-local gotos. */
+ make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
+ }
+
+ if (recog_memoized (tls_insn) < 0)
+ gcc_unreachable ();
+
+ if (dump_file)
+ {
+ if (after)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, after);
+ fprintf (dump_file, "\n");
+ }
+ else
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, before);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ /* Copy RAX to DEST. */
+ set = gen_rtx_SET (dest, rax);
+ rtx_insn *set_insn = emit_insn_after (set, tls_insn);
+ set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
+
+namespace {
+
+const pass_data pass_data_x86_cse =
+{
+ RTL_PASS, /* type */
+ "x86_cse", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_x86_cse : public rtl_opt_pass
+{
+public:
+ pass_x86_cse (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_x86_cse, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *fun) final override
+ {
+ return (TARGET_SSE2
+ && optimize
+ && optimize_function_for_speed_p (fun));
+ }
+
+ unsigned int execute (function *) final override
+ {
+ return x86_cse ();
+ }
+
+private:
+ /* The redundant source value. */
+ rtx val;
+ /* The actual redundant source value for UNSPEC_TLSDESC. */
+ rtx tlsdesc_val;
+ /* The instruction which defines the redundant value. */
+ rtx_insn *def_insn;
+ /* Mode of the destination of the candidate redundant instruction. */
+ machine_mode mode;
+ /* Mode of the source of the candidate redundant instruction. */
+ machine_mode scalar_mode;
+ /* The classification of the candidate redundant instruction. */
+ x86_cse_kind kind;
+
+ unsigned int x86_cse (void);
+ bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
+ bool candidate_gnu2_tls_p (rtx, attr_tls64);
+ bool candidate_vector_p (rtx);
+ rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
+}; // class pass_x86_cse
+
+/* Return the instruction which sets REG from TLS_SYMBOL. */
+
+rtx_insn *
+pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
+ const_rtx tls_symbol)
+{
+ rtx_insn *set_insn = nullptr;
+ for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ return nullptr;
+
+ set_insn = DF_REF_INSN (ref);
+ if (get_attr_tls64 (set_insn) != TLS64_LEA)
+ return nullptr;
+
+ rtx tls_set = PATTERN (set_insn);
+ rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
+ if (!rtx_equal_p (tls_symbol, tls_src))
+ return nullptr;
+ }
+
+ return set_insn;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE. */
+
+bool
+pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return false;
+
+ /* Record the redundant TLS CALLs for 64-bit:
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
+ (clobber (reg:DI 5 di))])
+
+
+ and
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
+
+ */
+
+ rtx pat = PATTERN (insn);
+ rtx set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ rtx dest = SET_DEST (set);
+ scalar_mode = mode = GET_MODE (dest);
+ val = XVECEXP (pat, 0, 1);
+ gcc_assert (GET_CODE (val) == UNSPEC);
+
+ if (tls64 == TLS64_GD)
+ kind = X86_CSE_TLS_GD;
+ else
+ kind = X86_CSE_TLS_LD_BASE;
+
+ def_insn = nullptr;
+ return true;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ SET is UNSPEC_TLSDESC. */
+
+bool
+pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return false;
+
+ rtx tls_symbol;
+ rtx_insn *set_insn;
+ rtx src = SET_SRC (set);
+ val = src;
+ tlsdesc_val = src;
+ kind = X86_CSE_TLSDESC;
+
+ if (tls64 == TLS64_COMBINE)
+ {
+ /* Record 64-bit TLS64_COMBINE:
+
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (reg:DI 114)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ ] UNSPEC_TLSDESC)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+ */
+
+ scalar_mode = mode = GET_MODE (src);
+
+ /* Since the first operand of PLUS in the source TLS_COMBINE
+ pattern is unused, use the second operand of PLUS:
+
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))
+
+ as VAL to check if 2 TLS_COMBINE patterns have the same
+ source. */
+ val = XEXP (src, 1);
+ gcc_assert (GET_CODE (val) == CONST
+ && GET_CODE (XEXP (val, 0)) == UNSPEC
+ && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
+ && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
+ def_insn = nullptr;
+ return true;
+ }
+
+ /* Record 64-bit TLS_CALL:
+
+ (set (reg:DI 101)
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg:DI 112)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ */
+
+ gcc_assert (GET_CODE (src) == UNSPEC);
+ tls_symbol = XVECEXP (src, 0, 0);
+ src = XVECEXP (src, 0, 1);
+ scalar_mode = mode = GET_MODE (src);
+ gcc_assert (REG_P (src));
+
+ /* All definitions of reg:DI 129 in
+
+ (set (reg:DI 110)
+ (unspec:DI [(symbol_ref:DI ("foo"))
+ (reg:DI 129)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ should have the same source as in
+
+ (set (reg:DI 129)
+ (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
+
+ */
+
+ set_insn = tls_set_insn_from_symbol (src, tls_symbol);
+ if (!set_insn)
+ return false;
+
+ /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source. */
+ val = tls_symbol;
+ def_insn = set_insn;
+ return true;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+   SET is a vector broadcast pattern.  */
+
+bool
+pass_x86_cse::candidate_vector_p (rtx set)
+{
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+ mode = GET_MODE (dest);
+ /* Skip non-vector instruction. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ /* Skip non-vector load instruction. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ return false;
+
+ val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
+ &def_insn);
+ return val ? true : false;
+}
+
+/* At entry of the nearest common dominator for basic blocks with
+
+ 1. Vector CONST0_RTX patterns.
+ 2. Vector CONSTM1_RTX patterns.
+ 3. Vector broadcast patterns.
+ 4. UNSPEC_TLS_GD patterns.
+ 5. UNSPEC_TLS_LD_BASE patterns.
+ 6. UNSPEC_TLSDESC patterns.
+
+ generate a single pattern whose destination is used to replace the
+ source in all identical patterns.
+
+ NB: We want to generate a pattern, which is executed only once, to
+ cover the whole function. The LCM algorithm isn't appropriate here
+ since it may place a pattern inside the loop. */
+
+unsigned int
+pass_x86_cse::x86_cse (void)
{
timevar_push (TV_MACH_DEP);
- auto_vec<redundant_load *> loads;
- redundant_load *load;
+ auto_vec<redundant_pattern *> loads;
+ redundant_pattern *load;
basic_block bb;
rtx_insn *insn;
unsigned int i;
+ auto_bitmap updated_gnu_tls_insns;
+ auto_bitmap updated_gnu2_tls_insns;
df_set_flags (DF_DEFER_INSN_RESCAN);
@@ -3710,61 +4411,74 @@ remove_redundant_vector_load (void)
if (!NONDEBUG_INSN_P (insn))
continue;
+ bool matched = false;
+	  /* Remove redundant patterns if there are more than 2 of
+ them. */
+ unsigned int threshold = 2;
+
rtx set = single_set (insn);
- if (!set)
+ if (!set && !CALL_P (insn))
continue;
- /* Record single set vector instruction with CONST0_RTX and
- CONSTM1_RTX source. Record basic blocks with CONST0_RTX and
- CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the
- maximum size of CONST0_RTX and CONSTM1_RTX. */
+ tlsdesc_val = nullptr;
- rtx dest = SET_DEST (set);
- machine_mode mode = GET_MODE (dest);
- /* Skip non-vector instruction. */
- if (!VECTOR_MODE_P (mode))
- continue;
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ switch (tls64)
+ {
+ case TLS64_GD:
+ case TLS64_LD_BASE:
+ /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE. */
+ if (candidate_gnu_tls_p (insn, tls64))
+ break;
+ continue;
- rtx src = SET_SRC (set);
- /* Skip non-vector load instruction. */
- if (!REG_P (dest) && !SUBREG_P (dest))
- continue;
+ case TLS64_CALL:
+ case TLS64_COMBINE:
+ /* Verify UNSPEC_TLSDESC. */
+ if (candidate_gnu2_tls_p (set, tls64))
+ break;
+ continue;
- rtx_insn *def_insn;
- machine_mode scalar_mode;
- x86_cse_kind kind;
- rtx val = ix86_broadcast_inner (src, mode, &scalar_mode,
- &kind, &def_insn);
- if (!val)
- continue;
+ case TLS64_LEA:
+ /* Skip TLS64_LEA. */
+ continue;
- /* Remove redundant register loads if there are more than 2
- loads will be used. */
- unsigned int threshold = 2;
+ case TLS64_NONE:
+ if (!set)
+ continue;
- /* Check if there is a matching redundant vector load. */
- bool matched = false;
+ /* Check for vector broadcast. */
+ if (candidate_vector_p (set))
+ break;
+ continue;
+ }
+
+ /* Check if there is a matching redundant load. */
FOR_EACH_VEC_ELT (loads, i, load)
if (load->val
&& load->kind == kind
&& load->mode == scalar_mode
&& (load->bb == bb
- || kind < X86_CSE_VEC_DUP
+ || kind != X86_CSE_VEC_DUP
/* Non all 0s/1s vector load must be in the same
basic block if it is in a recursive call. */
|| !recursive_call_p)
&& rtx_equal_p (load->val, val))
{
- /* Record vector instruction. */
+ /* Record instruction. */
bitmap_set_bit (load->insns, INSN_UID (insn));
/* Record the maximum vector size. */
- if (load->size < GET_MODE_SIZE (mode))
+ if (kind <= X86_CSE_VEC_DUP
+ && load->size < GET_MODE_SIZE (mode))
load->size = GET_MODE_SIZE (mode);
/* Record the basic block. */
bitmap_set_bit (load->bbs, bb->index);
+
+ /* Increment the count. */
load->count++;
+
matched = true;
break;
}
@@ -3772,10 +4486,17 @@ remove_redundant_vector_load (void)
if (matched)
continue;
- /* We see this vector broadcast the first time. */
- load = new redundant_load;
+	  /* This is the first time we see this instruction.  Record the
+	     redundant source value, its mode, the destination size, the
+	     instruction which defines the redundant source value, the
+	     instruction's basic block and the instruction kind.  */
+ load = new redundant_pattern;
load->val = copy_rtx (val);
+ if (tlsdesc_val)
+ load->tlsdesc_val = copy_rtx (tlsdesc_val);
+ else
+ load->tlsdesc_val = nullptr;
load->mode = scalar_mode;
load->size = GET_MODE_SIZE (mode);
load->def_insn = def_insn;
@@ -3792,49 +4513,64 @@ remove_redundant_vector_load (void)
}
bool replaced = false;
- rtx reg, broadcast_source, broadcast_reg;
FOR_EACH_VEC_ELT (loads, i, load)
if (load->count >= load->threshold)
{
- machine_mode mode = ix86_get_vector_cse_mode (load->size,
- load->mode);
- broadcast_reg = gen_reg_rtx (mode);
- if (load->def_insn)
- {
- /* Replace redundant vector loads with a single vector load
- in the same basic block. */
- reg = load->val;
- if (load->mode != GET_MODE (reg))
- reg = gen_rtx_SUBREG (load->mode, reg, 0);
- broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
- replace_vector_const (mode, broadcast_reg, load->insns,
- load->mode);
- }
- else
+ machine_mode mode;
+ rtx reg, broadcast_source, broadcast_reg;
+ replaced = true;
+ switch (load->kind)
{
- /* This is a constant integer/double vector. If the
- inner scalar is 0 or -1, set vector to CONST0_RTX
- or CONSTM1_RTX directly. */
- rtx reg;
- switch (load->kind)
+ case X86_CSE_TLS_GD:
+ case X86_CSE_TLS_LD_BASE:
+ case X86_CSE_TLSDESC:
+ broadcast_reg = gen_reg_rtx (load->mode);
+ replace_tls_call (broadcast_reg, load->insns,
+ (load->kind == X86_CSE_TLSDESC
+ ? updated_gnu2_tls_insns
+ : updated_gnu_tls_insns));
+ load->broadcast_reg = broadcast_reg;
+ break;
+
+ case X86_CSE_CONST0_VECTOR:
+ case X86_CSE_CONSTM1_VECTOR:
+ case X86_CSE_VEC_DUP:
+ mode = ix86_get_vector_cse_mode (load->size, load->mode);
+ broadcast_reg = gen_reg_rtx (mode);
+ if (load->def_insn)
{
- case X86_CSE_CONST0_VECTOR:
- broadcast_source = CONST0_RTX (mode);
- break;
- case X86_CSE_CONSTM1_VECTOR:
- broadcast_source = CONSTM1_RTX (mode);
- break;
- default:
- reg = gen_reg_rtx (load->mode);
+ /* Replace redundant vector loads with a single vector
+ load in the same basic block. */
+ reg = load->val;
+ if (load->mode != GET_MODE (reg))
+ reg = gen_rtx_SUBREG (load->mode, reg, 0);
broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
- break;
}
+ else
+ /* This is a constant integer/double vector. If the
+ inner scalar is 0 or -1, set vector to CONST0_RTX
+ or CONSTM1_RTX directly. */
+ switch (load->kind)
+ {
+ case X86_CSE_CONST0_VECTOR:
+ broadcast_source = CONST0_RTX (mode);
+ break;
+ case X86_CSE_CONSTM1_VECTOR:
+ broadcast_source = CONSTM1_RTX (mode);
+ break;
+ case X86_CSE_VEC_DUP:
+ reg = gen_reg_rtx (load->mode);
+ broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
+ break;
+ default:
+ gcc_unreachable ();
+ }
replace_vector_const (mode, broadcast_reg, load->insns,
load->mode);
+ load->broadcast_source = broadcast_source;
+ load->broadcast_reg = broadcast_reg;
+ break;
}
- load->broadcast_source = broadcast_source;
- load->broadcast_reg = broadcast_reg;
- replaced = true;
}
if (replaced)
@@ -3849,40 +4585,75 @@ remove_redundant_vector_load (void)
FOR_EACH_VEC_ELT (loads, i, load)
if (load->count >= load->threshold)
{
+ rtx set;
if (load->def_insn)
- {
- /* Insert a broadcast after the original scalar
- definition. */
- rtx set = gen_rtx_SET (load->broadcast_reg,
- load->broadcast_source);
- insn = emit_insn_after (set, load->def_insn);
-
- if (cfun->can_throw_non_call_exceptions)
- {
- /* Handle REG_EH_REGION note in DEF_INSN. */
- rtx note = find_reg_note (load->def_insn,
- REG_EH_REGION, nullptr);
- if (note)
- {
- control_flow_insns.safe_push (load->def_insn);
- add_reg_note (insn, REG_EH_REGION,
- XEXP (note, 0));
- }
- }
+ switch (load->kind)
+ {
+ case X86_CSE_TLSDESC:
+ ix86_place_single_tls_call (load->broadcast_reg,
+ load->tlsdesc_val,
+ load->kind,
+ load->bbs,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns,
+ PATTERN (load->def_insn));
+ break;
+ case X86_CSE_VEC_DUP:
+ /* Insert a broadcast after the original scalar
+ definition. */
+ set = gen_rtx_SET (load->broadcast_reg,
+ load->broadcast_source);
+ insn = emit_insn_after (set, load->def_insn);
+
+ if (cfun->can_throw_non_call_exceptions)
+ {
+ /* Handle REG_EH_REGION note in DEF_INSN. */
+ rtx note = find_reg_note (load->def_insn,
+ REG_EH_REGION, nullptr);
+ if (note)
+ {
+ control_flow_insns.safe_push (load->def_insn);
+ add_reg_note (insn, REG_EH_REGION,
+ XEXP (note, 0));
+ }
+ }
- if (dump_file)
- {
- fprintf (dump_file, "\nAdd:\n\n");
- print_rtl_single (dump_file, insn);
- fprintf (dump_file, "\nafter:\n\n");
- print_rtl_single (dump_file, load->def_insn);
- fprintf (dump_file, "\n");
- }
- }
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, load->def_insn);
+ fprintf (dump_file, "\n");
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
else
- ix86_place_single_vector_set (load->broadcast_reg,
- load->broadcast_source,
- load->bbs, load);
+ switch (load->kind)
+ {
+ case X86_CSE_TLS_GD:
+ case X86_CSE_TLS_LD_BASE:
+ case X86_CSE_TLSDESC:
+ ix86_place_single_tls_call (load->broadcast_reg,
+ (load->kind == X86_CSE_TLSDESC
+ ? load->tlsdesc_val
+ : load->val),
+ load->kind,
+ load->bbs,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns);
+ break;
+ case X86_CSE_CONST0_VECTOR:
+ case X86_CSE_CONSTM1_VECTOR:
+ case X86_CSE_VEC_DUP:
+ ix86_place_single_vector_set (load->broadcast_reg,
+ load->broadcast_source,
+ load->bbs,
+ load);
+ break;
+ }
}
loop_optimizer_finalize ();
@@ -3912,48 +4683,12 @@ remove_redundant_vector_load (void)
return 0;
}
-namespace {
-
-const pass_data pass_data_remove_redundant_vector_load =
-{
- RTL_PASS, /* type */
- "rrvl", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_MACH_DEP, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_remove_redundant_vector_load : public rtl_opt_pass
-{
-public:
- pass_remove_redundant_vector_load (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
- {}
-
- /* opt_pass methods: */
- bool gate (function *fun) final override
- {
- return (TARGET_SSE2
- && optimize
- && optimize_function_for_speed_p (fun));
- }
-
- unsigned int execute (function *) final override
- {
- return remove_redundant_vector_load ();
- }
-}; // class pass_remove_redundant_vector_load
-
} // anon namespace
rtl_opt_pass *
-make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+make_pass_x86_cse (gcc::context *ctxt)
{
- return new pass_remove_redundant_vector_load (ctxt);
+ return new pass_x86_cse (ctxt);
}
/* Convert legacy instructions that clobbers EFLAGS to APX_NF
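
A hedged user-level illustration of what the renamed x86_cse pass now catches (hypothetical names; assumes x86-64, -O2 -fpic and the GNU TLS dialect): each access below would normally expand to its own __tls_get_addr call, one per basic block; the pass keeps a single call placed at the nearest common dominator and reuses its result.

__thread int counter;

int
bump (int n, int flag)
{
  if (flag)
    counter += n;	/* general-dynamic TLS access in one block */
  else
    counter -= n;	/* and another in a different block */
  return counter;	/* a single __tls_get_addr call serves all three */
}
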
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 09a35ef..abb5dd7 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1172,6 +1172,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
OPT_mrecip,
MASK_RECIP),
+ IX86_ATTR_YES ("80387",
+ OPT_m80387,
+ MASK_80387),
+
IX86_ATTR_IX86_YES ("general-regs-only",
OPT_mgeneral_regs_only,
OPTION_MASK_GENERAL_REGS_ONLY),
@@ -1281,6 +1285,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
else if (type == ix86_opt_yes || type == ix86_opt_no)
{
+ opts_set->x_target_flags |= mask;
+
if (type == ix86_opt_no)
opt_set_p = !opt_set_p;
@@ -3556,6 +3562,10 @@ ix86_set_current_function (tree fndecl)
isa = "AVX";
else if (cfun->machine->func_type != TYPE_NORMAL)
isa = "SSE";
+ else if (TARGET_MMX)
+ isa = "MMX/3Dnow";
+ else if (TARGET_80387)
+ isa = "80387";
else
isa = NULL;
}
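
A hedged usage sketch for the new "80387" attribute entry (the function and body are illustrative only): the flag can now be toggled per function through the target attribute, for instance to compile an integer-only helper with x87 code generation disabled.

__attribute__ ((target ("no-80387")))
int
popcount_no_x87 (unsigned int x)
{
  int n = 0;
  while (x)
    {
      n += x & 1;
      x >>= 1;
    }
  return n;
}
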
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 06f0288..553b46d 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,6 +35,6 @@ along with GCC; see the file COPYING3. If not see
PR116174. */
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
- INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
+ INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse);
INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 69bc0ee..bdb8bb9 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -290,6 +290,7 @@ extern rtx ix86_tls_module_base (void);
extern bool ix86_gpr_tls_address_pattern_p (rtx);
extern bool ix86_tls_address_pattern_p (rtx);
extern rtx ix86_rewrite_tls_address (rtx);
+extern rtx ix86_tls_get_addr (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
@@ -430,8 +431,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
(gcc::context *);
extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
-extern rtl_opt_pass *make_pass_remove_redundant_vector_load
- (gcc::context *);
+extern rtl_opt_pass *make_pass_x86_cse (gcc::context *);
extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
@@ -448,3 +448,4 @@ extern void ix86_set_handled_components (sbitmap);
/* In i386-expand.cc. */
bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*,
HOST_WIDE_INT*);
+rtx ix86_vgf2p8affine_shift_matrix (rtx, rtx, enum rtx_code);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 65e04d3..471be3e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -11382,6 +11382,23 @@ ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
return cost;
}
+
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
+
+bool
+ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned int align,
+ enum by_pieces_operation op,
+ bool speed_p)
+{
+ /* Return true when we are currently expanding memcpy/memset epilogue
+ with move_by_pieces or store_by_pieces. */
+ if (cfun->machine->by_pieces_in_use)
+ return true;
+
+ return default_use_by_pieces_infrastructure_p (size, align, op,
+ speed_p);
+}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
this is used for to form addresses to local data when -fPIC is in
@@ -12439,7 +12456,7 @@ ix86_tls_index (void)
static GTY(()) rtx ix86_tls_symbol;
-static rtx
+rtx
ix86_tls_get_addr (void)
{
if (cfun->machine->call_saved_registers
@@ -22102,6 +22119,15 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
}
/* FALLTHRU */
case V32QImode:
+ if (TARGET_GFNI && constant_op1)
+ {
+	  /* Use vgf2p8affine.  One extra load for the mask, but in a loop
+	     with enough registers it will be moved out.  So for now don't
+	     account for the constant mask load.  This is not quite right
+	     for non-loop vectorization.  */
+ extra = 0;
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+ }
if (TARGET_AVX2)
/* Use vpbroadcast. */
extra = cost->sse_op;
@@ -22136,6 +22162,11 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
count = 9;
return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+ case V64QImode:
+ /* Ignore the mask load for GF2P8AFFINEQB. */
+ extra = 0;
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+
case V2DImode:
case V4DImode:
/* V*DImode arithmetic right shift is emulated. */
@@ -25794,15 +25825,20 @@ private:
unsigned m_num_sse_needed[3];
/* Number of 256-bit vector permutation. */
unsigned m_num_avx256_vec_perm[3];
+ /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
+ unsigned m_num_reduc[X86_REDUC_LAST];
+  /* Don't unroll if m_prefer_unroll is false; the default is true.  */
+ bool m_prefer_unroll;
};
ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
: vector_costs (vinfo, costing_for_scalar),
m_num_gpr_needed (),
m_num_sse_needed (),
- m_num_avx256_vec_perm ()
-{
-}
+ m_num_avx256_vec_perm (),
+ m_num_reduc (),
+ m_prefer_unroll (true)
+{}
/* Implement targetm.vectorize.create_costs. */
@@ -26099,6 +26135,125 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
}
}
+ /* Record number of load/store/gather/scatter in vectorized body. */
+ if (where == vect_body && !m_costing_for_scalar)
+ {
+ switch (kind)
+ {
+ /* Emulated gather/scatter or any scalarization. */
+ case scalar_load:
+ case scalar_stmt:
+ case scalar_store:
+ case vector_gather_load:
+ case vector_scatter_store:
+ m_prefer_unroll = false;
+ break;
+
+ case vector_stmt:
+ case vec_to_scalar:
+	  /* Count the number of reduction FMAs and "real" DOT_PROD_EXPRs;
+	     unrolling in the vectorizer will enable partial sums.  */
+ if (stmt_info
+ && vect_is_reduction (stmt_info)
+ && stmt_info->stmt)
+ {
+ /* Handle __builtin_fma. */
+ if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
+ {
+ m_num_reduc[X86_REDUC_FMA] += count;
+ break;
+ }
+
+ if (!is_gimple_assign (stmt_info->stmt))
+ break;
+
+ tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ tree rhs1, rhs2;
+ bool native_vnni_p = true;
+ gimple* def;
+ machine_mode mode_rhs;
+ switch (subcode)
+ {
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ if (!fp || !flag_associative_math
+ || flag_fp_contract_mode != FP_CONTRACT_FAST)
+ break;
+
+ /* FMA condition for different modes. */
+ if (((inner_mode == DFmode || inner_mode == SFmode)
+ && !TARGET_FMA && !TARGET_AVX512VL)
+ || (inner_mode == HFmode && !TARGET_AVX512FP16)
+ || (inner_mode == BFmode && !TARGET_AVX10_2))
+ break;
+
+ /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
+ to FMA/FNMA after vectorization. */
+ rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
+ rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
+ if (subcode == PLUS_EXPR
+ && TREE_CODE (rhs1) == SSA_NAME
+ && (def = SSA_NAME_DEF_STMT (rhs1), true)
+ && is_gimple_assign (def)
+ && gimple_assign_rhs_code (def) == MULT_EXPR)
+ m_num_reduc[X86_REDUC_FMA] += count;
+ else if (TREE_CODE (rhs2) == SSA_NAME
+ && (def = SSA_NAME_DEF_STMT (rhs2), true)
+ && is_gimple_assign (def)
+ && gimple_assign_rhs_code (def) == MULT_EXPR)
+ m_num_reduc[X86_REDUC_FMA] += count;
+ break;
+
+	      /* The vectorizer's lane_reducing_op_p supports DOT_PROD_EXPR,
+		 WIDEN_SUM_EXPR and SAD_EXPR; the x86 backend only supports
+		 SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
+ case DOT_PROD_EXPR:
+ rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
+ mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
+ if (mode_rhs == QImode)
+ {
+ rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
+ signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
+ signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
+
+ /* vpdpbusd. */
+ if (signop1_p != signop2_p)
+ native_vnni_p
+ = (GET_MODE_SIZE (mode) == 64
+ ? TARGET_AVX512VNNI
+ : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
+ || TARGET_AVXVNNI));
+ else
+ /* vpdpbssd. */
+ native_vnni_p
+ = (GET_MODE_SIZE (mode) == 64
+ ? TARGET_AVX10_2
+ : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
+ }
+ m_num_reduc[X86_REDUC_DOT_PROD] += count;
+
+		  /* Avoid unrolling and partial sums for
+		     emulated DOT_PROD_EXPR.  */
+ if (!native_vnni_p)
+ m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
+ break;
+
+ case SAD_EXPR:
+ m_num_reduc[X86_REDUC_SAD] += count;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ default:
+ break;
+ }
+ }
+
+
combined_fn cfn;
if ((kind == vector_stmt || kind == scalar_stmt)
&& stmt_info
@@ -26161,8 +26316,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
&& (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
(SLP_TREE_REPRESENTATIVE (node))))
!= INTEGER_CST))
- || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
- == VMAT_GATHER_SCATTER)))))
+ || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
{
stmt_cost = ix86_default_vector_cost (kind, mode);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
@@ -26306,6 +26460,41 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
&& (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
> ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
m_costs[vect_body] = INT_MAX;
+
+ bool any_reduc_p = false;
+ for (int i = 0; i != X86_REDUC_LAST; i++)
+ if (m_num_reduc[i])
+ {
+ any_reduc_p = true;
+ break;
+ }
+
+ if (any_reduc_p
+      /* Not much gain for loops with gather and scatter.  */
+ && m_prefer_unroll
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ {
+ unsigned unroll_factor
+ = OPTION_SET_P (ix86_vect_unroll_limit)
+ ? ix86_vect_unroll_limit
+ : ix86_cost->vect_unroll_limit;
+
+ if (unroll_factor > 1)
+ {
+ for (int i = 0 ; i != X86_REDUC_LAST; i++)
+ {
+ if (m_num_reduc[i])
+ {
+ unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
+ m_num_reduc[i]);
+ unroll_factor = MIN (unroll_factor, tmp);
+ }
+ }
+
+ m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
+ }
+ }
+
}
ix86_vect_estimate_reg_pressure ();
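
As a worked example with hypothetical numbers: if reduc_lat_mult_thr[X86_REDUC_FMA] were 16 and the vectorized body contained two reduction FMAs, the per-kind bound would be CEIL (16, 2) = 8; with a vect_unroll_limit of 4 the minimum is 4, and 1 << ceil_log2 (4) leaves the suggested unroll factor at 4 (a non-power-of-two minimum such as 3 would be rounded up to 4).
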
@@ -27189,9 +27378,9 @@ ix86_memtag_can_tag_addresses ()
return ix86_lam_type != lam_none && TARGET_LP64;
}
-/* Implement TARGET_MEMTAG_TAG_SIZE. */
+/* Implement TARGET_MEMTAG_TAG_BITSIZE. */
unsigned char
-ix86_memtag_tag_size ()
+ix86_memtag_tag_bitsize ()
{
return IX86_HWASAN_TAG_SIZE;
}
@@ -27762,6 +27951,10 @@ static const scoped_attribute_specs *const ix86_attribute_table[] =
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+ ix86_use_by_pieces_infrastructure_p
+
#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
@@ -28165,8 +28358,8 @@ ix86_libgcc_floating_mode_supported_p
#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
-#undef TARGET_MEMTAG_TAG_SIZE
-#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
+#undef TARGET_MEMTAG_TAG_BITSIZE
+#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 49af963..ac0ce68 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -102,6 +102,15 @@ struct stringop_algs
#define COSTS_N_BYTES(N) ((N) * 2)
#endif
+
+enum ix86_reduc_unroll_factor {
+ X86_REDUC_FMA,
+ X86_REDUC_DOT_PROD,
+ X86_REDUC_SAD,
+
+ X86_REDUC_LAST
+};
+
/* Define the specific costs for a given cpu. NB: hard_register is used
by TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute
hard register move costs by register allocator. Relative costs of
@@ -225,6 +234,13 @@ struct processor_costs {
to number of instructions executed in
parallel. See also
ix86_reassociation_width. */
+ const unsigned reduc_lat_mult_thr[X86_REDUC_LAST];
+				/* Latency times throughput of
+				   FMA/DOT_PROD_EXPR/SAD_EXPR; used to
+				   determine the unroll factor in the
+				   vectorizer.  */
+ const unsigned vect_unroll_limit; /* Limit how much the autovectorizer
+ may unroll a loop. */
struct stringop_algs *memcpy, *memset;
const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
cost model. */
@@ -644,7 +660,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
{"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
{"arch", "%{!march=*:-march=%(VALUE)}"}, \
{"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \
- {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"},
+ {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, \
+ {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"},
/* Specs for the compiler proper */
@@ -2865,6 +2882,9 @@ struct GTY(()) machine_function {
approximation. */
BOOL_BITFIELD tls_descriptor_call_expanded_p : 1;
+ /* True if TLS descriptor is called more than once. */
+  /* True if more than one TLS descriptor call has been expanded.  */
+
   /* If true, the current function's STATIC_CHAIN is placed on the
      stack below the return address.  */
BOOL_BITFIELD static_chain_on_stack : 1;
@@ -2934,6 +2954,9 @@ struct GTY(()) machine_function {
/* True if this is a recursive function. */
BOOL_BITFIELD recursive_function : 1;
+ /* True if by_pieces op is currently in use. */
+ BOOL_BITFIELD by_pieces_in_use : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6686f10..cea6c15 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -901,6 +901,10 @@
(define_attr "avx_partial_xmm_update" "false,true"
(const_string "false"))
+;; Define attribute to indicate 64-bit TLS insns.
+(define_attr "tls64" "gd,ld_base,call,combine,lea,none"
+ (const_string "none"))
+
;; Define attribute to classify add/sub insns that consumes carry flag (CF)
(define_attr "use_carry" "0,1" (const_string "0"))
@@ -23153,6 +23157,7 @@
return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "gd")
(set (attr "length")
(symbol_ref "TARGET_X32 ? 15 : 16"))])
@@ -23191,7 +23196,11 @@
UNSPEC_TLS_GD)
(clobber (match_operand:P 3 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_local_dynamic_base_32_gnu"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -23253,6 +23262,7 @@
return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "ld_base")
(set_attr "length" "12")])
(define_insn "*tls_local_dynamic_base_64_largepic"
@@ -23286,7 +23296,11 @@
(unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
(clobber (match_operand:P 2 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
;; Local dynamic of a single variable is a lose. Show combine how
;; to convert that back to global dynamic.
@@ -23480,6 +23494,8 @@
"TARGET_64BIT && TARGET_GNU2_TLS"
{
operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
ix86_tls_descriptor_calls_expanded_in_cfun = true;
})
@@ -23491,6 +23507,7 @@
"lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
[(set_attr "type" "lea")
(set_attr "mode" "<MODE>")
+ (set_attr "tls64" "lea")
(set_attr "length" "7")
(set_attr "length_address" "4")])
@@ -23504,6 +23521,7 @@
"TARGET_64BIT && TARGET_GNU2_TLS"
"call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
[(set_attr "type" "call")
+ (set_attr "tls64" "call")
(set_attr "length" "2")
(set_attr "length_address" "0")])
@@ -23525,7 +23543,8 @@
{
operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
-})
+}
+ [(set_attr "tls64" "combine")])
(define_split
[(match_operand 0 "tls_address_pattern")]
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index c93c0b1..6bda22f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1246,6 +1246,10 @@ munroll-only-small-loops
Target Var(ix86_unroll_only_small_loops) Init(0) Optimization
Enable conservative small loop unrolling.
+-param=ix86-vect-unroll-limit=
+Target Joined UInteger Var(ix86_vect_unroll_limit) Init(4) Param
+Limit how much the autovectorizer may unroll a loop.
+
mlam=
Target RejectNegative Joined Enum(lam_type) Var(ix86_lam_type) Init(lam_none)
-mlam=[none|u48|u57] Instrument meta data position in user data pointers.
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 175798c..5dbe444 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1319,6 +1319,9 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_test "const_vec_duplicate_p (op)")))
+(define_predicate "const_vec_dup_operand"
+ (match_test "const_vec_duplicate_p (op)"))
+
;; Return true when OP is either register operand, or any
;; CONST_VECTOR.
(define_predicate "reg_or_const_vector_operand"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ec74f93..73906b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -326,6 +326,9 @@
(define_mode_iterator VI1_AVX512VL
[V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
+(define_mode_iterator VI1_AVX512_3264
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX")])
+
;; All vector modes
(define_mode_iterator V
[(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
@@ -26559,9 +26562,9 @@
;; XOP packed rotate instructions
(define_expand "rotl<mode>3"
- [(set (match_operand:VI_128 0 "register_operand")
- (rotate:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand")
+ [(set (match_operand:VI248_128 0 "register_operand")
+ (rotate:VI248_128
+ (match_operand:VI248_128 1 "nonimmediate_operand")
(match_operand:SI 2 "general_operand")))]
"TARGET_XOP"
{
@@ -26590,9 +26593,9 @@
})
(define_expand "rotr<mode>3"
- [(set (match_operand:VI_128 0 "register_operand")
- (rotatert:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand")
+ [(set (match_operand:VI248_128 0 "register_operand")
+ (rotatert:VI248_128
+ (match_operand:VI248_128 1 "nonimmediate_operand")
(match_operand:SI 2 "general_operand")))]
"TARGET_XOP"
{
@@ -26964,31 +26967,122 @@
int i;
if (<CODE> != ASHIFT)
- {
- if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- else
- negate = true;
- }
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ negate = true;
+ }
par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
tmp = lowpart_subreg (QImode, operands[2], SImode);
for (i = 0; i < 16; i++)
- XVECEXP (par, 0, i) = tmp;
+ XVECEXP (par, 0, i) = tmp;
tmp = gen_reg_rtx (V16QImode);
emit_insn (gen_vec_initv16qiqi (tmp, par));
if (negate)
- emit_insn (gen_negv16qi2 (tmp, tmp));
+ emit_insn (gen_negv16qi2 (tmp, tmp));
gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
emit_insn (gen (operands[0], operands[1], tmp));
}
+ else if (TARGET_GFNI && CONST_INT_P (operands[2])
+ && (<MODE_SIZE> == 64
+ || !(INTVAL (operands[2]) == 7 && <CODE> == ASHIFTRT)))
+ {
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2],
+ <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+ const0_rtx));
+ }
else
ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
DONE;
})
+(define_expand "cond_<insn><mode>"
+ [(set (match_operand:VI1_AVX512VL 0 "register_operand")
+ (vec_merge:VI1_AVX512VL
+ (any_shift:VI1_AVX512VL
+ (match_operand:VI1_AVX512VL 2 "register_operand")
+ (match_operand:VI1_AVX512VL 3 "const_vec_dup_operand"))
+ (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand")))]
+ "TARGET_GFNI && TARGET_AVX512F"
+{
+ rtx count = XVECEXP (operands[3], 0, 0);
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix,
+ const0_rtx, operands[4],
+ operands[1]));
+ DONE;
+})
+
+(define_expand "<insn><mode>3"
+ [(set (match_operand:VI1_AVX512_3264 0 "register_operand")
+ (any_rotate:VI1_AVX512_3264
+ (match_operand:VI1_AVX512_3264 1 "register_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_GFNI"
+{
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "<insn>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand")
+ (any_rotate:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_GFNI || TARGET_XOP"
+{
+  /* Handle the V16QI XOP case here; the rotl<mode>3/rotr<mode>3 expanders
+     above no longer cover V16QI.  */
+ if (TARGET_XOP)
+ {
+ if (! const_0_to_7_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != QImode)
+ {
+ op2 = gen_reg_rtx (QImode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_initv16qiqi (reg, par));
+ if (<CODE> == ROTATERT)
+ {
+              /* A right rotate is a left rotate by the negated count.  */
+              rtx neg = gen_reg_rtx (V16QImode);
+              emit_insn (gen_negv16qi2 (neg, reg));
+              reg = neg;
+ }
+ emit_insn (gen_xop_vrotlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+ }
+ }
+ else if (TARGET_GFNI && CONST_INT_P (operands[2]))
+ {
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+ emit_insn (gen_vgf2p8affineqb_v16qi (operands[0],
+ force_reg (V16QImode, operands[1]),
+ matrix, const0_rtx));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
(define_expand "ashrv2di3"
[(set (match_operand:V2DI 0 "register_operand")
(ashiftrt:V2DI
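
For reference, the kind of scalar loop the new GFNI paths above target: a byte
shift or rotate by a constant can now be vectorized through a single
vgf2p8affineqb (a sketch; the function name is illustrative):

    /* With GFNI enabled, a constant byte rotate like this can be vectorized
       via the any_rotate expanders above.  */
    void
    rotl3_bytes (unsigned char *restrict dst,
                 const unsigned char *restrict src, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = (unsigned char) ((src[i] << 3) | (src[i] >> 5));
    }
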
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index c8603b9..1649ea2 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -141,6 +141,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
ix86_size_memcpy,
ix86_size_memset,
COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
@@ -261,6 +267,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i386_memcpy,
i386_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -382,6 +394,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i486_memcpy,
i486_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -501,6 +519,12 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -613,6 +637,12 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -740,6 +770,12 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentiumpro_memcpy,
pentiumpro_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -858,6 +894,12 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
geode_memcpy,
geode_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -979,6 +1021,12 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k6_memcpy,
k6_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1101,6 +1149,12 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
athlon_memcpy,
athlon_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1232,6 +1286,12 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k8_memcpy,
k8_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1371,6 +1431,12 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
amdfam10_memcpy,
amdfam10_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -1503,6 +1569,12 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
bdver_memcpy,
bdver_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1668,6 +1740,12 @@ struct processor_costs znver1_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {5, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver1_memcpy,
znver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1836,6 +1914,12 @@ struct processor_costs znver2_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {10, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1979,6 +2063,12 @@ struct processor_costs znver3_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2125,6 +2215,12 @@ struct processor_costs znver4_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2287,6 +2383,12 @@ struct processor_costs znver5_cost = {
We increase width to 6 for multiplications
in ix86_reassociation_width. */
6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2422,6 +2524,12 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
skylake_memcpy,
skylake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2559,6 +2667,12 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 10, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
icelake_memcpy,
icelake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2690,6 +2804,12 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
alderlake_memcpy,
alderlake_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2814,6 +2934,12 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver1_memcpy,
btver1_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -2935,6 +3061,12 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver2_memcpy,
btver2_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -3055,6 +3187,12 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium4_memcpy,
pentium4_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3178,6 +3316,12 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
nocona_memcpy,
nocona_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3299,6 +3443,12 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 2, /* Limit how much the autovectorizer
+ may unroll a loop. */
atom_memcpy,
atom_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3420,6 +3570,12 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
slm_memcpy,
slm_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3555,6 +3711,12 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
tremont_memcpy,
tremont_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -3681,6 +3843,12 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
lujiazui_memcpy,
lujiazui_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -3805,6 +3973,12 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
yongfeng_memcpy,
yongfeng_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3929,6 +4103,12 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
shijidadao_memcpy,
shijidadao_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -4078,6 +4258,12 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
generic_memcpy,
generic_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -4215,6 +4401,12 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
core_memcpy,
core_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
index 50f72d5..836d93a 100644
--- a/gcc/config/loongarch/genopts/isa-evolution.in
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -2,4 +2,5 @@
2 26 div32 1.1 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
2 27 lam-bh 1.1 Support am{swap/add}[_db].{b/h} instructions.
2 28 lamcas 1.1 Support amcas[_db].{b/h/w/d} instructions.
+2 30 scq 1.1 Support sc.q instruction.
3 23 ld-seq-sa 1.1 Do not need load-load barriers (dbar 0x700).
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
index 04b277e..dcd8d90 100644
--- a/gcc/config/loongarch/loongarch-def.cc
+++ b/gcc/config/loongarch/loongarch-def.cc
@@ -72,7 +72,7 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.simd_ (ISA_EXT_SIMD_LASX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
| OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
- | OPTION_MASK_ISA_FRECIPE))
+ | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ))
.set (ARCH_LA64V1_0,
loongarch_isa ()
.base_ (ISA_BASE_LA64)
@@ -86,7 +86,7 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.simd_ (ISA_EXT_SIMD_LSX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
| OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
- | OPTION_MASK_ISA_FRECIPE));
+ | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ));
static inline loongarch_cache la464_cache ()
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
index 0bcd2a7..0a7d0c9 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -78,12 +78,10 @@ extern loongarch_def_array<const char *, N_ISA_EXT_TYPES>
/* Base ABI */
-enum {
- ABI_BASE_LP64D = 0,
- ABI_BASE_LP64F = 1,
- ABI_BASE_LP64S = 2,
- N_ABI_BASE_TYPES = 3
-};
+#define ABI_BASE_LP64D 0
+#define ABI_BASE_LP64F 1
+#define ABI_BASE_LP64S 2
+#define N_ABI_BASE_TYPES 3
extern loongarch_def_array<const char *, N_ABI_BASE_TYPES>
loongarch_abi_base_strings;
diff --git a/gcc/config/loongarch/loongarch-evolution.cc b/gcc/config/loongarch/loongarch-evolution.cc
index de68624..a92a645 100644
--- a/gcc/config/loongarch/loongarch-evolution.cc
+++ b/gcc/config/loongarch/loongarch-evolution.cc
@@ -32,6 +32,7 @@ int la_evo_feature_masks[] = {
OPTION_MASK_ISA_DIV32,
OPTION_MASK_ISA_LAM_BH,
OPTION_MASK_ISA_LAMCAS,
+ OPTION_MASK_ISA_SCQ,
OPTION_MASK_ISA_LD_SEQ_SA,
};
@@ -40,6 +41,7 @@ const char* la_evo_macro_name[] = {
"__loongarch_div32",
"__loongarch_lam_bh",
"__loongarch_lamcas",
+ "__loongarch_scq",
"__loongarch_ld_seq_sa",
};
@@ -48,6 +50,7 @@ int la_evo_version_major[] = {
1, /* DIV32 */
1, /* LAM_BH */
1, /* LAMCAS */
+ 1, /* SCQ */
1, /* LD_SEQ_SA */
};
@@ -56,5 +59,6 @@ int la_evo_version_minor[] = {
1, /* DIV32 */
1, /* LAM_BH */
1, /* LAMCAS */
+ 1, /* SCQ */
1, /* LD_SEQ_SA */
};
diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h
index 5f90839..7fb7b0d 100644
--- a/gcc/config/loongarch/loongarch-evolution.h
+++ b/gcc/config/loongarch/loongarch-evolution.h
@@ -36,6 +36,7 @@ static constexpr struct {
{ 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
{ 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
{ 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
+ { 2, 1u << 30, OPTION_MASK_ISA_SCQ },
{ 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
};
@@ -58,8 +59,9 @@ enum {
EVO_DIV32 = 1,
EVO_LAM_BH = 2,
EVO_LAMCAS = 3,
- EVO_LD_SEQ_SA = 4,
- N_EVO_FEATURES = 5
+ EVO_SCQ = 4,
+ EVO_LD_SEQ_SA = 5,
+ N_EVO_FEATURES = 6
};
/* Condition macros */
@@ -71,6 +73,8 @@ enum {
(la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH)
#define ISA_HAS_LAMCAS \
(la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS)
+#define ISA_HAS_SCQ \
+ (la_target.isa.evolution & OPTION_MASK_ISA_SCQ)
#define ISA_HAS_LD_SEQ_SA \
(la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA)
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
index 1546ea3..583cce8 100644
--- a/gcc/config/loongarch/loongarch-str.h
+++ b/gcc/config/loongarch/loongarch-str.h
@@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTSTR_DIV32 "div32"
#define OPTSTR_LAM_BH "lam-bh"
#define OPTSTR_LAMCAS "lamcas"
+#define OPTSTR_SCQ "scq"
#define OPTSTR_LD_SEQ_SA "ld-seq-sa"
#endif /* LOONGARCH_STR_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 493f95e..0935d7b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4388,6 +4388,7 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
else if (TARGET_RECIP_VEC_DIV
+ && vectype
&& gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
{
machine_mode mode = TYPE_MODE (vectype);
@@ -6221,9 +6222,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
'Q' Print R_LARCH_RELAX for TLS IE.
'r' Print address 12-31bit relocation associated with OP.
'R' Print address 32-51bit relocation associated with OP.
- 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
- 'z' for (eq:?I ...), 'n' for (ne:?I ...).
- 't' Like 'T', but with the EQ/NE cases reversed
+ 'T' Print a comment marker if %G outputs nothing.
+ 't' Print the register containing the higher 64 bits of a TImode.
'u' Print a LASX register.
'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
@@ -6306,6 +6306,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
fputs ("dbar\t0x700", file);
break;
+ case 'T':
+ if (!loongarch_cas_failure_memorder_needs_acquire (
+ memmodel_from_int (INTVAL (op)))
+ && ISA_HAS_LD_SEQ_SA)
+ fprintf (file, "%s", ASM_COMMENT_START);
+ break;
+
case 'h':
if (code == HIGH)
op = XEXP (op, 0);
@@ -6384,14 +6391,6 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
false /* lo_reloc */);
break;
- case 't':
- case 'T':
- {
- int truth = (code == NE) == (letter == 'T');
- fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
- }
- break;
-
case 'V':
if (CONST_VECTOR_P (op))
{
@@ -6495,6 +6494,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
}
break;
+ case 't':
+ if (GET_MODE (op) != TImode
+ || (op != CONST0_RTX (TImode) && code != REG))
+ {
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+ }
+ op = loongarch_subword (op, 1);
+ letter = 'z';
+ /* fall through */
default:
switch (code)
{
@@ -10786,9 +10795,9 @@ loongarch_expand_vec_cmp (rtx operands[])
to a fixed type. */
static machine_mode
-loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+loongarch_promote_function_mode (const_tree type,
machine_mode mode,
- int *punsignedp ATTRIBUTE_UNUSED,
+ int *punsignedp,
const_tree fntype ATTRIBUTE_UNUSED,
int for_return ATTRIBUTE_UNUSED)
{
@@ -11154,6 +11163,46 @@ loongarch_c_mode_for_suffix (char suffix)
return VOIDmode;
}
+/* Implement TARGET_C_BITINT_TYPE_INFO.
+ Return true if _BitInt(N) is supported and fill its details into *INFO. */
+bool
+loongarch_bitint_type_info (int n, struct bitint_info *info)
+{
+ if (n <= 8)
+ info->limb_mode = QImode;
+ else if (n <= 16)
+ info->limb_mode = HImode;
+ else if (n <= 32)
+ info->limb_mode = SImode;
+ else if (n <= 64)
+ info->limb_mode = DImode;
+ else if (n <= 128)
+ info->limb_mode = TImode;
+ else
+ info->limb_mode = DImode;
+
+ info->abi_limb_mode = info->limb_mode;
+
+ if (n > 64)
+ info->abi_limb_mode = TImode;
+
+ info->big_endian = false;
+ info->extended = true;
+ return true;
+}
+
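
A usage sketch for the hook above (illustrative only): _BitInt types of at
most 128 bits get a single machine-mode limb, while wider ones are lowered to
DImode limbs and passed in TImode-sized ABI chunks, with values extended to
the limb width:

    /* Lowered with a TImode limb.  */
    _BitInt(100) narrow_add (_BitInt(100) a, _BitInt(100) b) { return a + b; }

    /* Lowered to DImode limbs internally; abi_limb_mode is TImode.  */
    _BitInt(200) wide_add (_BitInt(200) a, _BitInt(200) b) { return a + b; }
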
+/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
+
+static int
+loongarch_compute_pressure_classes (reg_class *classes)
+{
+ int i = 0;
+ classes[i++] = GENERAL_REGS;
+ classes[i++] = FP_REGS;
+ classes[i++] = FCC_REGS;
+ return i;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11428,6 +11477,12 @@ loongarch_c_mode_for_suffix (char suffix)
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO loongarch_bitint_type_info
+
+#undef TARGET_COMPUTE_PRESSURE_CLASSES
+#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 5fc8665..e8819bf 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -270,7 +270,9 @@ along with GCC; see the file COPYING3. If not see
if (GET_MODE_CLASS (MODE) == MODE_INT \
&& GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
{ \
- if ((MODE) == SImode) \
+ if ((MODE) == SImode \
+ && !(TYPE && TREE_CODE (TYPE) == BITINT_TYPE \
+ && TYPE_PRECISION (TYPE) < 32)) \
(UNSIGNEDP) = 0; \
(MODE) = Pmode; \
}
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 4d85cf5..fbe61c0 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -334,6 +334,10 @@ mlamcas
Target Mask(ISA_LAMCAS) Var(la_isa_evolution)
Support amcas[_db].{b/h/w/d} instructions.
+mscq
+Target Mask(ISA_SCQ) Var(la_isa_evolution)
+Support sc.q instruction.
+
mld-seq-sa
Target Mask(ISA_LD_SEQ_SA) Var(la_isa_evolution)
Do not need load-load barriers (dbar 0x700).
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index 5f644f6..606a211 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -90,6 +90,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh)
mlamcas
UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas)
+mscq
+UrlSuffix(gcc/LoongArch-Options.html#index-mscq)
+
mld-seq-sa
UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa)
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index dd17cd1..4156b26 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -773,7 +773,7 @@
(vec_select:<VEC_HALF>
(match_operand:IVEC 2 "register_operand" "f")
(match_operand:IVEC 4 "vect_par_cnst_even_or_odd_half")))
- (any_extend:<WVEC>
+ (any_extend:<WVEC_HALF>
(vec_select:<VEC_HALF>
(match_operand:IVEC 3 "register_operand" "f")
(match_dup 4))))
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index fd8d732..2ee400e 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -21,25 +21,25 @@
(define_c_enum "unspec" [
UNSPEC_COMPARE_AND_SWAP
+ UNSPEC_COMPARE_AND_SWAP_AMCAS
UNSPEC_COMPARE_AND_SWAP_ADD
UNSPEC_COMPARE_AND_SWAP_SUB
- UNSPEC_COMPARE_AND_SWAP_AND
- UNSPEC_COMPARE_AND_SWAP_XOR
- UNSPEC_COMPARE_AND_SWAP_OR
UNSPEC_COMPARE_AND_SWAP_NAND
UNSPEC_SYNC_OLD_OP
UNSPEC_SYNC_EXCHANGE
UNSPEC_ATOMIC_STORE
UNSPEC_ATOMIC_LOAD
UNSPEC_MEMORY_BARRIER
+
+ UNSPEC_TI_FETCH_ADD
+ UNSPEC_TI_FETCH_SUB
+ UNSPEC_TI_FETCH_AND
+ UNSPEC_TI_FETCH_XOR
+ UNSPEC_TI_FETCH_OR
+ UNSPEC_TI_FETCH_NAND_MASK_INVERTED
])
(define_code_iterator any_atomic [plus ior xor and])
-(define_code_attr atomic_optab
- [(plus "add") (ior "or") (xor "xor") (and "and")])
-
-;; This attribute gives the format suffix for atomic memory operations.
-(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
;; <amop> expands to the name of the atomic operand that implements a
;; particular code.
@@ -107,7 +107,7 @@
(define_insn "atomic_load<mode>"
[(set (match_operand:QHWD 0 "register_operand" "=r")
(unspec_volatile:QHWD
- [(match_operand:QHWD 1 "memory_operand" "+m")
+ [(match_operand:QHWD 1 "memory_operand" "m")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_ATOMIC_LOAD))]
""
@@ -142,9 +142,50 @@
}
[(set (attr "length") (const_int 12))])
+(define_insn "atomic_loadti_lsx"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (unspec_volatile:V2DI
+ [(match_operand:TI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPEC_ATOMIC_LOAD))]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+ switch (model)
+ {
+ case MEMMODEL_SEQ_CST:
+ output_asm_insn ("dbar\t0x11", operands);
+ /* fall through */
+ case MEMMODEL_ACQUIRE:
+ case MEMMODEL_RELAXED:
+ return "vld\t%w0,%1\\n\\t%G2";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "length") (const_int 12))])
+
+(define_expand "atomic_loadti"
+ [(match_operand:TI 0 "register_operand" "=r")
+ (match_operand:TI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand")]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ rtx vr = gen_reg_rtx (V2DImode);
+
+ emit_insn (gen_atomic_loadti_lsx (vr, operands[1], operands[2]));
+ for (int i = 0; i < 2; i++)
+ emit_insn (
+ gen_lsx_vpickve2gr_d (loongarch_subword (operands[0], i), vr,
+ GEN_INT (i)));
+ DONE;
+})
+
;; Implement atomic stores with amoswap. Fall back to fences for atomic loads.
(define_insn "atomic_store<mode>"
- [(set (match_operand:QHWD 0 "memory_operand" "+m")
+ [(set (match_operand:QHWD 0 "memory_operand" "=m")
(unspec_volatile:QHWD
[(match_operand:QHWD 1 "reg_or_0_operand" "rJ")
(match_operand:SI 2 "const_int_operand")] ;; model
@@ -175,7 +216,67 @@
}
[(set (attr "length") (const_int 12))])
-(define_insn "atomic_<atomic_optab><mode>"
+(define_insn "atomic_storeti_lsx"
+ [(set (match_operand:TI 0 "memory_operand" "=m")
+ (unspec_volatile:TI
+ [(match_operand:V2DI 1 "register_operand" "f")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPEC_ATOMIC_STORE))]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+ switch (model)
+ {
+ case MEMMODEL_SEQ_CST:
+ return "dbar\t0x12\\n\\t"
+ "vst\t%w1,%0\\n\\t"
+ "dbar\t0x18";
+ case MEMMODEL_RELEASE:
+ return "dbar\t0x12\\n\\t"
+ "vst\t%w1,%0";
+ case MEMMODEL_RELAXED:
+ return "vst\t%w1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "length") (const_int 12))])
+
+(define_insn "atomic_storeti_scq"
+ [(set (match_operand:TI 0 "memory_operand" "=m")
+ (unspec_volatile:TI
+ [(match_operand:TI 1 "register_operand" "r")]
+ UNSPEC_ATOMIC_STORE))
+ (clobber (match_scratch:DI 2 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+ "1:\\n\\tll.d\t$r0,%0\n\tmove\t%2,%1\n\tsc.q\t%2,%t1,%0\n\tbeqz\t%2,1b"
+ [(set (attr "length") (const_int 16))])
+
+(define_expand "atomic_storeti"
+ [(match_operand:TI 0 "memory_operand" "=m")
+ (match_operand:TI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "const_int_operand")]
+ "TARGET_64BIT && (ISA_HAS_LSX || ISA_HAS_SCQ)"
+{
+ if (!ISA_HAS_LSX)
+ {
+ emit_insn (gen_atomic_storeti_scq (operands[0], operands[1]));
+ DONE;
+ }
+
+ rtx vr = gen_reg_rtx (V2DImode), op1 = operands[1];
+ rtvec v = rtvec_alloc (2);
+
+ for (int i = 0; i < 2; i++)
+ RTVEC_ELT (v, i) = loongarch_subword (op1, i);
+
+ emit_insn (gen_vec_initv2didi (vr, gen_rtx_PARALLEL (V2DImode, v)));
+ emit_insn (gen_atomic_storeti_lsx (operands[0], vr, operands[2]));
+ DONE;
+})
+
+(define_insn "atomic_<amop><mode>"
[(set (match_operand:GPR 0 "memory_operand" "+ZB")
(unspec_volatile:GPR
[(any_atomic:GPR (match_dup 0)
@@ -183,7 +284,7 @@
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
""
- "am<amop>%A2.<amo>\t$zero,%z1,%0"
+ "am<amop>%A2.<size>\t$zero,%z1,%0"
[(set (attr "length") (const_int 4))])
(define_insn "atomic_add<mode>"
@@ -194,10 +295,10 @@
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
"ISA_HAS_LAM_BH"
- "amadd%A2.<amo>\t$zero,%z1,%0"
+ "amadd%A2.<size>\t$zero,%z1,%0"
[(set (attr "length") (const_int 4))])
-(define_insn "atomic_fetch_<atomic_optab><mode>"
+(define_insn "atomic_fetch_<amop><mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(match_operand:GPR 1 "memory_operand" "+ZB"))
(set (match_dup 1)
@@ -207,9 +308,52 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
""
- "am<amop>%A3.<amo>\t%0,%z2,%1"
+ "am<amop>%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
+(define_insn "atomic_fetch_nand_mask_inverted<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&r")
+ (match_operand:GPR 1 "memory_operand" "+ZC"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(ior:GPR (not (match_dup 1))
+ (match_operand:GPR 2 "register_operand" "r"))]
+ UNSPEC_SYNC_OLD_OP))
+ (clobber (match_scratch:GPR 3 "=&r"))]
+ ""
+ {
+ return "1:\\n\\t"
+ "ll.<d>\\t%0,%1\\n\\t"
+ "orn\\t%3,%2,%0\\n\\t"
+ "sc.<d>\\t%3,%1\\n\\t"
+ "beqz\\t%3,1b";
+ }
+ [(set (attr "length") (const_int 16))])
+
+(define_mode_iterator ALL_SC [GPR (TI "TARGET_64BIT && ISA_HAS_SCQ")])
+(define_mode_attr _scq [(SI "") (DI "") (TI "_scq")])
+(define_expand "atomic_fetch_nand<mode>"
+ [(match_operand:ALL_SC 0 "register_operand")
+ (match_operand:ALL_SC 1 "memory_operand")
+ (match_operand:ALL_SC 2 "reg_or_0_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+    /* ~(atom & mask) = (~mask) | (~atom), so we can hoist (~mask) out of
+       the ll/sc loop and combine it with a single orn instruction inside
+       the loop.  */
+ rtx inverted_mask = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (inverted_mask,
+ expand_simple_unop (<MODE>mode, NOT, operands[2],
+ NULL_RTX, false));
+
+ emit_insn (
+ gen_atomic_fetch_nand_mask_inverted<mode><_scq> (operands[0],
+ operands[1],
+ inverted_mask));
+ DONE;
+ })
+
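
A trivial check of the identity the comment above relies on (illustrative
values only):

    #include <assert.h>

    int
    main (void)
    {
      unsigned atom = 0x12345678u, mask = 0x0ff00ff0u;
      /* De Morgan: ~(atom & mask) == ~atom | ~mask, so ~mask is computed
         once outside the ll/sc loop and combined with a single orn inside.  */
      assert (~(atom & mask) == (~atom | ~mask));
      return 0;
    }
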
(define_insn "atomic_exchange<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(unspec_volatile:GPR
@@ -219,9 +363,44 @@
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "r"))]
""
- "amswap%A3.<amo>\t%0,%z2,%1"
+ "amswap%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
+(define_insn "atomic_exchangeti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (unspec_volatile:TI
+ [(match_operand:TI 1 "memory_operand" "+ZB")]
+ UNSPEC_SYNC_EXCHANGE))
+ (set (match_dup 1)
+ (match_operand:TI 2 "register_operand" "rJ"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+ output_asm_insn ("move\t%3,%z2", operands);
+ output_asm_insn ("sc.q\t%3,%t2,%1", operands);
+ output_asm_insn ("beqz\t%3,1b", operands);
+
+ return "";
+}
+ [(set (attr "length") (const_int 24))])
+
+(define_expand "atomic_exchangeti"
+ [(match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB")
+ (match_operand:TI 2 "register_operand" "rJ")
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ emit_insn (gen_atomic_exchangeti_scq (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_exchange<mode>_short"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(unspec_volatile:SHORT
@@ -231,7 +410,7 @@
(set (match_dup 1)
(match_operand:SHORT 2 "register_operand" "r"))]
"ISA_HAS_LAM_BH"
- "amswap%A3.<amo>\t%0,%z2,%1"
+ "amswap%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
(define_insn "atomic_cas_value_strong<mode>"
@@ -240,13 +419,13 @@
(set (match_dup 1)
(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")] ;; mod_s
+ (match_operand:SI 4 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 5 "=&r"))]
""
{
output_asm_insn ("1:", operands);
- output_asm_insn ("ll.<amo>\t%0,%1", operands);
+ output_asm_insn ("ll.<size>\t%0,%1", operands);
/* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the
return value of the val_without_const_folding will not be truncated and
@@ -266,9 +445,9 @@
output_asm_insn ("bne\t%0,%z2,2f", operands);
output_asm_insn ("or%i3\t%5,$zero,%3", operands);
- output_asm_insn ("sc.<amo>\t%5,%1", operands);
+ output_asm_insn ("sc.<size>\t%5,%1", operands);
output_asm_insn ("beqz\t%5,1b", operands);
- output_asm_insn ("b\t3f", operands);
+ output_asm_insn ("%T4b\t3f", operands);
output_asm_insn ("2:", operands);
output_asm_insn ("%G4", operands);
output_asm_insn ("3:", operands);
@@ -288,10 +467,10 @@
(set (match_dup 1)
(unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ")
(match_operand:QHWD 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")] ;; mod_s
- UNSPEC_COMPARE_AND_SWAP))]
+ (match_operand:SI 4 "const_int_operand")] ;; mod
+ UNSPEC_COMPARE_AND_SWAP_AMCAS))]
"ISA_HAS_LAMCAS"
- "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
+ "ori\t%0,%z2,0\n\tamcas%A4.<size>\t%0,%z3,%1"
[(set (attr "length") (const_int 8))])
(define_expand "atomic_compare_and_swap<mode>"
@@ -318,16 +497,14 @@
&& is_mm_release (memmodel_base (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
- operands[6] = mod_s;
-
if (ISA_HAS_LAMCAS)
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_s));
else
emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_f));
rtx compare = operands[1];
if (operands[3] != const0_rtx)
@@ -349,49 +526,74 @@
DONE;
})
-(define_expand "atomic_test_and_set"
- [(match_operand:QI 0 "register_operand" "") ;; bool output
- (match_operand:QI 1 "memory_operand" "+ZB") ;; memory
- (match_operand:SI 2 "const_int_operand" "")] ;; model
+(define_expand "atomic_fetch_<amop><mode>"
+ [(match_operand:SHORT 0 "register_operand" "") ;; output
+ (any_bitwise (match_operand:SHORT 1 "memory_operand" "+ZB") ;; memory
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ;; val
+ (match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
- /* We have no QImode atomics, so use the address LSBs to form a mask,
- then use an aligned SImode atomic. */
+ /* We have no QI/HImode bitwise atomics, so use the address LSBs to form
+ a mask, then use an aligned SImode atomic. */
rtx result = operands[0];
rtx mem = operands[1];
- rtx model = operands[2];
+ rtx model = operands[3];
rtx addr = force_reg (Pmode, XEXP (mem, 0));
- rtx tmp_reg = gen_reg_rtx (Pmode);
- rtx zero_reg = gen_rtx_REG (Pmode, 0);
-
+ rtx mask = gen_int_mode (-4, Pmode);
rtx aligned_addr = gen_reg_rtx (Pmode);
- emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4)));
- emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg));
+
+ if (!and_operand (mask, Pmode))
+ mask = force_reg (Pmode, mask);
+
+ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, mask));
rtx aligned_mem = change_address (mem, SImode, aligned_addr);
set_mem_alias_set (aligned_mem, 0);
- rtx offset = gen_reg_rtx (SImode);
- emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
- GEN_INT (3)));
-
rtx tmp = gen_reg_rtx (SImode);
- emit_move_insn (tmp, GEN_INT (1));
+ emit_move_insn (tmp, simplify_gen_unary (ZERO_EXTEND, SImode,
+ operands[2], <MODE>mode));
+ /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need
+ to mask addr with 0b11 here. */
rtx shmt = gen_reg_rtx (SImode);
- emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+ emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, gen_lowpart (SImode, addr),
+ GEN_INT (3)));
rtx word = gen_reg_rtx (SImode);
emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt));
+ if (<is_and>)
+ {
+ /* word = word | ~(mode_mask << shmt) */
+ rtx tmp = force_reg (SImode,
+ gen_int_mode (GET_MODE_MASK (<MODE>mode),
+ SImode));
+ emit_move_insn (tmp, gen_rtx_ASHIFT (SImode, tmp, shmt));
+ emit_move_insn (word, gen_rtx_IOR (SImode, gen_rtx_NOT (SImode, tmp),
+ word));
+ }
+
tmp = gen_reg_rtx (SImode);
- emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+ emit_insn (gen_atomic_fetch_<amop>si (tmp, aligned_mem, word, model));
emit_move_insn (gen_lowpart (SImode, result),
gen_rtx_LSHIFTRT (SImode, tmp, shmt));
DONE;
})
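
In plain C, the subword technique above amounts to roughly the following
(a sketch assuming a little-endian target and 4-byte containers; names are
illustrative):

    #include <stdint.h>

    unsigned char
    fetch_or_u8 (unsigned char *p, unsigned char val, int model)
    {
      uintptr_t a = (uintptr_t) p;
      unsigned int *aligned = (unsigned int *) (a & ~(uintptr_t) 3);
      unsigned int shift = (a & 3) * 8;        /* bit offset of the byte.  */
      unsigned int word = (unsigned int) val << shift;
      unsigned int old = __atomic_fetch_or (aligned, word, model);
      return (unsigned char) (old >> shift);
    }

For the AND case the expander additionally ORs ones into the bits outside the
byte lane (the <is_and> branch above), so the rest of the word is left
unchanged.
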
+(define_expand "atomic_test_and_set"
+ [(match_operand:QI 0 "register_operand" "") ;; bool output
+ (match_operand:QI 1 "memory_operand" "+ZB") ;; memory
+ (match_operand:SI 2 "const_int_operand" "")] ;; model
+ ""
+{
+ rtx one = force_reg (QImode, gen_int_mode (1, QImode));
+ emit_insn (gen_atomic_fetch_orqi (operands[0], operands[1], one,
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_cas_value_cmp_and_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(match_operand:GPR 1 "memory_operand" "+ZC"))
@@ -400,20 +602,20 @@
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
(match_operand:GPR 4 "reg_or_0_operand" "rJ")
(match_operand:GPR 5 "reg_or_0_operand" "rJ")
- (match_operand:SI 6 "const_int_operand")] ;; model
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 7 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%2\\n\\t"
"bne\\t%7,%z4,2f\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b\\n\\t"
- "b\\t3f\\n\\t"
+ "%T6b\\t3f\\n\\t"
"2:\\n\\t"
"%G6\\n\\t"
"3:\\n\\t";
@@ -444,18 +646,16 @@
&& is_mm_release (memmodel_base (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
- operands[6] = mod_s;
-
if (ISA_HAS_LAMCAS)
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_s));
else
{
union loongarch_gen_fn_ptrs generator;
generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
- operands[3], operands[4], operands[6]);
+ operands[3], operands[4], mod_f);
}
rtx compare = operands[1];
@@ -481,83 +681,96 @@
DONE;
})
-(define_insn "atomic_cas_value_add_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
+(define_insn "atomic_compare_and_swapti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
(set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_ADD))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
+ (unspec_volatile:TI [(match_operand:TI 2 "reg_or_0_operand" "rJ")
+ (match_operand:TI 3 "reg_or_0_operand" "rJ")
+ (match_operand:SI 4 "const_int_operand")] ;; mod_f
+ UNSPEC_COMPARE_AND_SWAP))
+ (clobber (match_scratch:DI 5 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "add.w\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
-}
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
- [(set (attr "length") (const_int 28))])
+  /* Compare the low word.  */
+ output_asm_insn ("bne\t%0,%z2,2f", operands);
-(define_insn "atomic_cas_value_sub_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
- (set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_SUB))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
-{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "sub.w\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
+  /* Don't reorder the load of the high word before ll.d.  As the TImode
+     value must be aligned in memory, the high and low words are in the
+     same cache line, so dbar 0x700 is enough.  */
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+
+  /* Now load the high word.  As the high and low words are in the same
+     cache line, if another core clobbers the high word before sc.q
+     executes, the LL bit for the low word will be cleared.  Thus a
+     normal load is sufficient.  */
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+
+ /* Compare the high word. */
+ output_asm_insn ("bne\t%t0,%t2,2f", operands);
+
+ /* Copy the low word of the new value as it'll be clobbered by sc.q. */
+ output_asm_insn ("move\t%5,%z3", operands);
+
+ /* Store both words if LL bit is still set. */
+ output_asm_insn ("sc.q\t%5,%t3,%1", operands);
+
+ /* Check if sc.q has done the store. */
+ output_asm_insn ("beqz\t%5,1b", operands);
+
+ /* Jump over the mod_f barrier if sc.q has succeeded. */
+ output_asm_insn ("%T4b\t3f", operands);
+
+ /* The barrier for mod_f. */
+ output_asm_insn ("2:", operands);
+ output_asm_insn ("%G4", operands);
+
+ output_asm_insn ("3:", operands);
+ return "";
}
- [(set (attr "length") (const_int 28))])
+ [(set_attr "length" "40")])
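For context, a hedged usage sketch of what this new pattern serves: with ISA_HAS_SCQ a 16-byte compare-and-swap can be expanded inline as the ll.d/ld.d/sc.q loop above rather than being left to libatomic. The builtin below is GCC's generic one; the wrapper name and types are illustrative only.

/* Illustrative wrapper, not part of the patch.  */
static bool
cas16 (unsigned __int128 *p, unsigned __int128 *expected, unsigned __int128 desired)
{
  return __atomic_compare_exchange_n (p, expected, desired, /* weak */ false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}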
-(define_insn "atomic_cas_value_and_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
- (set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_AND))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
+(define_expand "atomic_compare_and_swapti"
+ [(match_operand:SI 0 "register_operand" "") ;; bool output
+ (match_operand:TI 1 "register_operand" "") ;; val output
+ (match_operand:TI 2 "memory_operand" "") ;; memory
+ (match_operand:TI 3 "reg_or_0_operand" "") ;; expected value
+ (match_operand:TI 4 "reg_or_0_operand" "") ;; desired value
+ (match_operand:SI 5 "const_int_operand" "") ;; is_weak
+ (match_operand:SI 6 "const_int_operand" "") ;; mod_s
+ (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
+ "TARGET_64BIT && ISA_HAS_SCQ"
{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "and\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
-}
- [(set (attr "length") (const_int 28))])
+ emit_insn (gen_atomic_compare_and_swapti_scq (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[7]));
+
+ rtx t[2];
-(define_insn "atomic_cas_value_xor_7_<mode>"
+ for (int i = 0; i < 2; i++)
+ {
+ rtx compare = loongarch_subword (operands[1], i);
+ rtx expect = loongarch_subword (operands[3], i);
+
+ t[i] = gen_reg_rtx (DImode);
+
+ if (expect != const0_rtx)
+ emit_insn (gen_xordi3 (t[i], compare, expect));
+ else
+ emit_move_insn (t[i], compare);
+ }
+
+ emit_insn (gen_iordi3 (t[0], t[0], t[1]));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_EQ (SImode, t[0], const0_rtx)));
+ DONE;
+})
+
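The success flag here is computed from the value output rather than from a flag register: each DImode subword of the CAS result is XORed with the corresponding subword of the expected value (or simply copied when the expected subword is zero), the differences are ORed together, and success means the combined value is zero. A scalar sketch with hypothetical argument names:

#include <stdint.h>
/* Sketch only; the arguments stand for the subwords extracted by
   loongarch_subword above.  */
static inline bool
cas_succeeded (uint64_t out_lo, uint64_t out_hi, uint64_t exp_lo, uint64_t exp_hi)
{
  return ((out_lo ^ exp_lo) | (out_hi ^ exp_hi)) == 0;
}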
+(define_insn "atomic_cas_value_add_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
(match_operand:GPR 1 "memory_operand" "+ZC"))
(set (match_dup 1)
@@ -566,24 +779,24 @@
(match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
(match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
(match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_XOR))
+ UNSPEC_COMPARE_AND_SWAP_ADD))
(clobber (match_scratch:GPR 7 "=&r"))
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
- "xor\\t%8,%0,%z5\\n\\t"
+ "add.w\\t%8,%0,%z5\\n\\t"
"and\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 28))])
-(define_insn "atomic_cas_value_or_7_<mode>"
+(define_insn "atomic_cas_value_sub_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
(match_operand:GPR 1 "memory_operand" "+ZC"))
(set (match_dup 1)
@@ -592,21 +805,20 @@
(match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
(match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
(match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_OR))
+ UNSPEC_COMPARE_AND_SWAP_SUB))
(clobber (match_scratch:GPR 7 "=&r"))
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
- "or\\t%8,%0,%z5\\n\\t"
+ "sub.w\\t%8,%0,%z5\\n\\t"
"and\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
-
[(set (attr "length") (const_int 28))])
(define_insn "atomic_cas_value_nand_7_<mode>"
@@ -624,12 +836,12 @@
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"and\\t%8,%0,%z5\\n\\t"
"xor\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 28))])
@@ -648,10 +860,10 @@
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beqz\\t%7,1b\\n\\t";
}
[(set (attr "length") (const_int 20))])
@@ -678,6 +890,101 @@
DONE;
})
+(define_int_iterator UNSPEC_TI_FETCH_DIRECT
+ [UNSPEC_TI_FETCH_ADD
+ UNSPEC_TI_FETCH_SUB
+ UNSPEC_TI_FETCH_AND
+ UNSPEC_TI_FETCH_XOR
+ UNSPEC_TI_FETCH_OR])
+(define_int_iterator UNSPEC_TI_FETCH
+ [UNSPEC_TI_FETCH_DIRECT UNSPEC_TI_FETCH_NAND_MASK_INVERTED])
+(define_int_attr amop_ti_fetch
+ [(UNSPEC_TI_FETCH_ADD "add")
+ (UNSPEC_TI_FETCH_SUB "sub")
+ (UNSPEC_TI_FETCH_AND "and")
+ (UNSPEC_TI_FETCH_XOR "xor")
+ (UNSPEC_TI_FETCH_OR "or")
+ (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "nand_mask_inverted")])
+(define_int_attr size_ti_fetch
+ [(UNSPEC_TI_FETCH_ADD "36")
+ (UNSPEC_TI_FETCH_SUB "36")
+ (UNSPEC_TI_FETCH_AND "28")
+ (UNSPEC_TI_FETCH_XOR "28")
+ (UNSPEC_TI_FETCH_OR "28")
+ (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "28")])
+
+(define_insn "atomic_fetch_<amop_ti_fetch>ti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
+ (set (match_dup 1)
+ (unspec_volatile:TI
+ [(match_dup 0)
+ (match_operand:TI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_TI_FETCH))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (match_scratch:DI 4 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+
+ switch (<UNSPEC_TI_FETCH>)
+ {
+ case UNSPEC_TI_FETCH_AND:
+ case UNSPEC_TI_FETCH_OR:
+ case UNSPEC_TI_FETCH_XOR:
+ output_asm_insn ("<amop_ti_fetch>\t%3,%0,%z2", operands);
+ output_asm_insn ("<amop_ti_fetch>\t%4,%t0,%t2", operands);
+ break;
+ case UNSPEC_TI_FETCH_NAND_MASK_INVERTED:
+ output_asm_insn ("orn\t%3,%z2,%0", operands);
+ output_asm_insn ("orn\t%4,%t2,%t0", operands);
+ break;
+ case UNSPEC_TI_FETCH_ADD:
+ case UNSPEC_TI_FETCH_SUB:
+ output_asm_insn ("<amop_ti_fetch>.d\t%3,%0,%z2", operands);
+
+ /* Generate carry bit. */
+ output_asm_insn (
+ <UNSPEC_TI_FETCH> == UNSPEC_TI_FETCH_ADD ? "sltu\t%4,%3,%0"
+ : "sltu\t%4,%0,%3",
+ operands);
+
+ output_asm_insn ("<amop_ti_fetch>.d\t%4,%t0,%4", operands);
+ output_asm_insn ("<amop_ti_fetch>.d\t%4,%4,%t2", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ output_asm_insn ("sc.q\t%3,%4,%1", operands);
+ output_asm_insn ("beqz\t%3,1b", operands);
+
+ return "";
+}
+ [(set_attr "length" "<size_ti_fetch>")])
+
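For the ADD and SUB cases the 128-bit operation is synthesized from 64-bit halves: sltu recovers the carry (for add) or borrow (for sub) from the low-word result, and the high word is then adjusted twice. A plain sketch of the same arithmetic, with illustrative names:

#include <stdint.h>
/* Carry out of the low word: the low sum wrapped iff it is below a_lo.  */
static void
add128 (uint64_t a_lo, uint64_t a_hi, uint64_t b_lo, uint64_t b_hi,
        uint64_t *r_lo, uint64_t *r_hi)
{
  *r_lo = a_lo + b_lo;
  uint64_t carry = *r_lo < a_lo;   /* sltu %4,%3,%0 */
  *r_hi = a_hi + carry + b_hi;     /* two add.d instructions */
}

/* Borrow out of the low word: the low difference wrapped iff it exceeds a_lo.  */
static void
sub128 (uint64_t a_lo, uint64_t a_hi, uint64_t b_lo, uint64_t b_hi,
        uint64_t *r_lo, uint64_t *r_hi)
{
  *r_lo = a_lo - b_lo;
  uint64_t borrow = a_lo < *r_lo;  /* sltu %4,%0,%3 */
  *r_hi = a_hi - borrow - b_hi;    /* two sub.d instructions */
}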
+(define_expand "atomic_fetch_<amop_ti_fetch>ti"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
+ (set (match_dup 1)
+ (unspec_volatile:TI
+ [(match_dup 0)
+ (match_operand:TI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_TI_FETCH_DIRECT))
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ /* Model is ignored as sc.q implies a full barrier. */
+ emit_insn (gen_atomic_fetch_<amop_ti_fetch>ti_scq (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
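A hedged usage sketch, assuming a target where ISA_HAS_SCQ holds: the standard builtin below now expands to the inline ll.d/sc.q loop, and per the comment above even a relaxed memory order effectively gets full-barrier semantics. The wrapper name is illustrative only.

static unsigned __int128
fetch_add16 (unsigned __int128 *p, unsigned __int128 v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_RELAXED);
}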
(define_insn "atomic_fetch_add<mode>_short"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(match_operand:SHORT 1 "memory_operand" "+ZB"))
@@ -688,7 +995,7 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
"ISA_HAS_LAM_BH"
- "amadd%A3.<amo>\t%0,%z2,%1"
+ "amadd%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
(define_expand "atomic_fetch_add<mode>"
@@ -724,7 +1031,7 @@
(match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
- ""
+ "!ISA_HAS_LAM_BH"
{
union loongarch_gen_fn_ptrs generator;
generator.fn_7 = gen_atomic_cas_value_sub_7_si;
@@ -733,60 +1040,6 @@
DONE;
})
-(define_expand "atomic_fetch_and<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(and:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_and_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
-(define_expand "atomic_fetch_xor<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(xor:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_xor_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
-(define_expand "atomic_fetch_or<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(ior:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_or_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
(define_expand "atomic_fetch_nand<mode>"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(match_operand:SHORT 1 "memory_operand" "+ZB"))
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index e224ade..494f14c 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2363,8 +2363,14 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
-#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \
- || (flag_sanitize & SANITIZE_ADDRESS) != 0)
+/* Growing the frame downwards allows us to put spills closest to
+ the stack pointer which is good as they are likely to be accessed
+ frequently. We can also arrange for normal stack usage to place
+ scalars last so that they too are close to the stack pointer. */
+#define FRAME_GROWS_DOWNWARD ((TARGET_MIPS16 \
+ && TARGET_FRAME_GROWS_DOWNWARDS) \
+ || (flag_stack_protect != 0 \
+ || (flag_sanitize & SANITIZE_ADDRESS) != 0))
/* Size of the area allocated in the frame to save the GP. */
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index e245654..f07db5a 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -473,6 +473,10 @@ mframe-header-opt
Target Var(flag_frame_header_optimization) Optimization
Optimize frame header.
+mgrow-frame-downwards
+Target Var(TARGET_FRAME_GROWS_DOWNWARDS) Init(1) Undocumented
+Change the behaviour to grow the frame downwards.
+
noasmopt
Driver
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 322e319..3fdc56e 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -941,10 +941,19 @@ pru_init_libfuncs (void)
/* Long long. */
set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
- set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
+ if (TARGET_OPT_MUL)
+ {
+ set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
+ }
+ else
+ {
+ set_optab_libfunc (smul_optab, DImode, "__pruabi_softmpyll");
+ set_optab_libfunc (smul_optab, SImode, "__pruabi_softmpyi");
+ }
+
set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h
index 6c0719b..9d547ed 100644
--- a/gcc/config/pru/pru.h
+++ b/gcc/config/pru/pru.h
@@ -65,6 +65,9 @@
#undef ENDFILE_SPEC
#define ENDFILE_SPEC "%{!mabi=ti:-lgloss} "
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mloop", "mmul", "mfillzero" }
+
/* TI ABI mandates that ELF symbols do not start with any prefix. */
#undef USER_LABEL_PREFIX
#define USER_LABEL_PREFIX ""
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index 3504e42..b8ef55b 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -215,7 +215,7 @@
mov\\t%0, %1
ldi\\t%0, %%pmem(%1)
ldi\\t%0, %1
- fill\\t%0, 4
+ * return TARGET_OPT_FILLZERO ? \"fill\\t%0, 4\" : \"ldi32\\t%0, 0xffffffff\";
ldi32\\t%0, %1"
[(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
(set_attr "length" "4,4,4,4,4,4,8")])
@@ -259,9 +259,11 @@
case 1:
return "lb%B1o\\t%b0, %1, %S1";
case 2:
- return "zero\\t%F0, 8";
+ return TARGET_OPT_FILLZERO ? "zero\\t%F0, 8"
+ : "ldi\\t%F0, 0\;ldi\\t%N0, 0";
case 3:
- return "fill\\t%F0, 8";
+ return TARGET_OPT_FILLZERO ? "fill\\t%F0, 8"
+ : "ldi32\\t%F0, 0xffffffff\;mov\\t%N0, %F0";
case 4:
/* careful with overlapping source and destination regs. */
gcc_assert (GP_REG_P (REGNO (operands[0])));
@@ -502,7 +504,7 @@
(define_insn "zero_extendqidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:QI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%F0.b1, 7
mov\\t%F0.b0, %1\;zero\\t%F0.b1, 7"
@@ -512,7 +514,7 @@
(define_insn "zero_extendhidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:HI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%F0.b2, 6
mov\\t%F0.w0, %1\;zero\\t%F0.b2, 6"
@@ -522,7 +524,7 @@
(define_insn "zero_extendsidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%N0, 4
mov\\t%F0, %1\;zero\\t%N0, 4"
@@ -535,7 +537,7 @@
(define_expand "extend<EQS0:mode><EQDHIDI:mode>2"
[(set (match_operand:EQDHIDI 0 "register_operand" "=r")
(sign_extend:EQDHIDI (match_operand:EQS0 1 "register_operand" "r")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
rtx_code_label *skip_hiset_label;
@@ -744,7 +746,7 @@
(ior:HIDI
(match_operand:HIDI 1 "register_operand" "0")
(match_operand:HIDI 2 "const_fillbytes_operand" "Uf")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
static char line[64];
pru_byterange r;
@@ -767,7 +769,7 @@
(and:HIDI
(match_operand:HIDI 1 "register_operand" "0")
(match_operand:HIDI 2 "const_zerobytes_operand" "Uz")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
static char line[64];
pru_byterange r;
@@ -1114,7 +1116,8 @@
/* Try with the more efficient zero/fill patterns first. */
if (<LOGICAL_BITOP:CODE> == IOR
&& CONST_INT_P (operands[2])
- && const_fillbytes_operand (operands[2], DImode))
+ && const_fillbytes_operand (operands[2], DImode)
+ && TARGET_OPT_FILLZERO)
{
rtx insn = maybe_gen_pru_ior_fillbytes (DImode,
operands[0],
@@ -1130,7 +1133,8 @@
}
if (<LOGICAL_BITOP:CODE> == AND
&& CONST_INT_P (operands[2])
- && const_zerobytes_operand (operands[2], DImode))
+ && const_zerobytes_operand (operands[2], DImode)
+ && TARGET_OPT_FILLZERO)
{
rtx insn = maybe_gen_pru_and_zerobytes (DImode,
operands[0],
@@ -1212,7 +1216,7 @@
[(set (match_operand:SI 0 "pru_muldst_operand" "=Rmd0")
(mult:SI (match_operand:SI 1 "pru_mulsrc0_operand" "%Rms0")
(match_operand:SI 2 "pru_mulsrc1_operand" "Rms1")))]
- ""
+ "TARGET_OPT_MUL"
"nop\;xin\\t0, %0, 4"
[(set_attr "type" "alu")
(set_attr "length" "8")])
diff --git a/gcc/config/pru/pru.opt b/gcc/config/pru/pru.opt
index 8385beb..5206b2a 100644
--- a/gcc/config/pru/pru.opt
+++ b/gcc/config/pru/pru.opt
@@ -39,6 +39,14 @@ mloop
Target Mask(OPT_LOOP)
Allow (or do not allow) gcc to use the LOOP instruction.
+mmul
+Target Mask(OPT_MUL)
+Allow (or do not allow) gcc to use the PRU multiplier unit.
+
+mfillzero
+Target Mask(OPT_FILLZERO)
+Allow (or do not allow) gcc to use the FILL and ZERO instructions.
+
mabi=
Target RejectNegative Joined Enum(pru_abi_t) Var(pru_current_abi) Init(PRU_ABI_GNU) Save
Select target ABI variant.
diff --git a/gcc/config/pru/pru.opt.urls b/gcc/config/pru/pru.opt.urls
index c87affb..5c57892 100644
--- a/gcc/config/pru/pru.opt.urls
+++ b/gcc/config/pru/pru.opt.urls
@@ -12,6 +12,12 @@ UrlSuffix(gcc/PRU-Options.html#index-mno-relax-1)
mloop
UrlSuffix(gcc/PRU-Options.html#index-mloop)
+mmul
+UrlSuffix(gcc/PRU-Options.html#index-mmul)
+
+mfillzero
+UrlSuffix(gcc/PRU-Options.html#index-mfillzero)
+
mabi=
UrlSuffix(gcc/PRU-Options.html#index-mabi-4)
diff --git a/gcc/config/pru/t-multilib b/gcc/config/pru/t-multilib
new file mode 100644
index 0000000..1e3c2b8
--- /dev/null
+++ b/gcc/config/pru/t-multilib
@@ -0,0 +1,29 @@
+# Copyright (C) 2025 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS =
+MULTILIB_OPTIONS += mloop/mno-loop
+MULTILIB_OPTIONS += mmul/mno-mul
+MULTILIB_OPTIONS += mfillzero/mno-fillzero
+
+# Build two variants:
+# - Newer PRU core versions, present in AM335x and later.
+# - Older PRU core versions, present in AM18xx.
+MULTILIB_REQUIRED =
+MULTILIB_REQUIRED += mloop/mmul/mfillzero
+MULTILIB_REQUIRED += mno-loop/mno-mul/mno-fillzero
diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize
index 5d24f5ed..15a3985 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -163,7 +163,19 @@ def parse_dep_exts(dep_exts_str):
ext_name = match.group(1)
condition_code = match.group(2)
deps.append({'ext': ext_name, 'type': 'conditional', 'condition': condition_code})
- conditional_matches.append((match.start(), match.end()))
+ # The conditional_pattern RE matches only the first code block enclosed
+ # in braces.
+ #
+ # Extend the match to the condition block's closing brace, encompassing
+ # all code blocks, by simply balancing the number of opening and
+ # closing braces. While crude, this avoids writing a complicated
+ # parser here.
+ closing_braces_left = condition_code.count('{') - condition_code.count('}')
+ condition_end = match.end()
+ while closing_braces_left > 0:
+ condition_end = dep_exts_str.find('}', condition_end)
+ closing_braces_left -= 1
+ conditional_matches.append((match.start(), condition_end))
# Remove conditional dependency blocks from the string
remaining_str = dep_exts_str
@@ -534,6 +546,11 @@ def run_unit_tests():
assert extensions[0]['name'] == 'test'
assert len(extensions[0]['dep_exts']) == 2
+ def test_parse_long_condition_block():
+ """Test condition block containing several code blocks."""
+ result = arch_canonicalize("rv32ec", "20191213")
+ assert "rv32ec_zca" in result
+
# Collect test functions
test_functions = [
test_basic_arch_parsing,
@@ -542,7 +559,8 @@ def run_unit_tests():
test_conditional_dependencies,
test_parse_dep_exts,
test_evaluate_conditional_dependency,
- test_parse_define_riscv_ext
+ test_parse_define_riscv_ext,
+ test_parse_long_condition_block
]
# Run tests manually first, then optionally with pytest
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 6531996..9695fdc 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1679,6 +1679,26 @@
;; Combine vec_duplicate + op.vv to op.vx
;; Include
;; - vadd.vx
+;; - vsub.vx
+;; - vrsub.vx
+;; - vand.vx
+;; - vor.vx
+;; - vmul.vx
+;; - vdiv.vx
+;; - vdivu.vx
+;; - vrem.vx
+;; - vremu.vx
+;; - vmax.vx
+;; - vmaxu.vx
+;; - vmin.vx
+;; - vminu.vx
+;; - vsadd.vx
+;; - vsaddu.vx
+;; - vssub.vx
+;; - vssubu.vx
+;; - vaadd.vx
+;; - vaaddu.vx
+;; - vmerge.vxm
;; =============================================================================
(define_insn_and_split "*<optab>_vx_<mode>"
[(set (match_operand:V_VLSI 0 "register_operand")
@@ -1694,6 +1714,8 @@
riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2],
operands[1], <CODE>,
<MODE>mode);
+
+ DONE;
}
[(set_attr "type" "vialu")])
@@ -1711,6 +1733,8 @@
riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1],
operands[2], <CODE>,
<MODE>mode);
+
+ DONE;
}
[(set_attr "type" "vialu")])
@@ -1782,6 +1806,69 @@
}
[(set_attr "type" "vaalu")])
+(define_insn_and_split "*merge_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 2 "reg_or_int_operand"))
+ (match_operand:V_VLSI 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_merge_scalar (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::MERGE_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vimerge")])
+
+(define_insn_and_split "*vmacc_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (plus:V_VLSI
+ (mult:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "register_operand"))
+ (match_operand:V_VLSI 3 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "vimuladd")])
+
+(define_insn_and_split "*vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (minus:V_VLSI
+ (match_operand:V_VLSI 3 "register_operand")
+ (mult:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_vnmsac_vx (<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "vimuladd")])
+
+
;; =============================================================================
;; Combine vec_duplicate + op.vv to op.vf
;; Include
@@ -1962,3 +2049,98 @@
}
[(set_attr "type" "vfwmuladd")]
)
+
+;; vfmul.vf
+(define_insn_and_split "*vfmul_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (MULT, <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfmul")]
+)
+
+;; vfrdiv.vf
+(define_insn_and_split "*vfrdiv_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (div:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_reverse_scalar (DIV, <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfdiv")]
+)
+
+;; vfmin.vf
+(define_insn_and_split "*vfmin_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (smin:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (SMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
+
+(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")
+ ] UNSPEC_VFMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
+
+(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (match_operand:V_VLSF 1 "register_operand")
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ ] UNSPEC_VFMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 5ecaa19..979e0df 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -330,3 +330,7 @@
(define_constraint "Q"
"An address operand that is valid for a prefetch instruction"
(match_operand 0 "prefetch_operand"))
+
+(define_address_constraint "ZD"
+ "An address operand that is valid for a mips prefetch instruction"
+ (match_test "riscv_prefetch_offset_address_p (op, mode)"))
diff --git a/gcc/config/riscv/gen-riscv-ext-opt.cc b/gcc/config/riscv/gen-riscv-ext-opt.cc
index 17b8f5b..1ca339c 100644
--- a/gcc/config/riscv/gen-riscv-ext-opt.cc
+++ b/gcc/config/riscv/gen-riscv-ext-opt.cc
@@ -4,50 +4,6 @@
#include <stdio.h>
#include "riscv-opts.h"
-struct version_t
-{
- int major;
- int minor;
- version_t (int major, int minor,
- enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE)
- : major (major), minor (minor)
- {}
- bool operator<(const version_t &other) const
- {
- if (major != other.major)
- return major < other.major;
- return minor < other.minor;
- }
-
- bool operator== (const version_t &other) const
- {
- return major == other.major && minor == other.minor;
- }
-};
-
-static void
-print_ext_doc_entry (const std::string &ext_name, const std::string &full_name,
- const std::string &desc,
- const std::vector<version_t> &supported_versions)
-{
- // Implementation of the function to print the documentation entry
- // for the extension.
- std::set<version_t> unique_versions;
- for (const auto &version : supported_versions)
- unique_versions.insert (version);
- printf ("@item %s\n", ext_name.c_str ());
- printf ("@tab");
- for (const auto &version : unique_versions)
- {
- printf (" %d.%d", version.major, version.minor);
- }
- printf ("\n");
- printf ("@tab %s", full_name.c_str ());
- if (desc.size ())
- printf (", %s", desc.c_str ());
- printf ("\n\n");
-}
-
int
main ()
{
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 381f96c..bdb3d22 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -27,10 +27,14 @@
(ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
+(define_predicate "prefetch_const_operand"
+ (and (match_code "const_int")
+ (match_test "(IN_RANGE (INTVAL (op), 0, 511))")))
+
;; REG or REG+D where D fits in a simm12 and has the low 5 bits
;; off. The REG+D form can be reloaded into a temporary if needed
;; after FP elimination if that exposes an invalid offset.
-(define_predicate "prefetch_operand"
+(define_predicate "zicbop_prefetch_operand"
(ior (match_operand 0 "register_operand")
(and (match_test "const_arith_operand (op, VOIDmode)")
(match_test "(INTVAL (op) & 0x1f) == 0"))
@@ -39,6 +43,20 @@
(match_test "const_arith_operand (XEXP (op, 1), VOIDmode)")
(match_test "(INTVAL (XEXP (op, 1)) & 0x1f) == 0"))))
+;; REG or REG+D where D fits in a uimm9
+(define_predicate "mips_prefetch_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "prefetch_const_operand (op, VOIDmode)")
+ (and (match_code "plus")
+ (match_test "register_operand (XEXP (op, 0), word_mode)")
+ (match_test "prefetch_const_operand (XEXP (op, 1), VOIDmode)"))))
+
+;; MIPS-specific or standard RISC-V extension
+(define_predicate "prefetch_operand"
+ (if_then_else (match_test "TARGET_XMIPSCBOP")
+ (match_operand 0 "mips_prefetch_operand")
+ (match_operand 0 "zicbop_prefetch_operand")))
+
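A sketch of the two offset checks these predicates encode, with hypothetical helper names: a simm12 displacement whose low five bits are clear for the Zicbop form, and a uimm9 displacement for the MIPS form.

/* Sketch only, mirroring the predicate conditions above.  */
static bool zicbop_offset_ok (long d) { return d >= -2048 && d <= 2047 && (d & 0x1f) == 0; }
static bool mips_offset_ok (long d)   { return d >= 0 && d <= 511; }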
(define_predicate "lui_operand"
(and (match_code "const_int")
(match_test "LUI_OPERAND (INTVAL (op))")))
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index 3031c29..b8547a7 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -156,6 +156,7 @@ get_insn_vtype_mode (rtx_insn *rinsn)
extract_insn_cached (rinsn);
int mode_idx = get_attr_mode_idx (rinsn);
gcc_assert (mode_idx != INVALID_ATTRIBUTE);
+ gcc_assert (mode_idx < recog_data.n_operands);
return GET_MODE (recog_data.operand[mode_idx]);
}
@@ -205,6 +206,7 @@ simplify_replace_vlmax_avl (rtx_insn *rinsn, rtx new_avl)
{
int index = get_attr_avl_type_idx (rinsn);
gcc_assert (index != INVALID_ATTRIBUTE);
+ gcc_assert (index < recog_data.n_operands);
validate_change_or_fail (rinsn, recog_data.operand_loc[index],
get_avl_type_rtx (avl_type::NONVLMAX), false);
}
@@ -361,6 +363,8 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const
is not depend on. */
extract_insn_cached (use_insn->rtl ());
int merge_op_idx = get_attr_merge_op_idx (use_insn->rtl ());
+ gcc_assert (merge_op_idx == INVALID_ATTRIBUTE
+ || merge_op_idx < recog_data.n_operands);
if (merge_op_idx != INVALID_ATTRIBUTE
&& !satisfies_constraint_vu (recog_data.operand[merge_op_idx])
&& refers_to_regno_p (set->regno (),
@@ -531,7 +535,14 @@ pass_avlprop::execute (function *fn)
&& !m_avl_propagations->get (candidate.second)
&& imm_avl_p (vtype_mode))
{
- rtx new_avl = gen_int_mode (GET_MODE_NUNITS (vtype_mode), Pmode);
+ /* For segmented operations AVL refers to a single register and
+ not all NF registers. Therefore divide the mode size by NF
+ to obtain the proper AVL. */
+ int nf = 1;
+ if (riscv_v_ext_tuple_mode_p (vtype_mode))
+ nf = get_nf (vtype_mode);
+ rtx new_avl = gen_int_mode
+ (GET_MODE_NUNITS (vtype_mode).to_constant () / nf, Pmode);
simplify_replace_vlmax_avl (rinsn, new_avl);
}
}
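A worked example of the adjustment, with illustrative numbers: for a two-field segment access such as vlseg2e32, a tuple mode holding 4 elements per field reports GET_MODE_NUNITS of 8, but the AVL handed to vsetivli must be 8 / nf = 4, the element count of a single field.

/* Illustrative arithmetic only.  */
int nf = 2;             /* two fields per segment (vlseg2e32) */
int nunits = 8;         /* elements across the whole tuple mode */
int avl = nunits / nf;  /* 4: what the emitted vsetivli should use */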
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 98f3470..8f0f630 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -113,7 +113,7 @@ RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_"
"zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_"
"xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_"
"xtheadfmemidx_xtheadmac_xtheadmemidx_"
- "xtheadmempair_xtheadsync_xtheadvdot",
+ "xtheadmempair_xtheadsync",
"xt-c908")
RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
"xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
@@ -121,7 +121,7 @@ RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"xt-c910")
RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_"
- "zicsr_zifencei _zihintntl_zihintpause_zihpm_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
"zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
"zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_"
"xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
@@ -135,13 +135,13 @@ RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
"xtheadvector",
"xt-c910")
RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_"
- "zicsr_zifencei _zihintntl_zihintpause_zihpm_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
"zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
"zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_"
"svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
"xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
"xtheadmac_xtheadmemidx_xtheadmempair_"
- "xtheadsync_xtheadvdot",
+ "xtheadsync",
"xt-c920v2")
RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_"
diff --git a/gcc/config/riscv/riscv-ext-mips.def b/gcc/config/riscv/riscv-ext-mips.def
index 5d7836d..132f6c1 100644
--- a/gcc/config/riscv/riscv-ext-mips.def
+++ b/gcc/config/riscv/riscv-ext-mips.def
@@ -33,3 +33,16 @@ DEFINE_RISCV_EXT (
/* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED,
/* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED,
/* EXTRA_EXTENSION_FLAGS. */ 0)
+
+DEFINE_RISCV_EXT (
+ /* NAME. */ xmipscbop,
+ /* UPPERCASE_NAME. */ XMIPSCBOP,
+ /* FULL_NAME. */ "Mips Prefetch extension",
+ /* DESC. */ "",
+ /* URL. */ ,
+ /* DEP_EXTS. */ ({}),
+ /* SUPPORTED_VERSIONS. */ ({{1, 0}}),
+ /* FLAG_GROUP. */ xmips,
+ /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS. */ 0)
diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt
index 26d6e68..ced05d2 100644
--- a/gcc/config/riscv/riscv-ext.opt
+++ b/gcc/config/riscv/riscv-ext.opt
@@ -449,3 +449,5 @@ Mask(XTHEADVECTOR) Var(riscv_xthead_subext)
Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
Mask(XMIPSCMOV) Var(riscv_xmips_subext)
+
+Mask(XMIPSCBOP) Var(riscv_xmips_subext)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 539321f..46b256d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -143,6 +143,8 @@ extern void riscv_expand_sstrunc (rtx, rtx);
extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
extern bool synthesize_ior_xor (rtx_code, rtx [3]);
extern bool synthesize_and (rtx [3]);
+extern bool synthesize_add (rtx [3]);
+extern bool synthesize_add_extended (rtx [3]);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -830,16 +832,18 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
extern bool strided_load_broadcast_p (void);
extern bool riscv_use_divmod_expander (void);
-void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
+void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
extern bool
riscv_option_valid_attribute_p (tree, tree, tree, int);
extern bool
riscv_option_valid_version_attribute_p (tree, tree, tree, int);
extern bool
-riscv_process_target_version_attr (tree, location_t);
+riscv_process_target_version_attr (tree, location_t *);
extern void
riscv_override_options_internal (struct gcc_options *);
extern void riscv_option_override (void);
+extern rtx riscv_prefetch_cookie (rtx, rtx);
+extern bool riscv_prefetch_offset_address_p (rtx, machine_mode);
struct riscv_tune_param;
/* Information about one micro-arch we know about. */
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index a35537d..4cd860f 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -52,8 +52,9 @@ private:
/* Original arch string. */
const char *m_arch;
- /* Location of arch string, used for report error. */
- location_t m_loc;
+ /* A pointer to the location that should be used for diagnostics,
+ or null if diagnostics should be suppressed. */
+ location_t *m_loc;
/* Head of subset info list. */
riscv_subset_t *m_head;
@@ -70,7 +71,7 @@ private:
/* Allow adding the same extension more than once. */
bool m_allow_adding_dup;
- riscv_subset_list (const char *, location_t);
+ riscv_subset_list (const char *, location_t *);
const char *parsing_subset_version (const char *, const char *, unsigned *,
unsigned *, bool, bool *);
@@ -106,12 +107,12 @@ public:
riscv_subset_list *clone () const;
- static riscv_subset_list *parse (const char *, location_t);
+ static riscv_subset_list *parse (const char *, location_t *);
const char *parse_single_ext (const char *, bool exact_single_p = true);
int match_score (riscv_subset_list *) const;
- void set_loc (location_t);
+ void set_loc (location_t *);
void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
@@ -182,7 +183,7 @@ extern void
riscv_set_arch_by_subset_list (riscv_subset_list *, struct gcc_options *);
extern bool riscv_minimal_hwprobe_feature_bits (const char *,
struct riscv_feature_bits *,
- location_t);
+ location_t *);
extern bool
riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *);
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index 8ad3025..5e01c92 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -34,7 +34,7 @@ namespace {
class riscv_target_attr_parser
{
public:
- riscv_target_attr_parser (location_t loc)
+ riscv_target_attr_parser (location_t *loc)
: m_found_arch_p (false)
, m_found_tune_p (false)
, m_found_cpu_p (false)
@@ -62,7 +62,7 @@ private:
bool m_found_cpu_p;
bool m_found_priority_p;
riscv_subset_list *m_subset_list;
- location_t m_loc;
+ location_t *m_loc;
const riscv_cpu_info *m_cpu_info;
const char *m_tune;
int m_priority;
@@ -102,15 +102,17 @@ riscv_target_attr_parser::parse_arch (const char *str)
{
if (TARGET_64BIT && strncmp ("32", str + 2, strlen ("32")) == 0)
{
- error_at (m_loc, "unexpected arch for %<target()%> attribute: "
- "must start with rv64 but found %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> attribute: "
+ "must start with rv64 but found %qs", str);
goto fail;
}
if (!TARGET_64BIT && strncmp ("64", str + 2, strlen ("64")) == 0)
{
- error_at (m_loc, "unexpected arch for %<target()%> attribute: "
- "must start with rv32 but found %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> attribute: "
+ "must start with rv32 but found %qs", str);
goto fail;
}
@@ -140,10 +142,9 @@ riscv_target_attr_parser::parse_arch (const char *str)
{
if (token[0] != '+')
{
- error_at (
- m_loc,
- "unexpected arch for %<target()%> attribute: must start "
- "with + or rv");
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> "
+ "attribute: must start with + or rv");
goto fail;
}
@@ -151,10 +152,9 @@ riscv_target_attr_parser::parse_arch (const char *str)
/* Check parse_single_ext has consume all string. */
if (*result != '\0')
{
- error_at (
- m_loc,
- "unexpected arch for %<target()%> attribute: bad "
- "string found %qs", token);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> "
+ "attribute: bad string found %qs", token);
goto fail;
}
@@ -179,8 +179,8 @@ fail:
bool
riscv_target_attr_parser::handle_arch (const char *str)
{
- if (m_found_arch_p)
- error_at (m_loc, "%<target()%> attribute: arch appears more than once");
+ if (m_found_arch_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: arch appears more than once");
m_found_arch_p = true;
return parse_arch (str);
}
@@ -190,15 +190,16 @@ riscv_target_attr_parser::handle_arch (const char *str)
bool
riscv_target_attr_parser::handle_cpu (const char *str)
{
- if (m_found_cpu_p)
- error_at (m_loc, "%<target()%> attribute: cpu appears more than once");
+ if (m_found_cpu_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: cpu appears more than once");
m_found_cpu_p = true;
const riscv_cpu_info *cpu_info = riscv_find_cpu (str);
if (!cpu_info)
{
- error_at (m_loc, "%<target()%> attribute: unknown CPU %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: unknown CPU %qs", str);
return false;
}
@@ -218,14 +219,15 @@ riscv_target_attr_parser::handle_cpu (const char *str)
bool
riscv_target_attr_parser::handle_tune (const char *str)
{
- if (m_found_tune_p)
- error_at (m_loc, "%<target()%> attribute: tune appears more than once");
+ if (m_found_tune_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: tune appears more than once");
m_found_tune_p = true;
const struct riscv_tune_info *tune = riscv_parse_tune (str, true);
if (tune == nullptr)
{
- error_at (m_loc, "%<target()%> attribute: unknown TUNE %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: unknown TUNE %qs", str);
return false;
}
@@ -237,13 +239,15 @@ riscv_target_attr_parser::handle_tune (const char *str)
bool
riscv_target_attr_parser::handle_priority (const char *str)
{
- if (m_found_priority_p)
- error_at (m_loc, "%<target()%> attribute: priority appears more than once");
+ if (m_found_priority_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: priority appears "
+ "more than once");
m_found_priority_p = true;
if (sscanf (str, "%d", &m_priority) != 1)
{
- error_at (m_loc, "%<target()%> attribute: invalid priority %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: invalid priority %qs", str);
return false;
}
@@ -282,7 +286,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
static bool
riscv_process_one_target_attr (char *arg_str,
- location_t loc,
+ location_t *loc,
riscv_target_attr_parser &attr_parser,
const struct riscv_attribute_info *attrs)
{
@@ -290,7 +294,8 @@ riscv_process_one_target_attr (char *arg_str,
if (len == 0)
{
- error_at (loc, "malformed %<target()%> attribute");
+ if (loc)
+ error_at (*loc, "malformed %<target()%> attribute");
return false;
}
@@ -302,10 +307,9 @@ riscv_process_one_target_attr (char *arg_str,
if (!arg)
{
- error_at (
- loc,
- "attribute %<target(\"%s\")%> does not accept an argument",
- str_to_check);
+ if (loc)
+ error_at (*loc, "attribute %<target(\"%s\")%> does not "
+ "accept an argument", str_to_check);
return false;
}
@@ -324,7 +328,8 @@ riscv_process_one_target_attr (char *arg_str,
return (&attr_parser->*attr->handler) (arg);
}
- error_at (loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
+ if (loc)
+ error_at (*loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
return false;
}
@@ -347,11 +352,12 @@ num_occurrences_in_str (char c, char *str)
}
/* Parse the string in ARGS that contains the target attribute information
- and update the global target options space. */
+ and update the global target options space. If LOC is nonnull, report
+ diagnostics against location *LOC, otherwise remain silent. */
bool
riscv_process_target_attr (const char *args,
- location_t loc,
+ location_t *loc,
const struct riscv_attribute_info *attrs)
{
size_t len = strlen (args);
@@ -387,8 +393,8 @@ riscv_process_target_attr (const char *args,
if (num_attrs != num_semicolons + 1)
{
- error_at (loc, "malformed %<target(\"%s\")%> attribute",
- args);
+ if (loc)
+ error_at (*loc, "malformed %<target(\"%s\")%> attribute", args);
return false;
}
@@ -399,11 +405,12 @@ riscv_process_target_attr (const char *args,
}
/* Parse the tree in ARGS that contains the target attribute information
- and update the global target options space. */
+ and update the global target options space. If LOC is nonnull, report
+ diagnostics against *LOC, otherwise remain silent. */
static bool
riscv_process_target_attr (tree args,
- location_t loc,
+ location_t *loc,
const struct riscv_attribute_info *attrs)
{
if (TREE_CODE (args) == TREE_LIST)
@@ -424,7 +431,8 @@ riscv_process_target_attr (tree args,
if (TREE_CODE (args) != STRING_CST)
{
- error_at (loc, "attribute %<target%> argument not a string");
+ if (loc)
+ error_at (*loc, "attribute %<target%> argument not a string");
return false;
}
@@ -466,7 +474,7 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
TREE_TARGET_OPTION (target_option_default_node));
/* Now we can parse the attributes and set &global_options accordingly. */
- ret = riscv_process_target_attr (args, loc, riscv_target_attrs);
+ ret = riscv_process_target_attr (args, &loc, riscv_target_attrs);
if (ret)
{
riscv_override_options_internal (&global_options);
@@ -481,16 +489,19 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
}
/* Parse the tree in ARGS that contains the target_version attribute
- information and update the global target options space. */
+ information and update the global target options space. If LOC is nonnull,
+ report diagnostics against *LOC, otherwise remain silent. */
bool
-riscv_process_target_version_attr (tree args, location_t loc)
+riscv_process_target_version_attr (tree args, location_t *loc)
{
if (TREE_CODE (args) == TREE_LIST)
{
if (TREE_CHAIN (args))
{
- error ("attribute %<target_version%> has multiple values");
+ if (loc)
+ error_at (*loc, "attribute %<target_version%> "
+ "has multiple values");
return false;
}
args = TREE_VALUE (args);
@@ -498,7 +509,8 @@ riscv_process_target_version_attr (tree args, location_t loc)
if (!args || TREE_CODE (args) != STRING_CST)
{
- error ("attribute %<target_version%> argument not a string");
+ if (loc)
+ error_at (*loc, "attribute %<target_version%> argument not a string");
return false;
}
@@ -541,7 +553,7 @@ riscv_option_valid_version_attribute_p (tree fndecl, tree, tree args, int)
cl_target_option_restore (&global_options, &global_options_set,
TREE_TARGET_OPTION (target_option_current_node));
- ret = riscv_process_target_version_attr (args, loc);
+ ret = riscv_process_target_version_attr (args, &loc);
/* Set up any additional state. */
if (ret)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c9c8328..b27a0be 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -63,20 +63,37 @@ imm_avl_p (machine_mode mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (mode);
+ /* For segmented operations AVL refers to a single register and not all NF
+ registers. Therefore divide the mode size by NF before checking if it is
+ in range. */
+ int nf = 1;
+ if (riscv_v_ext_tuple_mode_p (mode))
+ nf = get_nf (mode);
+
return nunits.is_constant ()
/* The vsetivli can only hold register 0~31. */
- ? (IN_RANGE (nunits.to_constant (), 0, 31))
+ ? (IN_RANGE (nunits.to_constant () / nf, 0, 31))
/* Only allowed in VLS-VLMAX mode. */
: false;
}
-/* Return true if LEN is equal to NUNITS that out of the range [0, 31]. */
+/* Return true if LEN equals the number of units in MODE and MODE is either
+ a VLA mode or a VLS mode whose size equals the full vector size.
+ In that case we can emit a VLMAX insn which can be optimized more easily
+ by the vsetvl pass. */
+
static bool
is_vlmax_len_p (machine_mode mode, rtx len)
{
poly_int64 value;
+ if (poly_int_rtx_p (len, &value)
+ && known_eq (value, GET_MODE_NUNITS (mode))
+ && known_eq (GET_MODE_UNIT_SIZE (mode) * value, BYTES_PER_RISCV_VECTOR))
+ return true;
+
return poly_int_rtx_p (len, &value)
- && known_eq (value, GET_MODE_NUNITS (mode));
+ && !GET_MODE_NUNITS (mode).is_constant ()
+ && known_eq (value, GET_MODE_NUNITS (mode));
}
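Concretely, assuming VLEN is 128 so BYTES_PER_RISCV_VECTOR is 16:

/* Illustrative cases only.
     V4SImode, len 4 -> 16 bytes, the whole vector -> VLMAX
     V2SImode, len 2 ->  8 bytes, half a vector    -> not VLMAX
     any VLA mode with len == GET_MODE_NUNITS      -> VLMAX as before  */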
/* Helper functions for insn_flags && insn_types */
@@ -954,6 +971,26 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
emit_vlmax_insn (icode, BINARY_OP_TAMU, ops);
}
+/* Helper to emit a vslide1up instruction of mode MODE: slide vector DEST up
+ by one element, insert ELT at element 0, and return the result, which is
+ placed in a fresh register. */
+
+rtx
+expand_slide1up (machine_mode mode, rtx dest, rtx elt)
+{
+ unsigned int unspec
+ = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
+ insn_code icode = code_for_pred_slide (unspec, mode);
+ /* RVV Spec 16.3.1
+ The destination vector register group for vslideup cannot overlap the
+ source vector register group, otherwise the instruction encoding
+ is reserved. Thus, we need a new register. */
+ rtx tmp = gen_reg_rtx (mode);
+ rtx ops[] = {tmp, dest, elt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ return tmp;
+}
+
+
/* According to RVV ISA spec (16.5.1. Synthesizing vdecompress):
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc
@@ -1175,16 +1212,7 @@ expand_vector_init_trailing_same_elem (rtx target,
{
rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1));
for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--)
- {
- unsigned int unspec
- = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, mode);
- rtx tmp = gen_reg_rtx (mode);
- rtx ops[] = {tmp, dup, builder.elt (i)};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- /* slide1up need source and dest to be different REG. */
- dup = tmp;
- }
+ dup = expand_slide1up (mode, dup, builder.elt (i));
emit_move_insn (target, dup);
return true;
@@ -1717,6 +1745,77 @@ expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
gcc_unreachable ();
}
+/* We don't actually allow this case in legitimate_constant_p but
+ the middle-end still expects us to handle it in an expander
+ (see PR121334). This is assumed to happen very rarely so the
+ implementation is not very efficient, particularly for short
+ vectors. */
+
+static void
+expand_const_vector_onestep (rtx target, rvv_builder &builder)
+{
+ machine_mode mode = GET_MODE (target);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ gcc_assert (builder.nelts_per_pattern () == 2);
+
+ /* We have n encoded patterns
+ {csta_0, cstb_0},
+ {csta_1, cstb_1},
+ ...
+ {csta_{n-1}, cstb_{n-1}}
+ which should become one vector:
+ {csta_0, csta_1, ..., csta_{n-1},
+ cstb_0, cstb_1, ..., cstb_{n-1},
+ ...
+ cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ In order to achieve this we create a permute/gather constant
+ sel = {0, 1, ..., n - 1, 0, 1, ..., n - 1, ...}
+ and two vectors
+ va = {csta_0, csta_1, ..., csta_{n-1}},
+ vb = {cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ Then we use a VLMAX gather to "broadcast" vb and afterwards
+ overwrite the first n elements with va. */
+
+ int n = builder.npatterns ();
+ /* { 0, 1, 2, ..., n - 1 }. */
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+
+ /* { 0, 1, ..., n - 1, 0, 1, ..., n - 1, ... }. */
+ rtx sel = gen_reg_rtx (mode);
+ rtx and_ops[] = {sel, vid, GEN_INT (n)};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+
+ /* va = { ELT (0), ELT (1), ... ELT (n - 1) }. */
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx ops1[] = {tmp1, builder.elt (0)};
+ expand_broadcast (mode, ops1);
+ for (int i = 1; i < n; i++)
+ tmp1 = expand_slide1up (mode, tmp1, builder.elt (i));
+
+ /* vb = { ELT (n), ELT (n + 1), ... ELT (2 * n - 1) }. */
+ rtx tmp2 = gen_reg_rtx (mode);
+ rtx ops2[] = {tmp2, builder.elt (n)};
+ expand_broadcast (mode, ops2);
+ for (int i = 1; i < n; i++)
+ tmp2 = expand_slide1up (mode, tmp2, builder.elt (n + i));
+
+ /* Duplicate vb. */
+ rtx tmp3 = gen_reg_rtx (mode);
+ emit_vlmax_gather_insn (tmp3, tmp2, sel);
+
+ /* Overwrite the first n elements with va. */
+ rtx dest = gen_reg_rtx (mode);
+ insn_code icode = code_for_pred_mov (mode);
+ rtx ops3[] = {dest, tmp3, tmp1};
+ emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n));
+
+ emit_move_insn (target, dest);
+}
+
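A worked example with illustrative values, for npatterns n == 2 and nelts_per_pattern == 2, i.e. the two encoded patterns {a0, b0} and {a1, b1}:

/* Illustrative element values only.
     vid                       = {0, 1, 2, 3, 4, 5, ...}
     sel (vid reduced mod n)   = {0, 1, 0, 1, 0, 1, ...}
     va                        = {a0, a1, ...}
     vb                        = {b0, b1, ...}
     VLMAX gather of vb by sel = {b0, b1, b0, b1, b0, b1, ...}
     overwrite first n elements with va
                               = {a0, a1, b0, b1, b0, b1, ...}  */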
static void
expand_const_vector (rtx target, rtx src)
{
@@ -1744,6 +1843,8 @@ expand_const_vector (rtx target, rtx src)
if (CONST_VECTOR_DUPLICATE_P (src))
return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_NELTS_PER_PATTERN (src) == 2)
+ return expand_const_vector_onestep (target, builder);
else if (CONST_VECTOR_STEPPED_P (src))
return expand_const_vector_stepped (target, src, &builder);
@@ -2648,8 +2749,14 @@ expand_vector_init_merge_repeating_sequence (rtx target,
= get_repeating_sequence_dup_machine_mode (builder, mask_bit_mode);
uint64_t full_nelts = builder.full_nelts ().to_constant ();
+ gcc_assert (builder.nelts_per_pattern () == 1
+ || builder.nelts_per_pattern () == 2);
+
+ rtx first
+ = builder.nelts_per_pattern () == 1 ? builder.elt (0) : builder.elt (1);
+
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
+ rtx ops[] = {target, force_reg (builder.inner_mode (), first)};
expand_broadcast (builder.mode (), ops);
/* Step 2: Merge the rest iteration of pattern. */
for (unsigned int i = 1; i < builder.npatterns (); i++)
@@ -2677,7 +2784,10 @@ expand_vector_init_merge_repeating_sequence (rtx target,
emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
- rtx ops[] = {target, target, builder.elt (i), mask};
+ unsigned int which = i;
+ if (builder.nelts_per_pattern () == 2)
+ which = 2 * which + 1;
+ rtx ops[] = {target, target, builder.elt (which), mask};
emit_vlmax_insn (code_for_pred_merge_scalar (GET_MODE (target)),
MERGE_OP, ops);
}
@@ -3220,15 +3330,17 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
mask_mode = get_mask_mode (data_mode);
rtx mask = gen_reg_rtx (mask_mode);
rtx max_sel = gen_const_vector_dup (sel_mode, nunits);
+ bool overlap = reg_overlap_mentioned_p (target, op1);
+ rtx tmp_target = overlap ? gen_reg_rtx (data_mode) : target;
/* Step 1: generate a mask that should select everything >= nunits into the
* mask. */
expand_vec_cmp (mask, GEU, sel_mod, max_sel);
- /* Step2: gather every op0 values indexed by sel into target,
+ /* Step2: gather every op0 values indexed by sel into TMP_TARGET,
we don't need to care about the result of the element
whose index >= nunits. */
- emit_vlmax_gather_insn (target, op0, sel_mod);
+ emit_vlmax_gather_insn (tmp_target, op0, sel_mod);
/* Step3: shift the range from (nunits, max_of_mode] to
[0, max_of_mode - nunits]. */
@@ -3238,7 +3350,10 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
/* Step4: gather those into the previously masked-out elements
of target. */
- emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask);
+ emit_vlmax_masked_gather_mu_insn (tmp_target, op1, tmp, mask);
+
+ if (overlap)
+ emit_move_insn (target, tmp_target);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST for RVV. */
@@ -4078,11 +4193,7 @@ shuffle_off_by_one_patterns (struct expand_vec_perm_d *d)
emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
/* Insert the scalar into element 0. */
- unsigned int unspec
- = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, d->vmode);
- rtx ops[] = {d->target, d->op1, tmp};
- emit_vlmax_insn (icode, BINARY_OP, ops);
+ rtx res = expand_slide1up (d->vmode, d->op1, tmp);
+ emit_move_insn (d->target, res);
}
return true;
@@ -4376,13 +4487,11 @@ expand_strided_load (machine_mode mode, rtx *ops)
int idx = 4;
get_else_operand (ops[idx++]);
rtx len = ops[idx];
- poly_int64 len_val;
insn_code icode = code_for_pred_strided_load (mode);
rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
- if (poly_int_rtx_p (len, &len_val)
- && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
else
{
@@ -4400,11 +4509,9 @@ expand_strided_store (machine_mode mode, rtx *ops)
rtx stride = ops[1];
rtx mask = ops[3];
rtx len = ops[4];
- poly_int64 len_val;
rtx vl_type;
- if (poly_int_rtx_p (len, &len_val)
- && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
len = gen_reg_rtx (Pmode);
emit_vlmax_vsetvl (mode, len);
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 44ef44a..5e6cb67 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -607,7 +607,7 @@ costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
if (type == load_vec_info_type || type == store_vec_info_type)
{
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return true;
machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e0d8904..591122f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3685,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* This test can fail if (for example) we want a HF and Z[v]fh is
not enabled. In that case we just want to let the standard
expansion path run. */
- if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
+ if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)
+ && gen_lowpart_common (vmode, SUBREG_REG (src)))
{
rtx v = gen_lowpart (vmode, SUBREG_REG (src));
rtx int_reg = dest;
@@ -3958,41 +3959,6 @@ riscv_extend_cost (rtx op, bool unsigned_p)
return COSTS_N_INSNS (2);
}
-/* Return the cost of the vector binary rtx like add, minus, mult.
- The cost of scalar2vr_cost will be appended if there one of the
- op comes from the VEC_DUPLICATE. */
-
-static int
-get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
-{
- gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
-
- rtx neg;
- rtx op_0;
- rtx op_1;
-
- if (GET_CODE (x) == UNSPEC)
- {
- op_0 = XVECEXP (x, 0, 0);
- op_1 = XVECEXP (x, 0, 1);
- }
- else
- {
- op_0 = XEXP (x, 0);
- op_1 = XEXP (x, 1);
- }
-
- if (GET_CODE (op_0) == VEC_DUPLICATE
- || GET_CODE (op_1) == VEC_DUPLICATE)
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else if (GET_CODE (neg = op_0) == NEG
- && (GET_CODE (op_1) == VEC_DUPLICATE
- || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else
- return COSTS_N_INSNS (1);
-}
-
/* Implement TARGET_RTX_COSTS. */
#define SINGLE_SHIFT_COST 1
@@ -4014,73 +3980,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
{
case SET:
{
- switch (GET_CODE (x))
+ if (GET_CODE (x) == VEC_DUPLICATE)
+ *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
{
- case VEC_DUPLICATE:
- *total = gr2vr_cost * COSTS_N_INSNS (1);
- break;
- case IF_THEN_ELSE:
- {
- rtx op = XEXP (x, 1);
+ int vec_dup_count = 0;
+ subrtx_var_iterator::array_type array;
- switch (GET_CODE (op))
- {
- case DIV:
- case UDIV:
- case MOD:
- case UMOD:
- case US_PLUS:
- case US_MINUS:
- case SS_PLUS:
- case SS_MINUS:
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- case UNSPEC:
- {
- switch (XINT (op, 1))
- {
- case UNSPEC_VAADDU:
- case UNSPEC_VAADD:
- *total
- = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- case PLUS:
- case MINUS:
- case AND:
- case IOR:
- case XOR:
- case MULT:
- case SMAX:
- case UMAX:
- case SMIN:
- case UMIN:
- {
- rtx op;
- rtx op_0 = XEXP (x, 0);
- rtx op_1 = XEXP (x, 1);
+ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+ if (GET_CODE (*iter) == VEC_DUPLICATE)
+ vec_dup_count++;
- if (GET_CODE (op = op_0) == MULT
- || GET_CODE (op = op_1) == MULT)
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- else
- *total = get_vector_binary_rtx_cost (x, scalar2vr_cost);
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
+ int total_vec_dup_cost = vec_dup_count * scalar2vr_cost;
+
+ *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1);
}
}
break;
@@ -5532,9 +5445,9 @@ canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1)
/* We might have been handed back a SUBREG. Just to make things
easy, force it into a REG. */
- if (!REG_P (*op0) && !CONST_INT_P (*op0))
+ if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0)))
*op0 = force_reg (word_mode, *op0);
- if (!REG_P (*op1) && !CONST_INT_P (*op1))
+ if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1)))
*op1 = force_reg (word_mode, *op1);
}
@@ -6213,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
For a library call, FNTYPE is 0. */
void
-riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
+riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
+ rtx, tree, int)
{
memset (cum, 0, sizeof (*cum));
@@ -6494,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}
-/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
- VALTYPE is the return type and MODE is VOIDmode. For libcalls,
- VALTYPE is null and MODE is the mode of the return value. */
+/* Implements hook TARGET_FUNCTION_VALUE. */
rtx
-riscv_function_value (const_tree type, const_tree func, machine_mode mode)
+riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+ bool)
{
struct riscv_arg_info info;
CUMULATIVE_ARGS args;
- if (type)
+ if (fn_decl_or_type)
{
- int unsigned_p = TYPE_UNSIGNED (type);
+ const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
+ TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ }
+ else
+ memset (&args, 0, sizeof args);
- mode = TYPE_MODE (type);
+ int unsigned_p = TYPE_UNSIGNED (ret_type);
- /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
- return values, promote the mode here too. */
- mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
- }
+ machine_mode mode = TYPE_MODE (ret_type);
- memset (&args, 0, sizeof args);
+  /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
+     return values, promote the mode here too. */
+ mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1);
- return riscv_get_arg_info (&info, &args, mode, type, true, true);
+ return riscv_get_arg_info (&info, &args, mode, ret_type, true, true);
+}
+
+/* Implements hook TARGET_LIBCALL_VALUE. */
+
+rtx
+riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ struct riscv_arg_info info;
+ CUMULATIVE_ARGS args;
+ memset (&args, 0, sizeof args);
+ return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true);
}
/* Implement TARGET_PASS_BY_REFERENCE. */
@@ -14037,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
-/* This parses the attribute arguments to target_version in DECL and modifies
- the feature mask and priority required to select those targets. */
+/* Parse the attribute arguments to target_version in DECL and modify
+ the feature mask and priority required to select those targets.
+ If LOC is nonnull, report diagnostics against *LOC, otherwise
+ remain silent. */
static void
parse_features_for_version (tree decl,
+ location_t *loc,
struct riscv_feature_bits &res,
int &priority)
{
@@ -14071,14 +14002,12 @@ parse_features_for_version (tree decl,
cl_target_option_restore (&global_options, &global_options_set,
default_opts);
- riscv_process_target_version_attr (TREE_VALUE (version_attr),
- DECL_SOURCE_LOCATION (decl));
+ riscv_process_target_version_attr (TREE_VALUE (version_attr), loc);
priority = global_options.x_riscv_fmv_priority;
const char *arch_string = global_options.x_riscv_arch_string;
bool parse_res
- = riscv_minimal_hwprobe_feature_bits (arch_string, &res,
- DECL_SOURCE_LOCATION (decl));
+ = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc);
gcc_assert (parse_res);
cl_target_option_restore (&global_options, &global_options_set,
@@ -14135,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2)
struct riscv_feature_bits mask1, mask2;
int prio1, prio2;
- parse_features_for_version (decl1, mask1, prio1);
- parse_features_for_version (decl2, mask2, prio2);
+ parse_features_for_version (decl1, nullptr, mask1, prio1);
+ parse_features_for_version (decl2, nullptr, mask2, prio2);
return compare_fmv_features (mask1, mask2, prio1, prio2);
}
@@ -14439,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl,
version_info.version_decl = version_decl;
// Get attribute string, parse it and find the right features.
parse_features_for_version (version_decl,
+ &DECL_SOURCE_LOCATION (version_decl),
version_info.features,
version_info.prio);
function_versions.push_back (version_info);
@@ -15441,6 +15371,217 @@ synthesize_and (rtx operands[3])
return true;
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+   OPERANDS[0] and OPERANDS[1] will be REGs and may be the same
+   REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_add (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+  /* If we can emit two addi insns then that's better than synthesizing
+     the constant into a temporary, then adding the temporary to the
+     other input.  The exception is when the constant can be loaded
+     in a single instruction which can issue whenever it's convenient. */
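+  /* A minimal illustration, assuming IMM_BITS == 12: for
+     operands[2] == 3000 the saturated value below is 2047 and the
+     residual is 953, so we end up with roughly
+       addi  op0,op1,2047
+       addi  op0,op0,953
+     instead of loading 3000 into a temporary and adding it.  */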
+ if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If we can shift the constant by 1, 2, or 3 bit positions
+ and the result is a cheaper constant, then do so. */
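+  /* For instance, with TARGET_ZBA and operands[2] == 6144 (trailing
+     zero count 11, capped to a shift of 3) the constant 768 needs only
+     a single addi, and the final sum is expected to match a sh3add
+     pattern, which is cheaper than synthesizing 6144 directly.  */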
+ ival = INTVAL (operands[2]);
+ if (TARGET_ZBA
+ && (((ival % 2) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 1), true))
+ || ((ival % 4) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 2), true))
+ || ((ival % 8) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 3), true))))
+ {
+ // Load the shifted constant into a temporary
+ int shct = ctz_hwi (ival);
+
+ /* We can handle shifting up to 3 bit positions via shNadd. */
+ if (shct > 3)
+ shct = 3;
+
+      /* The adjusted constant may still need synthesis, so do not copy
+	 it directly into a register.  Let the expander handle it.  */
+ rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+ /* Generate shift-add of temporary and operands[1]
+ into the final destination. */
+ rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+ rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If the negated constant is cheaper than the original, then negate
+ the constant and use sub. */
+ if (budget2 < budget1)
+ {
+ // load -INTVAL (operands[2]) into a temporary
+ rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2])));
+
+      // Subtract the negated constant from operands[1].
+ rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* No add synthesis was found. Synthesize the constant into
+ a temporary and use that. */
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_PLUS (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+}
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+   For 32-bit objects on a 64-bit target.
+
+   OPERANDS[0] and OPERANDS[1] will be REGs and may be the same
+   REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+
+bool
+synthesize_add_extended (rtx operands[3])
+{
+
+/* If operands[2] is a 12-bit signed immediate,
+ no synthesis needs to be done. */
+
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+/* If operands[2] can be split into two 12-bit signed immediates,
+ split add into two adds. */
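+/* For example (a sketch, again assuming IMM_BITS == 12): operands[2]
+   == 3000 splits into 2047 + 953 and goes through gen_addsi3_extended
+   twice, i.e. two addiw instructions on RV64, with the promoted
+   lowpart subregs keeping the intermediate results sign-extended.  */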
+
+ if (SUM_OF_TWO_S12 (ival))
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
+ temp = gen_lowpart (SImode, temp);
+ SUBREG_PROMOTED_VAR_P (temp) = 1;
+ SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+ emit_insn (gen_rtx_SET (operands[0], temp));
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+
+/* If the negated value is cheaper to synthesize, subtract that from
+ operands[1]. */
+
+ if (budget2 < budget1)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2]))));
+
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_subsi3_extended (t, operands[1], tmp));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+ rtx tsrc = force_reg (SImode, operands[2]);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
+ return true;
+
+}
+
+
+/* HINT specifies the target cache.
+
+   TODO: LOCALITY is currently unused.
+
+   Return the first operand of the associated PREF or PREFX insn.  */
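+/* For example, a read prefetch (HINT == 0) yields
+   0 + DCACHE_HINT + LOCALITY * 0 == 8, i.e. a data-cache hint;
+   LOCALITY does not affect the value.  */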
+rtx
+riscv_prefetch_cookie (rtx hint, rtx locality)
+{
+ return (GEN_INT (INTVAL (hint)
+ + CacheHint::DCACHE_HINT + INTVAL (locality) * 0));
+}
+
+/* Return true if X is a legitimate address with offset for prefetch.
+ MODE is the mode of the value being accessed. */
+bool
+riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
+{
+ struct riscv_address_info addr;
+
+ if (riscv_classify_address (&addr, x, mode, false)
+ && addr.type == ADDRESS_REG)
+ {
+ if (TARGET_XMIPSCBOP)
+ return (CONST_INT_P (addr.offset)
+ && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset)));
+ }
+
+ return true;
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -15804,6 +15945,12 @@ synthesize_and (rtx operands[3])
#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE riscv_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE riscv_libcall_value
+
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 29342d8..9146571 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -765,12 +765,6 @@ enum reg_class
#define CALLEE_SAVED_FREG_NUMBER(REGNO) CALLEE_SAVED_REG_NUMBER (REGNO - 32)
-#define LIBCALL_VALUE(MODE) \
- riscv_function_value (NULL_TREE, NULL_TREE, MODE)
-
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- riscv_function_value (VALTYPE, FUNC, VOIDmode)
-
/* 1 if N is a possible register number for function argument passing.
We have no FP argument registers when soft-float. */
@@ -1325,4 +1319,15 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
#define TARGET_HAS_FMV_TARGET_ATTRIBUTE 0
+/* MIPS pref valid offset range.  */
+#define MIPS_RISCV_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, 0, 511))
+
+/* MIPS pref cache hint type.  */
+typedef enum {
+ ICACHE_HINT = 0 << 3,
+ DCACHE_HINT = 1 << 3,
+ SCACHE_HINT = 2 << 3,
+ TCACHE_HINT = 3 << 3
+} CacheHint;
+
#endif /* ! GCC_RISCV_H */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 578dd43..d34405c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -712,24 +712,45 @@
(set_attr "mode" "SI")])
(define_expand "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand" " r,I")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "reg_or_const_int_operand")))]
""
{
+  /* We may be able to find a faster sequence; if so, then we are
+     done.  Otherwise let expansion continue normally. */
+ if (CONST_INT_P (operands[2])
+ && ((!TARGET_64BIT && synthesize_add (operands))
+ || (TARGET_64BIT && synthesize_add_extended (operands))))
+ DONE;
+
+  /* Constants have already been handled above. */
if (TARGET_64BIT)
{
- rtx t = gen_reg_rtx (DImode);
- emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
- t = gen_lowpart (SImode, t);
- SUBREG_PROMOTED_VAR_P (t) = 1;
- SUBREG_PROMOTED_SET (t, SRP_SIGNED);
- emit_move_insn (operands[0], t);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], operands[2]));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
DONE;
}
+
})
-(define_insn "adddi3"
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (plus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "reg_or_const_int_operand")))]
+ "TARGET_64BIT"
+{
+  /* We may be able to find a faster sequence; if so, then we are
+     done.  Otherwise let expansion continue normally. */
+ if (CONST_INT_P (operands[2]) && synthesize_add (operands))
+ DONE;
+})
+
+(define_insn "*adddi3"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(plus:DI (match_operand:DI 1 "register_operand" " r,r")
(match_operand:DI 2 "arith_operand" " r,I")))]
@@ -2293,12 +2314,16 @@
rtx abs_reg = gen_reg_rtx (<ANYF:MODE>mode);
rtx coeff_reg = gen_reg_rtx (<ANYF:MODE>mode);
rtx tmp_reg = gen_reg_rtx (<ANYF:MODE>mode);
+ rtx fflags = gen_reg_rtx (SImode);
riscv_emit_move (tmp_reg, operands[1]);
riscv_emit_move (coeff_reg,
riscv_vector::get_fp_rounding_coefficient (<ANYF:MODE>mode));
emit_insn (gen_abs<ANYF:mode>2 (abs_reg, operands[1]));
+  /* The FP compare can set the invalid flag for NaN, so back up fflags. */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_frflags (fflags));
riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
emit_jump_insn (gen_jump (end_label));
@@ -2324,6 +2349,14 @@
emit_insn (gen_copysign<ANYF:mode>3 (tmp_reg, abs_reg, operands[1]));
emit_label (end_label);
+
+  /* Restore fflags, but after the label.  This is slightly different
+     from the glibc implementation, which only needs to restore under
+     the label because it checks for NaN first, so the following FP
+     compare can't raise FP exceptions and thus can't clobber fflags. */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_fsflags (fflags));
+
riscv_emit_move (operands[0], tmp_reg);
}
@@ -4402,11 +4435,21 @@
)
(define_insn "prefetch"
- [(prefetch (match_operand 0 "prefetch_operand" "Qr")
- (match_operand 1 "imm5_operand" "i")
- (match_operand 2 "const_int_operand" "n"))]
- "TARGET_ZICBOP"
+ [(prefetch (match_operand 0 "prefetch_operand" "Qr,ZD")
+ (match_operand 1 "imm5_operand" "i,i")
+ (match_operand 2 "const_int_operand" "n,n"))]
+ "TARGET_ZICBOP || TARGET_XMIPSCBOP"
{
+ if (TARGET_XMIPSCBOP)
+ {
+      /* MIPS prefetch-for-write is a nop on the p8700. */
+ if (operands[1] != CONST0_RTX (GET_MODE (operands[1])))
+ return "nop";
+
+ operands[1] = riscv_prefetch_cookie (operands[1], operands[2]);
+ return "mips.pref\t%1,%a0";
+ }
+
switch (INTVAL (operands[1]))
{
case 0:
diff --git a/gcc/config/riscv/sifive-p400.md b/gcc/config/riscv/sifive-p400.md
index ed8b8ec..0acdbda 100644
--- a/gcc/config/riscv/sifive-p400.md
+++ b/gcc/config/riscv/sifive-p400.md
@@ -153,10 +153,13 @@
(eq_attr "type" "fmove,fcvt"))
"p400_float_pipe,sifive_p400_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone were to ask for p400 scheduling while
+;; enabling the various HF mode extensions.
(define_insn_reservation "sifive_p400_fdiv_s" 18
(and (eq_attr "tune" "sifive_p400")
(eq_attr "type" "fdiv,fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"sifive_p400_FM, sifive_p400_fdiv*5")
(define_insn_reservation "sifive_p400_fdiv_d" 31
@@ -178,3 +181,18 @@
(define_bypass 1 "sifive_p400_f2i"
"sifive_p400_branch,sifive_p400_sfb_alu,sifive_p400_mul,
sifive_p400_div,sifive_p400_alu,sifive_p400_cpop")
+
+
+;; Someone familiar with the p400 uarch needs to put
+;; these into the right reservations. This is just a placeholder
+;; for everything I found that had no mapping to a reservation.
+;;
+;; Note that even if the processor does not implement a particular
+;; instruction, it should still have suitable reservations, even if
+;; they are just dummies like this one.
+(define_insn_reservation "sifive_p400_unknown" 1
+ (and (eq_attr "tune" "sifive_p400")
+ (eq_attr "type" "ghost,vfrecp,vclmul,vldm,vmffs,vclmulh,vlsegde,vfcvtitof,vsm4k,vfcvtftoi,vfdiv,vsm3c,vsm4r,viwmuladd,vfwredu,vcpop,vfwmuladd,vstux,vsshift,vfwcvtftof,vfncvtftof,vfwmaccbf16,vext,vssegte,rdvl,vaeskf1,vfslide1up,vmov,vimovvx,vaesef,vfsqrt,viminmax,vfwcvtftoi,vssegtox,vfclass,viwmul,vector,vgmul,vsm3me,vfcmp,vstm,vfredo,vfwmul,vaeskf2,vstox,vfncvtbf16,vislide1up,vgather,vldox,viwred,vctz,vghsh,vsts,vslidedown,vfmerge,vicmp,vsmul,vlsegdff,vfalu,vfmov,vislide1down,vfminmax,vcompress,vldr,vldff,vlsegdux,vimuladd,vsalu,vidiv,sf_vqmacc,vfslide1down,vaesem,vimerge,vfncvtftoi,vfwcvtitof,vicalu,vaesz,sf_vc_se,vsha2cl,vmsfs,vldux,vmidx,vslideup,vired,vlde,vfwredo,vfmovfv,vbrev,vfncvtitof,rdfrm,vsetvl,vssegts,vimul,vialu,vbrev8,vfwalu,rdvlenb,sf_vfnrclip,vclz,vnclip,sf_vc,vimov,vste,vfmuladd,vfmovvf,vwsll,vsetvl_pre,vlds,vlsegds,vmiota,vmalu,wrvxrm,wrfrm,viwalu,vaesdm,vssegtux,vaesdf,vimovxv,vror,vnshift,vstr,vaalu,vsha2ms,crypto,vfwcvtbf16,vlsegdox,vrol,vandn,vfsgnj,vmpop,vfredu,vsha2ch,vshift,vrev8,vfmul"))
+ "p400_int_pipe+sifive_p400_ialu")
+
+
diff --git a/gcc/config/riscv/sifive-p600.md b/gcc/config/riscv/sifive-p600.md
index 2401349..ccd006d 100644
--- a/gcc/config/riscv/sifive-p600.md
+++ b/gcc/config/riscv/sifive-p600.md
@@ -157,10 +157,13 @@
(eq_attr "type" "fmove,fcvt"))
"float_pipe,sifive_p600_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone were to ask for p600 scheduling while
+;; enabling the various HF mode extensions.
(define_insn_reservation "sifive_p600_fdiv_s" 11
(and (eq_attr "tune" "sifive_p600")
(eq_attr "type" "fdiv,fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"sifive_p600_FM, sifive_p600_fdiv*5")
(define_insn_reservation "sifive_p600_fdiv_d" 19
@@ -182,3 +185,15 @@
(define_bypass 1 "sifive_p600_f2i"
"sifive_p600_branch,sifive_p600_sfb_alu,sifive_p600_mul,
sifive_p600_div,sifive_p600_alu,sifive_p600_cpop")
+
+;; Someone familiar with the p600 uarch needs to put
+;; these into the right reservations. This is just a placeholder
+;; for everything I found that had no mapping to a reservation.
+;;
+;; Note that even if the processor does not implement a particular
+;; instruction, it should still have suitable reservations, even if
+;; they are just dummies like this one.
+(define_insn_reservation "sifive_p600_unknown" 1
+ (and (eq_attr "tune" "sifive_p600")
+ (eq_attr "type" "vicmp,vssegte,vbrev8,vfwalu,vimov,vmpop,vaesdf,vislide1up,vror,vsha2cl,vrol,vslideup,vimuladd,vclmul,vaesef,vext,vlsegdff,vfmuladd,vfclass,vmsfs,vfcmp,vsmul,vsm3me,vmalu,vshift,viwmuladd,vfslide1up,vlsegde,vsm4k,wrvxrm,vislide1down,vsm3c,vfwmuladd,vaesdm,vclmulh,vfwcvtftof,vfwredu,vfredo,sf_vfnrclip,vaesz,vwsll,vmiota,vctz,vsetvl_pre,vstm,vidiv,vssegtux,vfwmul,vcompress,vste,vired,vlsegds,vaesem,vfminmax,ghost,vandn,crypto,vfmul,vialu,vfmovvf,rdfrm,vldff,vfmerge,vsshift,vnclip,sf_vqmacc,vnshift,vfdiv,vfslide1down,vfncvtitof,vfsqrt,vimovxv,vstr,vfwcvtbf16,vfwcvtitof,vbrev,vssegtox,vssegts,vcpop,vmffs,viwmul,vldr,vmidx,rdvlenb,vfalu,vslidedown,vlde,vfsgnj,vfmov,viwalu,vsha2ch,vfncvtbf16,vfcvtitof,rdvl,vsetvl,vsha2ms,vector,vstux,vimerge,vclz,sf_vc,vfcvtftoi,viminmax,vsm4r,sf_vc_se,wrfrm,vstox,vfmovfv,vfncvtftoi,vimul,vsalu,vmov,vgmul,vgather,vldux,vlsegdox,vfncvtftof,vimovvx,vghsh,vldm,vldox,vfwcvtftoi,vlds,vfrecp,vaeskf2,vsts,vfredu,vicalu,vaalu,vfwmaccbf16,vrev8,vfwredo,vlsegdux,viwred,vaeskf1"))
+ "int_pipe+sifive_p600_ialu")
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 50ec8b3..ab6f430 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -376,7 +376,19 @@
(match_operand:SI 3 "const_int_operand")] ;; model
"TARGET_ZAAMO || TARGET_ZALRSC"
{
- if (TARGET_ZAAMO)
+ if (TARGET_ZAAMO && TARGET_64BIT && <MODE>mode == SImode)
+ {
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_amo_atomic_exchange_extended (t,
+ operands[1],
+ operands[2],
+ operands[3]));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ }
+ else if (TARGET_ZAAMO)
emit_insn (gen_amo_atomic_exchange<mode> (operands[0], operands[1],
operands[2], operands[3]));
else
@@ -386,18 +398,31 @@
})
(define_insn "amo_atomic_exchange<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r")
+ [(set (match_operand:GPR 0 "register_operand" "=r")
(unspec_volatile:GPR
[(match_operand:GPR 1 "memory_operand" "+A")
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
- (match_operand:GPR 2 "register_operand" "0"))]
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))]
"TARGET_ZAAMO"
"amoswap.<amo>%A3\t%0,%z2,%1"
[(set_attr "type" "atomic")
(set (attr "length") (const_int 4))])
+(define_insn "amo_atomic_exchange_extended"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (unspec_volatile:SI
+ [(match_operand:SI 1 "memory_operand" "+A")
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ UNSPEC_SYNC_EXCHANGE)))
+ (set (match_dup 1)
+ (match_operand:SI 2 "reg_or_0_operand" "rJ"))]
+ "TARGET_64BIT && TARGET_ZAAMO"
+ "amoswap.w%A3\t%0,%z2,%1"
+ [(set_attr "type" "atomic")
+ (set (attr "length") (const_int 4))])
+
(define_insn "lrsc_atomic_exchange<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(unspec_volatile:GPR
@@ -434,13 +459,13 @@
})
(define_insn "zabha_atomic_exchange<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
+ [(set (match_operand:SHORT 0 "register_operand" "=r")
(unspec_volatile:SHORT
[(match_operand:SHORT 1 "memory_operand" "+A")
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE_ZABHA))
(set (match_dup 1)
- (match_operand:SHORT 2 "register_operand" "0"))]
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))]
"TARGET_ZABHA"
"amoswap.<amobh>%A3\t%0,%z2,%1"
[(set_attr "type" "atomic")
diff --git a/gcc/config/riscv/t-rtems b/gcc/config/riscv/t-rtems
index f596e76..a4d2d03 100644
--- a/gcc/config/riscv/t-rtems
+++ b/gcc/config/riscv/t-rtems
@@ -1,8 +1,8 @@
MULTILIB_OPTIONS =
MULTILIB_DIRNAMES =
-MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc
-MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc
+MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc/march=rv64imc
+MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc rv64imc
MULTILIB_OPTIONS += mabi=ilp32/mabi=ilp32f/mabi=ilp32d/mabi=lp64/mabi=lp64d
MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d
@@ -10,6 +10,9 @@ MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d
MULTILIB_OPTIONS += mcmodel=medany
MULTILIB_DIRNAMES += medany
+MULTILIB_OPTIONS += mstrict-align
+MULTILIB_DIRNAMES += strict-align
+
MULTILIB_REQUIRED =
MULTILIB_REQUIRED += march=rv32i/mabi=ilp32
MULTILIB_REQUIRED += march=rv32iac/mabi=ilp32
@@ -25,3 +28,5 @@ MULTILIB_REQUIRED += march=rv64ima/mabi=lp64/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imac/mabi=lp64/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imafd/mabi=lp64d/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany
+MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany/mstrict-align
+MULTILIB_REQUIRED += march=rv64imc/mabi=lp64/mcmodel=medany/mstrict-align
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 66b7670..2b35d66 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1398,6 +1398,7 @@
}
[(set_attr "type" "vmov,vlde,vste")
(set_attr "mode" "<VT:MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))])
@@ -1435,6 +1436,7 @@
}
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -1485,6 +1487,7 @@
}
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<VLS_AVL_REG:MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -5490,6 +5493,98 @@
"TARGET_VECTOR"
{})
+(define_expand "@pred_mul_plus_vx_<mode>"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_QHS
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_QHS 3 "register_operand"))
+ (match_operand:V_VLSI_QHS 4 "register_operand"))
+ (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_mul_plus_vx_<mode>"
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_D
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_D 3 "register_operand"))
+ (match_operand:V_VLSI_D 4 "register_operand"))
+ (match_operand:V_VLSI_D 5 "vector_merge_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_QHS
+ (match_operand:V_VLSI_QHS 4 "register_operand")
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_QHS 3 "register_operand")))
+ (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_D
+ (match_operand:V_VLSI_D 4 "register_operand")
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_D 3 "register_operand")))
+ (match_operand:V_VLSI_D 5 "vector_merge_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
(define_insn "*pred_madd<mode>_scalar"
[(set (match_operand:V_VLSI 0 "register_operand" "=vd, vr")
(if_then_else:V_VLSI
@@ -6324,8 +6419,8 @@
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<optab><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -6336,11 +6431,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (commutative_float_binop:VF
- (vec_duplicate:VF
+ (commutative_float_binop:V_VLSF
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vf<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
@@ -6349,43 +6444,43 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
(define_insn "@pred_<optab><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (commutative_float_binop_nofrm:VF
- (vec_duplicate:VF
- (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (commutative_float_binop_nofrm:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vf<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<ieee_fmaxmin_op><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VF
- [(match_operand:VF 3 "register_operand" " vr, vr, vr, vr")
- (vec_duplicate:VF
+ (unspec:V_VLSF
+ [(match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))]
UNSPEC_VFMAXMIN)
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<ieee_fmaxmin_op>.vf\t%0,%3,%4%p1"
[(set_attr "type" "vfminmax")
@@ -6417,8 +6512,8 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
(define_insn "@pred_<optab><mode>_reverse_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -6429,11 +6524,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (non_commutative_float_binop:VF
- (vec_duplicate:VF
+ (non_commutative_float_binop:V_VLSF
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vfr<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
@@ -8839,6 +8934,106 @@
[(set_attr "type" "vssegt<order>x")
(set_attr "mode" "<V32T:MODE>")])
+(define_insn "*pred_macc_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_QHS
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr"))
+ (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0"))
+ (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vmacc.vx\t%0,%z3,%4%p1
+ vmacc.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_macc_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_D
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_D 4 "register_operand" " vr, vr"))
+ (match_operand:V_VLSI_D 5 "register_operand" " 0, 0"))
+ (match_operand:V_VLSI_D 2 "vector_undef_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "@
+ vmacc.vx\t%0,%z3,%4%p1
+ vmacc.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_nmsac_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_QHS
+ (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0")
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")))
+ (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vnmsac.vx\t%0,%z3,%4%p1
+ vnmsac.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_nmsac_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_D
+ (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")))
+ (match_operand:V_VLSI_D 2 "vector_undef_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "@
+ vnmsac.vx\t%0,%z3,%4%p1
+ vnmsac.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
(include "autovec.md")
(include "autovec-opt.md")
(include "sifive-vector.md")
diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md
index 34b4a8f..6179140 100644
--- a/gcc/config/riscv/xiangshan.md
+++ b/gcc/config/riscv/xiangshan.md
@@ -144,13 +144,13 @@
(define_insn_reservation "xiangshan_sfdiv" 11
(and (eq_attr "tune" "xiangshan")
(eq_attr "type" "fdiv")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_sfsqrt" 17
(and (eq_attr "tune" "xiangshan")
(eq_attr "type" "fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_dfdiv" 21
diff --git a/gcc/config/rl78/rl78.opt.urls b/gcc/config/rl78/rl78.opt.urls
index 96eff5f..66e874b 100644
--- a/gcc/config/rl78/rl78.opt.urls
+++ b/gcc/config/rl78/rl78.opt.urls
@@ -4,7 +4,7 @@ msim
UrlSuffix(gcc/RL78-Options.html#index-msim-6)
mmul=
-UrlSuffix(gcc/RL78-Options.html#index-mmul)
+UrlSuffix(gcc/RL78-Options.html#index-mmul-1)
mallregs
UrlSuffix(gcc/RL78-Options.html#index-mallregs)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 764b499..8dd23f8 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10322,7 +10322,7 @@ can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
rotated over the highest bit. */
unsigned HOST_WIDE_INT uc = c;
int pos_one = clz_hwi ((HOST_WIDE_INT) (uc << 16) >> 16);
- if (pos_one != 0)
+ if (pos_one > 0 && pos_one < HOST_BITS_PER_WIDE_INT)
{
middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
int middle_ones = clz_hwi (~(uc << pos_one));
@@ -10585,7 +10585,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
{
/* li/lis; rldicX */
unsigned HOST_WIDE_INT imm = (c | ~mask);
- if (shift != 0)
+ if (shift > 0 && shift < HOST_BITS_PER_WIDE_INT)
imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
count_or_emit_insn (temp, GEN_INT (imm));
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e31ee40..04a6c0f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -15665,10 +15665,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
{
operands[3] = gen_reg_rtx (CCmode);
@@ -15703,10 +15703,10 @@
(if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 1)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
@@ -15716,10 +15716,10 @@
(if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
- (if_then_else (gtu (match_dup 1)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gtu (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
@@ -15751,10 +15751,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
{
operands[3] = gen_reg_rtx (CCmode);
@@ -15807,10 +15807,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC && TARGET_64BIT"
{
operands[3] = gen_reg_rtx (CCmode);
diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc
index dd730dc..c563881 100644
--- a/gcc/config/rx/rx.cc
+++ b/gcc/config/rx/rx.cc
@@ -1648,16 +1648,20 @@ mark_frame_related (rtx insn)
static void
add_pop_cfi_notes (rtx_insn *insn, unsigned int high, unsigned int low)
{
- rtx t = plus_constant (Pmode, stack_pointer_rtx,
- (high - low + 1) * UNITS_PER_WORD);
+ rtx src = stack_pointer_rtx;
+ rtx t;
+ for (unsigned int i = low; i <= high; i++)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (word_mode, i));
+ if (i == FRAME_POINTER_REGNUM && frame_pointer_needed)
+ src = frame_pointer_rtx;
+ }
+ t = plus_constant (Pmode, src, (high - low + 1) * UNITS_PER_WORD);
t = gen_rtx_SET (stack_pointer_rtx, t);
add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
RTX_FRAME_RELATED_P (insn) = 1;
- for (unsigned int i = low; i <= high; i++)
- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (word_mode, i));
}
-
static bool
ok_for_max_constant (HOST_WIDE_INT val)
{
@@ -1816,36 +1820,17 @@ rx_expand_prologue (void)
}
}
- /* If needed, set up the frame pointer. */
- if (frame_pointer_needed)
- gen_safe_add (frame_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) frame_size), true);
-
- /* Allocate space for the outgoing args.
- If the stack frame has not already been set up then handle this as well. */
- if (stack_size)
+ if (stack_size || frame_size)
{
- if (frame_size)
- {
- if (frame_pointer_needed)
- gen_safe_add (stack_pointer_rtx, frame_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) stack_size), true);
- else
- gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) (frame_size + stack_size)),
- true);
- }
- else
- gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) stack_size), true);
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) (stack_size + frame_size)),
+ true);
}
- else if (frame_size)
+ if (frame_pointer_needed)
{
- if (! frame_pointer_needed)
- gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (- (HOST_WIDE_INT) frame_size), true);
- else
- gen_safe_add (stack_pointer_rtx, frame_pointer_rtx, NULL_RTX, true);
+ gen_safe_add (frame_pointer_rtx, stack_pointer_rtx,
+ GEN_INT ((HOST_WIDE_INT) stack_size),
+ true);
}
}
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d044f9a..1a47f47 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -8318,7 +8318,7 @@ s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
}
/* Expand floating-point op0 = op1 <=> op2, i.e.,
- op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : 2.
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : -128.
If op3 equals const0_rtx, then we are interested in the compare only (see
test spaceship-fp-4.c). Otherwise, op3 is a CONST_INT different than
@@ -8368,7 +8368,7 @@ s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
{
emit_jump (l_end);
emit_label (l_unordered);
- rtx unord_val = op3 == const0_rtx ? const2_rtx : op3;
+ rtx unord_val = op3 == const0_rtx ? GEN_INT (-128) : op3;
emit_move_insn (op0, unord_val);
}
emit_label (l_end);
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 8cc48b0..858387c 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -5248,18 +5248,19 @@
})
(define_insn "*zero_extendsidi2"
- [(set (match_operand:DI 0 "register_operand" "=d,d,d")
- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b")))]
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d,d")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b,v")))]
"TARGET_ZARCH"
"@
llgfr\t%0,%1
llgf\t%0,%1
- llgfrl\t%0,%1"
- [(set_attr "op_type" "RRE,RXY,RIL")
- (set_attr "type" "*,*,larl")
- (set_attr "cpu_facility" "*,*,z10")
- (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3")
- (set_attr "relative_long" "*,*,yes")])
+ llgfrl\t%0,%1
+ vlgvf\t%0,%v1,0"
+ [(set_attr "op_type" "RRE,RXY,RIL,VRS")
+ (set_attr "type" "*,*,larl,*")
+ (set_attr "cpu_facility" "*,*,z10,vx")
+ (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3,*")
+ (set_attr "relative_long" "*,*,yes,*")])
;
; LLGT-type instructions (zero-extend from 31 bit to 64 bit).
@@ -5362,29 +5363,32 @@
; llhrl, llghrl
(define_insn "*zero_extendhi<mode>2_z10"
- [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
- (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b")))]
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b,v")))]
"TARGET_Z10"
"@
ll<g>hr\t%0,%1
ll<g>h\t%0,%1
- ll<g>hrl\t%0,%1"
- [(set_attr "op_type" "RXY,RRE,RIL")
- (set_attr "type" "*,*,larl")
- (set_attr "cpu_facility" "*,*,z10")
- (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3")
- (set_attr "relative_long" "*,*,yes")])
+ ll<g>hrl\t%0,%1
+ vlgvh\t%0,%v1,0"
+ [(set_attr "op_type" "RXY,RRE,RIL,VRS")
+ (set_attr "type" "*,*,larl,*")
+ (set_attr "cpu_facility" "*,*,z10,vx")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3,*")
+ (set_attr "relative_long" "*,*,yes,*")])
; llhr, llcr, llghr, llgcr, llh, llc, llgh, llgc
(define_insn "*zero_extend<HQI:mode><GPR:mode>2_extimm"
- [(set (match_operand:GPR 0 "register_operand" "=d,d")
- (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T")))]
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
+ (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T,v")))]
"TARGET_EXTIMM"
"@
ll<g><hc>r\t%0,%1
- ll<g><hc>\t%0,%1"
- [(set_attr "op_type" "RRE,RXY")
- (set_attr "z10prop" "z10_super_E1,z10_fwd_A3")])
+ ll<g><hc>\t%0,%1
+ vlgv<HQI:bhfgq>\t%0,%v1,0"
+ [(set_attr "op_type" "RRE,RXY,VRS")
+ (set_attr "cpu_facility" "*,*,vx")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,*")])
; llgh, llgc
(define_insn "*zero_extend<HQI:mode><GPR:mode>2"
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 12bbeb6..745634e 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -501,54 +501,6 @@
SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")])
-; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e.,
-; an implicit zero extend is done.
-
-(define_insn "*movdi<mode>_zero_extend_A"
- [(set (match_operand:DI 0 "register_operand" "=d")
- (zero_extend:DI (match_operand:SINT 1 "register_operand" "v")))]
- "TARGET_VX"
- "vlgv<bhfgq>\t%0,%v1,0"
- [(set_attr "op_type" "VRS")])
-
-(define_insn "*movsi<mode>_zero_extend_A"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (zero_extend:SI (match_operand:HQI 1 "register_operand" "v")))]
- "TARGET_VX"
- "vlgv<bhfgq>\t%0,%v1,0"
- [(set_attr "op_type" "VRS")])
-
-(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
- V1HI V2HI V4HI V8HI
- V1SI V2SI V4SI])
-(define_insn "*movdi<mode>_zero_extend_B"
- [(set (match_operand:DI 0 "register_operand" "=d")
- (zero_extend:DI (vec_select:<non_vec>
- (match_operand:VLGV_DI 1 "register_operand" "v")
- (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
- "TARGET_VX"
-{
- operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
- return "vlgv<bhfgq>\t%0,%v1,%Y2";
-}
- [(set_attr "op_type" "VRS")
- (set_attr "mnemonic" "vlgv<bhfgq>")])
-
-(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
- V1HI V2HI V4HI V8HI])
-(define_insn "*movsi<mode>_zero_extend_B"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (zero_extend:SI (vec_select:<non_vec>
- (match_operand:VLGV_SI 1 "register_operand" "v")
- (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
- "TARGET_VX"
-{
- operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
- return "vlgv<bhfgq>\t%0,%v1,%Y2";
-}
- [(set_attr "op_type" "VRS")
- (set_attr "mnemonic" "vlgv<bhfgq>")])
-
; vec_load_lanes?
; vec_store_lanes?
@@ -763,6 +715,42 @@
DONE;
})
+; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e.,
+; an implicit zero extend is done.
+
+(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
+ V1HI V2HI V4HI V8HI
+ V1SI V2SI V4SI])
+(define_insn "*vec_extract<mode>_zero_extend"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (vec_select:<non_vec>
+ (match_operand:VLGV_DI 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))]
+ "TARGET_VX"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+ [(set_attr "op_type" "VRS")
+ (set_attr "mnemonic" "vlgv<bhfgq>")])
+
+(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
+ V1HI V2HI V4HI V8HI])
+(define_insn "*vec_extract<mode>_zero_extend"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (vec_select:<non_vec>
+ (match_operand:VLGV_SI 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))]
+ "TARGET_VX"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+ [(set_attr "op_type" "VRS")
+ (set_attr "mnemonic" "vlgv<bhfgq>")])
+
(define_insn "*vec_vllezlf<mode>"
[(set (match_operand:V_HW_4 0 "register_operand" "=v")
(vec_concat:V_HW_4
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 77c9571..727ec1e 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -130,7 +130,7 @@
(and (match_code "mem")
(match_test "smalloffset_mem_p (op)")))
-(define_memory_constraint "T"
+(define_special_memory_constraint "T"
"Memory in a literal pool (addressable with an L32R instruction)."
(and (match_code "mem")
(match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 9aeaba6..20160a4 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -189,6 +189,9 @@
(define_predicate "ubranch_operator"
(match_code "ltu,geu"))
+(define_predicate "alt_ubranch_operator"
+ (match_code "gtu,leu"))
+
(define_predicate "boolean_operator"
(match_code "eq,ne"))
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 1f5dcf5..98e75c6 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
extern bool xtensa_split1_finished_p (void);
extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index d75cba4..f3b89de 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2645,6 +2645,94 @@ xtensa_split_DI_reg_imm (rtx *operands)
}
+/* Return the asm output string for the bswapsi2_internal insn pattern.
+   It does this by scanning backwards through the BB from the specified
+   insn; if another bswapsi2_internal is found, the instruction that
+   sets SAR to 8 is omitted.  If none is found, or if a CALL, JUMP, ASM,
+   or other insn that clobbers SAR is found first, an instruction
+   setting SAR to 8 is prepended as usual.  */
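+
+/* A minimal illustration, assuming both swaps land in the same basic
+   block with no intervening call, non-immediate shift, or asm:
+
+     unsigned int
+     f (unsigned int a, unsigned int b)
+     {
+       return __builtin_bswap32 (a) ^ __builtin_bswap32 (b);
+     }
+
+   Here the second byte swap can reuse the SAR value set by the first,
+   so its leading "ssai\t8" is dropped.  */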
+
+static int
+xtensa_bswapsi2_output_1 (rtx_insn *insn)
+{
+ int icode;
+ rtx pat;
+ const char *iname;
+
+  /* CALL insns do not preserve SAR.
+     JUMP insns only appear at the end of a BB, so they do not need to
+     be considered when scanning backwards.  */
+ if (CALL_P (insn))
+ return -1;
+
+ switch (icode = INSN_CODE (insn))
+ {
+    /* Rotate insns clobber SAR.  */
+ case CODE_FOR_rotlsi3:
+ case CODE_FOR_rotrsi3:
+ return -1;
+    /* Simple shift insns clobber SAR if the shift amount is not an
+       immediate.  */
+ case CODE_FOR_ashlsi3_internal:
+ case CODE_FOR_ashrsi3:
+ case CODE_FOR_lshrsi3:
+ if (! CONST_INT_P (XEXP (SET_SRC (PATTERN (insn)), 1)))
+ return -1;
+ break;
+    /* This insn always sets SAR to 8.  */
+ case CODE_FOR_bswapsi2_internal:
+ return 1;
+ default:
+ break;
+ }
+
+ /* "*shift_per_byte" and "*shlrd_*" complex shift insns clobber SAR. */
+ if (icode >= CODE_FOR_nothing
+ && (! strcmp (iname = insn_data[icode].name, "*shift_per_byte")
+ || ! strncmp (iname, "*shlrd_", 7)))
+ return -1;
+
+  /* Asm statements may also clobber SAR, so assume the worst for them.  */
+ if (NONJUMP_INSN_P (insn))
+ switch (GET_CODE (pat = PATTERN (insn)))
+ {
+ case SET:
+ return GET_CODE (SET_SRC (pat)) == ASM_OPERANDS ? -1 : 0;
+ case PARALLEL:
+ return (GET_CODE (pat = XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (SET_SRC (pat)) == ASM_OPERANDS)
+ || GET_CODE (pat) == ASM_OPERANDS
+ || GET_CODE (pat) == ASM_INPUT ? -1 : 0;
+ case ASM_OPERANDS:
+ return -1;
+ default:
+ break;
+ }
+
+ /* All other insns neither set nor clobber SAR.  */
+ return 0;
+}
+
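+/* Return OUTPUT for a bswapsi2_internal insn, prefixed with an instruction
+ that sets SAR to 8 unless the helper above finds that a preceding
+ bswapsi2_internal in the same BB has already left SAR set to 8.  */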
+char *
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+ static char result[128];
+ int i;
+
+ strcpy (result, "ssai\t8\n\t");
+ while ((insn = prev_nonnote_nondebug_insn_bb (insn)))
+ if ((i = xtensa_bswapsi2_output_1 (insn)) < 0)
+ break;
+ else if (i > 0)
+ {
+ result[0] = '\0';
+ break;
+ }
+ strcat (result, output);
+
+ return result;
+}
+
+
/* Try to split an integer value into what are suitable for two consecutive
immediate addition instructions, ADDI or ADDMI. */
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 629dfdd..52ffb16 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -88,6 +88,7 @@
;; This mode iterator allows the HI and QI patterns to be defined from
;; the same template.
(define_mode_iterator HQI [HI QI])
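+;; This mode attribute gives the bit width of each of the above modes.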
+(define_mode_attr mode_bits [(HI "16") (QI "8")])
;; This mode iterator allows the SI and HI patterns to be defined from
;; the same template.
@@ -176,19 +177,18 @@
;; Addition.
(define_insn "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
- (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
- (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
- ""
- "@
- add.n\t%0, %1, %2
- addi.n\t%0, %1, %d2
- add\t%0, %1, %2
- addi\t%0, %1, %d2
- addmi\t%0, %1, %x2"
- [(set_attr "type" "arith,arith,arith,arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "2,2,3,3,3")])
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "add_operand")))]
+ ""
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [D, d, d; arith, 2] add.n\t%0, %1, %2
+ [D, d, O; arith, 2] addi.n\t%0, %1, %d2
+ [a, r, r; arith, 3] add\t%0, %1, %2
+ [a, r, J; arith, 3] addi\t%0, %1, %d2
+ [a, r, N; arith, 3] addmi\t%0, %1, %x2
+ }
+ [(set_attr "mode" "SI")])
(define_insn "*addsubx"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -392,18 +392,15 @@
(set_attr "length" "3")])
(define_insn "<u>mulhisi3"
- [(set (match_operand:SI 0 "register_operand" "=C,A")
- (mult:SI (any_extend:SI
- (match_operand:HI 1 "register_operand" "%r,r"))
- (any_extend:SI
- (match_operand:HI 2 "register_operand" "r,r"))))]
+ [(set (match_operand:SI 0 "register_operand")
+ (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand"))
+ (any_extend:SI (match_operand:HI 2 "register_operand"))))]
"TARGET_MUL16 || TARGET_MAC16"
- "@
- mul16<su>\t%0, %1, %2
- <u>mul.aa.ll\t%1, %2"
- [(set_attr "type" "mul16,mac16")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [C, r, r; mul16, 3] mul16<su>\t%0, %1, %2
+ [A, r, r; mac16, 3] <u>mul.aa.ll\t%1, %2
+ }
+ [(set_attr "mode" "SI")])
(define_insn "muladdhisi"
[(set (match_operand:SI 0 "register_operand" "=A")
@@ -652,36 +649,15 @@
})
(define_insn "bswapsi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,&a")
- (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
- (clobber (match_scratch:SI 2 "=&a,X"))]
+ [(set (match_operand:SI 0 "register_operand")
+ (bswap:SI (match_operand:SI 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
"!optimize_debug && optimize > 1 && !optimize_size"
-{
- rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
- const char *init = "ssai\t8\;";
- static char result[128];
- if (prev_insn && NONJUMP_INSN_P (prev_insn))
- {
- rtx x = PATTERN (prev_insn);
- if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == SET
- && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
- {
- x = XEXP (XVECEXP (x, 0, 0), 1);
- if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
- init = "";
- }
- }
- sprintf (result,
- (which_alternative == 0)
- ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2"
- : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0",
- init);
- return result;
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "15,15")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1, 16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+ [&a, r, X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+ }
+ [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand" "")
@@ -742,16 +718,15 @@
;; Logical instructions.
(define_insn "andsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (and:SI (match_operand:SI 1 "register_operand" "%r,r")
- (match_operand:SI 2 "mask_operand" "P,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (and:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "mask_operand")))]
""
- "@
- extui\t%0, %1, 0, %K2
- and\t%0, %1, %2"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [a, r, P; arith, 3] extui\t%0, %1, 0, %K2
+ [a, r, r; arith, 3] and\t%0, %1, %2
+ }
+ [(set_attr "mode" "SI")])
(define_insn_and_split "*andsi3_bitcmpl"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -944,27 +919,15 @@
;; Zero-extend instructions.
-(define_insn "zero_extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))]
- ""
- "@
- extui\t%0, %1, 0, 16
- %v1l16ui\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
-
-(define_insn "zero_extendqisi2"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))]
+(define_insn "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "register_operand")
+ (zero_extend:SI (match_operand:HQI 1 "nonimmed_operand")))]
""
- "@
- extui\t%0, %1, 0, 8
- %v1l8ui\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [a, r; arith, 3] extui\t%0, %1, 0, <mode_bits>
+ [a, U; load , 3] %v1l<mode_bits>ui\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
;; Sign-extend instructions.
@@ -982,15 +945,14 @@
})
(define_insn "extendhisi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=B,a")
- (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:HI 1 "sext_operand")))]
""
- "@
- sext\t%0, %1, 15
- %v1l16si\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [B, r; arith, 3] sext\t%0, %1, 15
+ [a, U; load , 3] %v1l16si\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_expand "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "")
@@ -1327,29 +1289,28 @@
})
(define_insn "movsi_internal"
- [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,a,W,a,a,U,*a,*A")
- (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,Y,i,T,U,r,*A,*r"))]
+ [(set (match_operand:SI 0 "nonimmed_operand")
+ (match_operand:SI 1 "move_operand"))]
"xtensa_valid_move (SImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov.n\t%0, %1
- %v1l32i.n\t%0, %1
- %v0s32i.n\t%1, %0
- %v0s32i.n\t%1, %0
- mov\t%0, %1
- movsp\t%0, %1
- movi\t%0, %x1
- movi\t%0, %1
- const16\t%0, %t1\;const16\t%0, %b1
- %v1l32r\t%0, %1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,load,store,store,move,move,move,load,move,load,load,store,rsr,wsr")
- (set_attr "mode" "SI")
- (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, D; move , 2] mov.n\t%0, %1
+ [ D, d; move , 2] ^
+ [ D, R; load , 2] %v1l32i.n\t%0, %1
+ [ R, D; store, 2] %v0s32i.n\t%1, %0
+ [ R, d; store, 2] ^
+ [ a, r; move , 3] mov\t%0, %1
+ [ q, r; move , 3] movsp\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, Y; load , 3] movi\t%0, %1
+ [ W, i; move , 6] const16\t%0, %t1\;const16\t%0, %b1
+ [ a, T; load , 3] %v1l32r\t%0, %1
+ [ a, U; load , 3] %v1l32i\t%0, %1
+ [ U, r; store, 3] %v0s32i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "SI")])
(define_split
[(set (match_operand:SHI 0 "register_operand")
@@ -1399,23 +1360,22 @@
})
(define_insn "movhi_internal"
- [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,a,a,U,*a,*A")
- (match_operand:HI 1 "move_operand" "M,d,r,I,Y,T,U,r,*A,*r"))]
+ [(set (match_operand:HI 0 "nonimmed_operand")
+ (match_operand:HI 1 "move_operand"))]
"xtensa_valid_move (HImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov\t%0, %1
- movi\t%0, %x1
- movi\t%0, %1
- %v1l32r\t%0, %1
- %v1l16ui\t%0, %1
- %v0s16i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,load,load,load,store,rsr,wsr")
- (set_attr "mode" "HI")
- (set_attr "length" "2,2,3,3,3,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, d; move , 2] mov.n\t%0, %1
+ [ a, r; move , 3] mov\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, Y; load , 3] movi\t%0, %1
+ [ a, T; load , 3] %v1l32r\t%0, %1
+ [ a, U; load , 3] %v1l16ui\t%0, %1
+ [ U, r; store, 3] %v0s16i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "HI")])
;; 8-bit Integer moves
@@ -1429,21 +1389,20 @@
})
(define_insn "movqi_internal"
- [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
- (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ [(set (match_operand:QI 0 "nonimmed_operand")
+ (match_operand:QI 1 "move_operand"))]
"xtensa_valid_move (QImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov\t%0, %1
- movi\t%0, %x1
- %v1l8ui\t%0, %1
- %v0s8i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
- (set_attr "mode" "QI")
- (set_attr "length" "2,2,3,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, d; move , 2] mov.n\t%0, %1
+ [ a, r; move , 3] mov\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, U; load , 3] %v1l8ui\t%0, %1
+ [ U, r; store, 3] %v0s8i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "QI")])
;; Sub-word reloads from the constant pool.
@@ -1501,30 +1460,29 @@
})
(define_insn "movsf_internal"
- [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,a,D,R,a,f,a,a,W,a,U")
- (match_operand:SF 1 "move_operand" "f,^U,f,d,T,R,d,r,r,f,Y,iF,U,r"))]
+ [(set (match_operand:SF 0 "nonimmed_operand")
+ (match_operand:SF 1 "move_operand"))]
"((register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& !(FP_REG_P (xt_true_regnum (operands[0]))
&& (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))"
- "@
- mov.s\t%0, %1
- %v1lsi\t%0, %1
- %v0ssi\t%1, %0
- mov.n\t%0, %1
- %v1l32r\t%0, %1
- %v1l32i.n\t%0, %1
- %v0s32i.n\t%1, %0
- mov\t%0, %1
- wfr\t%0, %1
- rfr\t%0, %1
- movi\t%0, %y1
- const16\t%0, %t1\;const16\t%0, %b1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0"
- [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,load,move,load,store")
- (set_attr "mode" "SF")
- (set_attr "length" "3,3,3,2,3,2,2,3,3,3,3,6,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [f, f; farith, 3] mov.s\t%0, %1
+ [f, ^U; fload , 3] %v1lsi\t%0, %1
+ [U, f; fstore, 3] %v0ssi\t%1, %0
+ [D, d; move , 2] mov.n\t%0, %1
+ [a, T; load , 3] %v1l32r\t%0, %1
+ [D, R; load , 2] %v1l32i.n\t%0, %1
+ [R, d; store , 2] %v0s32i.n\t%1, %0
+ [a, r; move , 3] mov\t%0, %1
+ [f, r; farith, 3] wfr\t%0, %1
+ [a, f; farith, 3] rfr\t%0, %1
+ [a, Y; load , 3] movi\t%0, %y1
+ [W, iF; move , 6] const16\t%0, %t1\;const16\t%0, %b1
+ [a, U; load , 3] %v1l32i\t%0, %1
+ [U, r; store , 3] %v0s32i\t%1, %0
+ }
+ [(set_attr "mode" "SF")])
(define_insn "*lsiu"
[(set (match_operand:SF 0 "register_operand" "=f")
@@ -1692,16 +1650,15 @@
})
(define_insn "ashlsi3_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (ashift:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- slli\t%0, %1, %R2
- ssl\t%2\;sll\t%0, %1"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] slli\t%0, %1, %R2
+ [a, r, r; arith, 6] ssl\t%2\;sll\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_split
[(set (match_operand:SI 0 "register_operand")
@@ -1713,35 +1670,26 @@
(match_dup 1)))])
(define_insn "ashrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- srai\t%0, %1, %R2
- ssr\t%2\;sra\t%0, %1"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] srai\t%0, %1, %R2
+ [a, r, r; arith, 6] ssr\t%2\;sra\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "lshrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
-{
- if (which_alternative == 0)
- {
- if ((INTVAL (operands[2]) & 0x1f) < 16)
- return "srli\t%0, %1, %R2";
- else
- return "extui\t%0, %1, %R2, %L2";
- }
- return "ssr\t%2\;srl\t%0, %1";
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] << (INTVAL (operands[2]) & 0x1f) < 16 ? \"srli\t%0, %1, %R2\" : \"extui\t%0, %1, %R2, %L2\";
+ [a, r, r; arith, 6] ssr\t%2\;srl\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "*shift_per_byte"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -1944,28 +1892,26 @@
(set_attr "length" "6")])
(define_insn "rotlsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (rotate:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (rotate:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- ssai\t%L2\;src\t%0, %1, %1
- ssl\t%2\;src\t%0, %1, %1"
- [(set_attr "type" "multi,multi")
- (set_attr "mode" "SI")
- (set_attr "length" "6,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; multi, 6] ssai\t%L2\;src\t%0, %1, %1
+ [a, r, r; multi, 6] ssl\t%2\;src\t%0, %1, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "rotrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- ssai\t%R2\;src\t%0, %1, %1
- ssr\t%2\;src\t%0, %1, %1"
- [(set_attr "type" "multi,multi")
- (set_attr "mode" "SI")
- (set_attr "length" "6,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; multi, 6] ssai\t%R2\;src\t%0, %1, %1
+ [a, r, r; multi, 6] ssr\t%2\;src\t%0, %1, %1
+ }
+ [(set_attr "mode" "SI")])
;; Comparisons.
@@ -2024,26 +1970,23 @@
[(match_operand:SI 0 "register_operand" "r")
(const_int -2147483648)])
(label_ref (match_operand 1 ""))
- (pc)))]
+ (pc)))
+ (clobber (match_scratch:SI 3 "=a"))]
"TARGET_ABS"
"#"
- "&& can_create_pseudo_p ()"
+ "&& 1"
[(set (match_dup 3)
(abs:SI (match_dup 0)))
(set (pc)
(if_then_else (match_op_dup 2
- [(zero_extract:SI (match_dup 3)
- (const_int 1)
- (match_dup 4))
+ [(match_dup 3)
(const_int 0)])
(label_ref (match_dup 1))
(pc)))]
{
- operands[3] = gen_reg_rtx (SImode);
- operands[4] = GEN_INT (BITS_BIG_ENDIAN ? 0 : 31);
- operands[2] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[2])),
- VOIDmode, XEXP (operands[2], 0),
- const0_rtx);
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (SImode);
+ PUT_CODE (operands[2], GET_CODE (operands[2]) == EQ ? LT : GE);
}
[(set_attr "type" "jump")
(set_attr "mode" "none")
@@ -2190,7 +2133,7 @@
(label_ref (match_dup 1))
(pc)))]
{
- operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (GET_MODE (operands[3]))) - 1);
+ operands[3] = GEN_INT (GET_MODE_MASK (GET_MODE (operands[3])));
})
(define_insn_and_split "*masktrue_const_pow2_minus_one"
@@ -3370,6 +3313,42 @@
(const_int 8)
(const_int 9))))])
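+;; Test whether a register value lies within [-2^N, 2^N) by clamping it
+;; with a CLAMPS instruction and branching on whether the clamped result
+;; equals the original value.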
+(define_insn_and_split "*eqne_in_range"
+ [(set (pc)
+ (if_then_else (match_operator 4 "alt_ubranch_operator"
+ [(plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "i"))
+ (match_operand:SI 2 "const_int_operand" "i")])
+ (label_ref (match_operand 3 ""))
+ (pc)))
+ (clobber (match_scratch:SI 5 "=&a"))]
+ "TARGET_MINMAX && TARGET_CLAMPS
+ && INTVAL (operands[1]) * 2 - INTVAL (operands[2]) == 1
+ && IN_RANGE (exact_log2 (INTVAL (operands[1])), 7, 22)"
+ "#"
+ "&& 1"
+ [(set (match_dup 5)
+ (smin:SI (smax:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))
+ (set (pc)
+ (if_then_else (match_op_dup 4
+ [(match_dup 0)
+ (match_dup 5)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ HOST_WIDE_INT v = INTVAL (operands[1]);
+ operands[1] = GEN_INT (-v);
+ operands[2] = GEN_INT (v - 1);
+ PUT_CODE (operands[4], GET_CODE (operands[4]) == GTU ? NE : EQ);
+ if (GET_CODE (operands[5]) == SCRATCH)
+ operands[5] = gen_reg_rtx (SImode);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "6")])
+
(define_split
[(clobber (match_operand 0 "register_operand"))]
"HARD_REGISTER_P (operands[0])