Diffstat (limited to 'gcc/config/riscv')
-rw-r--r--  gcc/config/riscv/andes-23-series.md             |  190
-rw-r--r--  gcc/config/riscv/andes-25-series.md             |  322
-rw-r--r--  gcc/config/riscv/andes-45-series.md             |  379
-rw-r--r--  gcc/config/riscv/autovec-opt.md                 |   96
-rw-r--r--  gcc/config/riscv/autovec.md                     |   98
-rw-r--r--  gcc/config/riscv/bitmanip.md                    |   29
-rw-r--r--  gcc/config/riscv/crypto.md                      |   34
-rw-r--r--  gcc/config/riscv/predicates.md                  |    3
-rw-r--r--  gcc/config/riscv/riscv-avlprop.cc               |   41
-rw-r--r--  gcc/config/riscv/riscv-c.cc                     |   53
-rw-r--r--  gcc/config/riscv/riscv-cores.def                |   27
-rw-r--r--  gcc/config/riscv/riscv-ext-spacemit.def         |   36
-rw-r--r--  gcc/config/riscv/riscv-ext.def                  |    1
-rw-r--r--  gcc/config/riscv/riscv-ext.opt                  |   30
-rw-r--r--  gcc/config/riscv/riscv-modes.def                |   24
-rw-r--r--  gcc/config/riscv/riscv-opt-popretz.cc           |  294
-rw-r--r--  gcc/config/riscv/riscv-opts.h                   |    4
-rw-r--r--  gcc/config/riscv/riscv-passes.def               |    1
-rw-r--r--  gcc/config/riscv/riscv-profiles.def             |    4
-rw-r--r--  gcc/config/riscv/riscv-protos.h                 |   14
-rw-r--r--  gcc/config/riscv/riscv-string.cc                |   29
-rw-r--r--  gcc/config/riscv/riscv-subset.h                 |    2
-rw-r--r--  gcc/config/riscv/riscv-target-attr.cc           |   97
-rw-r--r--  gcc/config/riscv/riscv-v.cc                     |  159
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins-bases.cc |    8
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins.cc       |   19
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins.h        |    2
-rw-r--r--  gcc/config/riscv/riscv-vector-switch.def        |   12
-rw-r--r--  gcc/config/riscv/riscv-vsetvl.cc                |   10
-rw-r--r--  gcc/config/riscv/riscv.cc                       | 1066
-rw-r--r--  gcc/config/riscv/riscv.h                        |   20
-rw-r--r--  gcc/config/riscv/riscv.md                       |  111
-rw-r--r--  gcc/config/riscv/riscv.opt                      |   16
-rw-r--r--  gcc/config/riscv/riscv.opt.urls                 |    2
-rw-r--r--  gcc/config/riscv/spacemit-x60.md                |  190
-rw-r--r--  gcc/config/riscv/sync.md                        |   16
-rw-r--r--  gcc/config/riscv/t-riscv                        |    9
-rw-r--r--  gcc/config/riscv/thead.md                       |   18
-rw-r--r--  gcc/config/riscv/vector-iterators.md            |  121
-rw-r--r--  gcc/config/riscv/vector.md                      |  114
40 files changed, 3265 insertions, 436 deletions
diff --git a/gcc/config/riscv/andes-23-series.md b/gcc/config/riscv/andes-23-series.md
new file mode 100644
index 0000000..8e19e05
--- /dev/null
+++ b/gcc/config/riscv/andes-23-series.md
@@ -0,0 +1,190 @@
+;; DFA-based pipeline description for Andes 23 series.
+;;
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "andes_23_arch")
+
+(define_cpu_unit
+ "andes_23_alu0, andes_23_alu1, andes_23_lsu0,
+ andes_23_lsu1, andes_23_lsu2"
+ "andes_23_arch")
+
+(define_cpu_unit "andes_23_mdu" "andes_23_arch")
+(define_cpu_unit "andes_23_fpu" "andes_23_arch")
+
+;; Insns not modeled for the Andes 23 series map to a dummy reservation.
+(define_reservation "andes_23_dummies"
+ "andes_23_alu0 | andes_23_alu1 | andes_23_lsu0 | andes_23_lsu1 |
+ andes_23_lsu2 | andes_23_mdu | andes_23_fpu")
+
+(define_reservation "andes_23_alu"
+ "andes_23_alu0 | andes_23_alu1")
+
+(define_reservation "andes_23_lsu"
+ "andes_23_lsu0 | andes_23_lsu1 | andes_23_lsu2")
+
+(define_reservation "andes_23_pipe_unify"
+ "andes_23_alu0 + andes_23_alu1")
+
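+;; The reservation strings below use the standard GCC DFA syntax:
+;; "," advances to the next cycle, "+" reserves units simultaneously,
+;; "|" chooses one of several alternatives, and "unit*n" keeps a unit
+;; reserved for n consecutive cycles.  For example, the load
+;; reservation "andes_23_pipe_unify, andes_23_lsu*3" issues on both
+;; ALU pipes for one cycle and then holds one LSU slot for three more.
+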
+(define_insn_reservation "andes_23_alu_insn" 1
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "unknown,const,arith,slt,multi,nop,move,
+ shift,logical,mvpair,auipc"))
+ "andes_23_alu")
+
+(define_insn_reservation "andes_23_load" 3
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "load"))
+ "andes_23_pipe_unify, andes_23_lsu*3")
+
+(define_insn_reservation "andes_23_store" 0
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "store"))
+ "andes_23_pipe_unify,andes_23_lsu*3")
+
+(define_insn_reservation "andes_23_branch" 0
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "branch,jump,call,jalr,ret,trap"))
+ "andes_23_pipe_unify")
+
+(define_insn_reservation "andes_23_imul" 2
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "imul"))
+ "andes_23_alu0, andes_23_mdu")
+
+(define_insn_reservation "andes_23_idivsi" 35
+ (and (eq_attr "tune" "andes_23_series")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "andes_23_pipe_unify, andes_23_mdu* 34")
+
+(define_insn_reservation "andes_23_idivdi" 35
+ (and (eq_attr "tune" "andes_23_series")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "andes_23_pipe_unify, andes_23_mdu* 34")
+
+(define_insn_reservation "andes_23_xfer" 1
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "mfc,mtc"))
+ "andes_23_alu")
+
+(define_insn_reservation "andes_23_fpu_alu" 4
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fadd"))
+ "andes_23_pipe_unify, andes_23_fpu")
+
+(define_insn_reservation "andes_23_fpu_mul" 4
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fmul"))
+ "andes_23_pipe_unify, andes_23_fpu")
+
+(define_insn_reservation "andes_23_fpu_mac" 4
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fmadd"))
+ "andes_23_pipe_unify, andes_23_fpu")
+
+(define_insn_reservation "andes_23_fpu_div" 33
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fdiv"))
+ "andes_23_pipe_unify, andes_23_fpu*33")
+
+(define_insn_reservation "andes_23_fpu_sqrt" 33
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fsqrt"))
+ "andes_23_pipe_unify, andes_23_fpu*33")
+
+(define_insn_reservation "andes_23_fpu_move" 2
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fmove,mtc,mfc"))
+ "andes_23_pipe_unify, andes_23_fpu")
+
+(define_insn_reservation "andes_23_fpu_cmp" 3
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fcmp"))
+ "andes_23_pipe_unify, andes_23_fpu")
+
+(define_insn_reservation "andes_23_fpu_cvt" 3
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i"))
+ "andes_23_pipe_unify, andes_23_fpu")
+
+(define_insn_reservation "andes_23_fpu_load" 3
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fpload"))
+ "andes_23_pipe_unify, andes_23_lsu*3")
+
+(define_insn_reservation "andes_23_fpu_store" 0
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "fpstore"))
+ "andes_23_pipe_unify, andes_23_lsu*3")
+
+(define_insn_reservation "andes_23_bitmanip" 1
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "bitmanip,minu,maxu,min,max,clmul,rotate,cpop,clz,ctz"))
+ "andes_23_alu0")
+
+(define_insn_reservation "andes_23_crypto" 1
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "crypto"))
+ "andes_23_alu0")
+
+(define_bypass 3
+ "andes_23_fpu_mul"
+ "andes_23_fpu_alu,andes_23_fpu_mac,
+ andes_23_fpu_div,andes_23_fpu_sqrt")
+
+(define_bypass 3
+ "andes_23_fpu_alu"
+ "andes_23_fpu_mul,andes_23_fpu_alu,andes_23_fpu_mac,
+ andes_23_fpu_div,andes_23_fpu_sqrt")
+
+(define_bypass 3
+ "andes_23_fpu_mac"
+ "andes_23_fpu_mul,andes_23_fpu_alu,andes_23_fpu_mac,
+ andes_23_fpu_div,andes_23_fpu_sqrt")
+
+(define_bypass 2
+ "andes_23_fpu_load"
+ "andes_23_fpu_div,andes_23_fpu_sqrt")
+
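+;; Each define_bypass above overrides the default latency of its
+;; producer reservations when the result feeds one of the listed
+;; consumers; the first, for instance, lets an FP multiply (nominal
+;; latency 4) forward to dependent FP ops after 3 cycles.
+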
+(define_insn_reservation "andes_23_unknown" 1
+ (and (eq_attr "tune" "andes_23_series")
+ (eq_attr "type" "ghost,zicond,mvpair,sfb_alu,condmove,atomic,
+ vclz,vror,vsha2ch,vsm4k,vaesef,vghsh,vsm4r,vsm3c,
+ vaeskf1,vandn,vaesdm,vclmul,vclmulh,vrol,vcpop,vbrev8,
+ vsm3me,vbrev,vctz,vgmul,vsha2ms,vaesz,vrev8,
+ vaeskf2,vsha2cl,vwsll,vaesdf,vaesem,vfwmaccbf16,
+ sf_vqmacc,sf_vc,sf_vc_se,sf_vfnrclip,vmsfs,vfwalu,
+ vnshift,vldm,vslidedown,vicmp,vfcvtftoi,vmffs,vlsegdux,
+ vfredo,vstux,vsshift,vfwcvtbf16,vmpop,vicalu,vldff,
+ vislide1down,vstox,vfwcvtftof,vfmov,vislide1up,vldr,
+ vfmul,vfrecp,vfncvtitof,vfwcvtftoi,vsts,viminmax,vext,
+ vaalu,vfdiv,vidiv,viwalu,vssegte,wrvxrm,vfmovvf,vlde,
+ vfclass,vshift,vimovxv,vssegtox,vfsqrt,vector,vmalu,
+ vfcvtitof,vlsegdff,vfslide1down,vimov,vialu,vmidx,
+ vsalu,vfmerge,rdvl,vlds,vfmuladd,vfsgnj,vslideup,
+ vfcmp,vfmovfv,vfwcvtitof,vfwmuladd,vfwredo,vlsegdox,
+ viwmul,vldox,vsmul,vstm,vfminmax,vmov,vfalu,vfncvtbf16,
+ vnclip,vimerge,vfwmul,vimovvx,vfncvtftoi,viwred,rdvlenb,
+ vfslide1up,vfncvtftof,vsetvl,viwmuladd,vfredu,vfwredu,
+ vlsegde,vmiota,vstr,vgather,vssegts,vldux,vlsegds,vimul,
+ vste,vsetvl_pre,vimuladd,vcompress,vssegtux,wrfrm,rdfrm,
+ vired"))
+ "andes_23_dummies")
diff --git a/gcc/config/riscv/andes-25-series.md b/gcc/config/riscv/andes-25-series.md
new file mode 100644
index 0000000..ef1a926
--- /dev/null
+++ b/gcc/config/riscv/andes-25-series.md
@@ -0,0 +1,322 @@
+;; DFA-based pipeline description for Andes 25 series.
+;;
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "andes_25_arch, andes_25_vector")
+
+;; Integer pipeline
+(define_cpu_unit "andes_25_pipe" "andes_25_arch")
+;; Division operation unit
+(define_cpu_unit "andes_25_mdu" "andes_25_arch")
+;; Floating point units
+(define_cpu_unit "andes_25_fpu, andes_25_fpu_eu" "andes_25_arch")
+
+;; Vector execution unit.
+(define_cpu_unit "andes_25_vpu_lsu, andes_25_vpu_alu, andes_25_vpu_mac,
+ andes_25_vpu_msk, andes_25_vpu_div, andes_25_vpu_fmac,
+ andes_25_vpu_fmis, andes_25_vpu_perm, andes_25_vpu_pipe"
+ "andes_25_vector")
+
+;; Insns not modeled for the Andes 25 series map to a dummy reservation.
+(define_reservation "andes_25_dummies"
+ "andes_25_pipe | andes_25_mdu | andes_25_fpu"
+)
+
+;; Vector insns not modeled for the Andes 25 series map to a dummy reservation.
+(define_reservation "andes_25_vector_dummies"
+ "andes_25_vpu_lsu | andes_25_vpu_alu | andes_25_vpu_mac | andes_25_vpu_msk |
+ andes_25_vpu_div | andes_25_vpu_fmac | andes_25_vpu_fmis |
+ andes_25_vpu_perm | andes_25_vpu_pipe"
+)
+
+(define_reservation "andes_25_fpu_arith"
+ "(andes_25_pipe + andes_25_fpu), andes_25_fpu_eu * 2")
+
+(define_reservation "andes_25_fpu_pipe"
+ "andes_25_pipe + andes_25_fpu")
+
+(define_insn_reservation "andes_25_alu_insn" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "unknown,const,arith,shift,slt,multi,nop,logical,move,
+ auipc,atomic"))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_load_wd" 2
+ (and (eq_attr "tune" "andes_25_series")
+ (and (eq_attr "type" "load")
+ (not (eq_attr "mode" "QI,HI"))))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_load_bh" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (and (eq_attr "type" "load")
+ (eq_attr "mode" "QI,HI")))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_store" 0
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "store"))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_branch" 0
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "branch,jump,call,jalr,trap,ret"))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_imul" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "imul"))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_idivsi" 38
+ (and (eq_attr "tune" "andes_25_series")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "andes_25_pipe, andes_25_mdu * 34")
+
+(define_insn_reservation "andes_25_idivdi" 70
+ (and (eq_attr "tune" "andes_25_series")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "andes_25_pipe, andes_25_mdu * 66")
+
+(define_insn_reservation "andes_25_xfer" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "mfc,mtc"))
+ "andes_25_pipe")
+
+(define_insn_reservation "andes_25_fpu_alu" 5
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fadd"))
+ "andes_25_fpu_arith")
+
+(define_insn_reservation "andes_25_fpu_mul" 5
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fmul"))
+ "andes_25_fpu_arith")
+
+(define_insn_reservation "andes_25_fpu_mac" 5
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fmadd"))
+ "andes_25_fpu_arith")
+
+(define_insn_reservation "andes_25_fpu_div" 33
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fdiv"))
+ "andes_25_fpu_arith, andes_25_fpu_eu * 27")
+
+(define_insn_reservation "andes_25_fpu_sqrt" 33
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fsqrt"))
+ "andes_25_fpu_arith, andes_25_fpu_eu * 27")
+
+(define_insn_reservation "andes_25_fpu_move" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fmove,mtc,mfc"))
+ "andes_25_fpu_pipe")
+
+(define_insn_reservation "andes_25_fpu_cmp" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fcmp"))
+ "andes_25_fpu_pipe")
+
+(define_insn_reservation "andes_25_fpu_cvt" 6
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i"))
+ "andes_25_fpu_arith, andes_25_fpu_eu")
+
+(define_insn_reservation "andes_25_fpu_load" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fpload"))
+ "andes_25_fpu_pipe")
+
+(define_insn_reservation "andes_25_fpu_store" 0
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "fpstore"))
+ "andes_25_fpu_pipe")
+
+(define_insn_reservation "andes_25_bitmanip" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "bitmanip"))
+ "andes_25_pipe")
+
+;; Vector pipeline.
+
+(define_insn_reservation "andes_25_vload" 5
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vlde,vldm,vlds,vldff,vldr"))
+ "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3")
+
+(define_insn_reservation "andes_25_index_vload" 8
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vldux,vldox"))
+ "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3")
+
+(define_insn_reservation "andes_25_seg_vload" 16
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff"))
+ "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3")
+
+(define_insn_reservation "andes_25_vstore" 0
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr,vssegte,\
+ vssegts,vssegtux,vssegtox"))
+ "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3")
+
+(define_insn_reservation "andes_25_vialu" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vialu,vicalu,vshift,viminmax,vicmp,vimov,\
+ vsalu,vaalu,vmov,vector,vimerge"))
+ "andes_25_vpu_pipe + andes_25_vpu_alu")
+
+(define_insn_reservation "andes_25_widen_vialu" 2
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "viwalu, vext, vsshift"))
+ "andes_25_vpu_pipe + andes_25_vpu_alu")
+
+(define_insn_reservation "andes_25_narrow_vialu" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vnshift,vnclip"))
+ "andes_25_vpu_pipe + andes_25_vpu_alu")
+
+(define_insn_reservation "andes_25_vimul" 2
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vimul,vimuladd,vsmul"))
+ "andes_25_vpu_pipe + andes_25_vpu_mac")
+
+(define_insn_reservation "andes_25_widen_vimul" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "viwmul,viwmuladd"))
+ "andes_25_vpu_pipe + andes_25_vpu_mac")
+
+(define_insn_reservation "andes_25_vperm" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vslideup,vslidedown,vislide1up,vislide1down,\
+ vfslide1up,vfslide1down,vgather"))
+ "andes_25_vpu_pipe + andes_25_vpu_perm")
+
+(define_insn_reservation "andes_25_vcompress" 4
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vcompress"))
+ "andes_25_vpu_pipe + andes_25_vpu_perm")
+
+(define_insn_reservation "andes_25_vmovv" 7
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vimovvx,vfmovvf"))
+ "(andes_25_vpu_pipe + andes_25_vpu_perm)*5")
+
+(define_insn_reservation "andes_25_vmovx" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vimovxv,vfmovfv,vfmov"))
+ "andes_25_vpu_pipe + andes_25_vpu_perm")
+
+(define_insn_reservation "andes_25_vreduction" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vired,viwred"))
+ "andes_25_vpu_pipe + andes_25_vpu_alu*5")
+
+(define_insn_reservation "andes_25_vidiv" 35
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vidiv"))
+ "andes_25_vpu_pipe + andes_25_vpu_div*34")
+
+(define_insn_reservation "andes_25_vmask_2" 2
+ (eq_attr "type" "vmalu,vmsfs")
+ "andes_25_vpu_pipe + andes_25_vpu_msk")
+
+(define_insn_reservation "andes_25_vmask_3" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vmiota,vmidx"))
+ "andes_25_vpu_pipe + andes_25_vpu_msk")
+
+(define_insn_reservation "andes_25_vpopc" 6
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vmpop"))
+ "andes_25_vpu_pipe + andes_25_vpu_msk")
+
+(define_insn_reservation "andes_25_vffs" 7
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vmffs"))
+ "andes_25_vpu_pipe + andes_25_vpu_msk")
+
+(define_insn_reservation "andes_25_vfadd" 4
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfalu,vfwalu,vfmul,vfwmul,vfmuladd,\
+ vfwmuladd"))
+ "andes_25_vpu_pipe + andes_25_vpu_fmac")
+
+(define_insn_reservation "andes_25_vfdiv" 39
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfdiv,vfsqrt"))
+ "andes_25_vpu_pipe + andes_25_vpu_div*19")
+
+(define_insn_reservation "andes_25_vfmis" 2
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfminmax,vfcmp,vfsgnj,vfclass,vfmerge"))
+ "andes_25_vpu_pipe + andes_25_vpu_fmis")
+
+(define_insn_reservation "andes_25_vfrecp" 3
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfrecp"))
+ "andes_25_vpu_pipe + andes_25_vpu_div")
+
+(define_insn_reservation "andes_25_vfcvt" 2
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfcvtitof,vfcvtftoi"))
+ "andes_25_vpu_pipe + andes_25_vpu_fmis")
+
+(define_insn_reservation "andes_25_widen_vfcvt" 5
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfwcvtbf16"))
+ "andes_25_vpu_pipe + andes_25_vpu_fmis")
+
+(define_insn_reservation "andes_25_narrow_vfcvt" 4
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfncvtitof,vfncvtftoi,vfncvtftof,vfncvtbf16"))
+ "andes_25_vpu_pipe + andes_25_vpu_fmis")
+
+(define_insn_reservation "andes_25_vfreduction" 6
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vfredu,vfwredu,vfredo,vfwredo"))
+ "andes_25_vpu_pipe + andes_25_vpu_fmac*24")
+
+(define_insn_reservation "andes_25_vesetvl" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vsetvl,vsetvl_pre"))
+ "andes_25_vpu_pipe")
+
+(define_insn_reservation "andes_25_vcsr" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "wrvxrm,wrfrm,rdvlenb,rdvl"))
+ "andes_25_vpu_pipe")
+
+(define_insn_reservation "andes_25_unknown" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "ghost,cpop,clz,ctz,zicond,mvpair,sfb_alu,minu,maxu,
+ min,max,clmul,rotate,crypto,condmove,rdfrm"))
+ "andes_25_dummies")
+
+(define_insn_reservation "andes_25_vector_unknown" 1
+ (and (eq_attr "tune" "andes_25_series")
+ (eq_attr "type" "vclz,vror,vsha2ch,vsm4k,vaesef,vghsh,vsm4r,vsm3c,
+ vaeskf1,vandn,vaesdm,vclmul,vclmulh,vrol,vcpop,vbrev8,
+ vsm3me,vbrev,vctz,vgmul,vsha2ms,vaesz,vrev8,
+ vaeskf2,vsha2cl,vwsll,vaesdf,vaesem,vfwmaccbf16,
+ sf_vqmacc,sf_vc,sf_vc_se,sf_vfnrclip"))
+ "andes_25_vector_dummies")
diff --git a/gcc/config/riscv/andes-45-series.md b/gcc/config/riscv/andes-45-series.md
new file mode 100644
index 0000000..7693db8
--- /dev/null
+++ b/gcc/config/riscv/andes-45-series.md
@@ -0,0 +1,379 @@
+;; DFA-based pipeline description for Andes 45 series.
+;;
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "andes_45_arch, andes_45_vector")
+
+(define_cpu_unit "andes_45_pipe0" "andes_45_arch")
+(define_cpu_unit "andes_45_pipe1" "andes_45_arch")
+(define_cpu_unit "andes_45_vpu_pipe0" "andes_45_vector")
+(define_cpu_unit "andes_45_vpu_pipe1" "andes_45_vector")
+
+(define_reservation "andes_45_vpu_pipe" "(andes_45_vpu_pipe0 + andes_45_pipe0 | andes_45_vpu_pipe1 + andes_45_pipe1)")
+
+(define_cpu_unit "andes_45_mdu,andes_45_alu0,andes_45_alu1,andes_45_bru0,andes_45_bru1,andes_45_lsu" "andes_45_arch")
+(define_cpu_unit "andes_45_fpu_fmac,andes_45_fpu_fdiv,andes_45_fpu_fmis,andes_45_fpu_fmv" "andes_45_arch")
+(define_cpu_unit "andes_45_vpu_alu,andes_45_vpu_mac,andes_45_vpu_fmis,andes_45_vpu_permut,
+ andes_45_vpu_div,andes_45_vpu_fdiv,andes_45_vpu_mask,andes_45_vpu_lsu" "andes_45_vector")
+
+(define_reservation "andes_45_fpu_arith"
+ "andes_45_pipe0 + andes_45_fpu_fmac | andes_45_pipe1 + andes_45_fpu_fmac")
+
+;; Insns not modeled for the Andes 45 series map to a dummy reservation.
+(define_reservation "andes_45_dummies"
+ "andes_45_pipe0 | andes_45_pipe1, andes_45_alu0 | andes_45_alu1")
+
+;; Vector insns not modeled for the Andes 45 series map to a dummy reservation.
+(define_reservation "andes_45_vector_dummies"
+ "andes_45_pipe0 | andes_45_pipe1, andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_alu_insn_s" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "shift,nop,logical"))
+ "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1")
+
+(define_insn_reservation "andes_45_alu_insn_l" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "unknown,const,arith,multi,slt,move,auipc,atomic,bitmanip"))
+ "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1")
+
+(define_insn_reservation "andes_45_cmov" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "condmove"))
+ "andes_45_pipe0 + andes_45_alu0 + andes_45_pipe1 + andes_45_alu1")
+
+(define_insn_reservation "andes_45_load_wd" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "load")
+ (not (eq_attr "mode" "QI,HI"))))
+ "andes_45_pipe0 + andes_45_lsu | andes_45_pipe1 + andes_45_lsu")
+
+(define_insn_reservation "andes_45_load_bh" 5
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "load")
+ (eq_attr "mode" "QI,HI")))
+ "andes_45_pipe0 + andes_45_lsu | andes_45_pipe1 + andes_45_lsu")
+
+(define_insn_reservation "andes_45_store_d" 0
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "store")
+ (eq_attr "mode" "DI,SI")))
+ "andes_45_pipe0 + andes_45_lsu | andes_45_pipe1 + andes_45_lsu")
+
+(define_insn_reservation "andes_45_store" 0
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "store")
+ (not (eq_attr "mode" "DI,SI"))))
+ "andes_45_pipe0 + andes_45_pipe1 + andes_45_lsu")
+
+(define_insn_reservation "andes_45_branch" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "branch,jump,call,ret,jalr,trap"))
+ "andes_45_pipe0 + andes_45_bru0 | andes_45_pipe1 + andes_45_bru1")
+
+(define_insn_reservation "andes_45_imul" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "imul"))
+ "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1, andes_45_mdu * 2")
+
+(define_insn_reservation "andes_45_idivsi" 38
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1, andes_45_mdu * 2")
+
+(define_insn_reservation "andes_45_idivdi" 70
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1, andes_45_mdu * 2")
+
+(define_insn_reservation "andes_45_xfer" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "mfc,mtc"))
+ "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1")
+
+(define_insn_reservation "andes_45_fpu_alu_s" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "fadd")
+ (eq_attr "mode" "SF")))
+ "andes_45_fpu_arith")
+
+(define_insn_reservation "andes_45_fpu_alu_d" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "fadd")
+ (eq_attr "mode" "DF")))
+ "andes_45_fpu_arith")
+
+(define_insn_reservation "andes_45_fpu_mul_s" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "mode" "SF")))
+ "andes_45_fpu_arith")
+
+(define_insn_reservation "andes_45_fpu_mul_d" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "mode" "DF")))
+ "andes_45_fpu_arith")
+
+(define_insn_reservation "andes_45_fpu_mac_s" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "SF")))
+ "(andes_45_pipe0 | andes_45_pipe1) + andes_45_fpu_fmac + andes_45_fpu_fmv + andes_45_fpu_fmis")
+
+(define_insn_reservation "andes_45_fpu_mac_d" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "DF")))
+ "(andes_45_pipe0 | andes_45_pipe1) + andes_45_fpu_fmac + andes_45_fpu_fmv + andes_45_fpu_fmis")
+
+(define_insn_reservation "andes_45_fpu_div" 33
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fdiv"))
+ "andes_45_pipe0 + andes_45_fpu_fdiv | andes_45_pipe1 + andes_45_fpu_fdiv, andes_45_fpu_fdiv * 27")
+
+(define_insn_reservation "andes_45_fpu_sqrt" 33
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fsqrt"))
+ "andes_45_pipe0 + andes_45_fpu_fdiv | andes_45_pipe1 + andes_45_fpu_fdiv, andes_45_fpu_fdiv * 27")
+
+(define_insn_reservation "andes_45_fpu_move" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fmove,mtc,mfc"))
+ "andes_45_pipe0 + andes_45_fpu_fmv | andes_45_pipe1 + andes_45_fpu_fmv")
+
+(define_insn_reservation "andes_45_fpu_cmp" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fcmp"))
+ "andes_45_pipe0 + andes_45_fpu_fmis | andes_45_pipe1 + andes_45_fpu_fmis")
+
+(define_insn_reservation "andes_45_fpu_cvt" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fcvt,fcvt_f2i,fcvt_i2f"))
+ "andes_45_pipe0 + andes_45_fpu_fmis | andes_45_pipe1 + andes_45_fpu_fmis")
+
+(define_insn_reservation "andes_45_fpu_load" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fpload"))
+ "andes_45_pipe0 + andes_45_pipe1 + andes_45_lsu")
+
+(define_insn_reservation "andes_45_fpu_store" 0
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "fpstore"))
+ "andes_45_pipe0 + andes_45_pipe1 + andes_45_lsu")
+
+(define_insn_reservation "andes_45_vpu_load_e" 8
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vlde,vldm,vldr,vlsegde,vldff,vlsegdff"))
+ "(andes_45_vpu_pipe + andes_45_vpu_lsu), andes_45_vpu_lsu * 2")
+
+(define_insn_reservation "andes_45_vpu_load_s" 10
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vlds,vlsegds"))
+ "(andes_45_vpu_pipe + andes_45_vpu_lsu), andes_45_vpu_lsu * 3")
+
+(define_insn_reservation "andes_45_vpu_load_x" 12
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vldox,vldux,vlsegdox,vlsegdux"))
+ "(andes_45_vpu_pipe + andes_45_vpu_lsu), andes_45_vpu_lsu * 4")
+
+(define_insn_reservation "andes_45_vpu_store" 0
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vste,vstm,vstr,vsts,vstux,vstox,vssegtox,vssegte,
+ vssegtux,vssegts"))
+ "andes_45_vpu_pipe + andes_45_lsu + andes_45_vpu_lsu")
+
+(define_insn_reservation "andes_45_vpu_alu" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vialu,viwalu,vicalu,vsalu,vaalu,vector"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_ext" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vext"))
+ "andes_45_vpu_pipe + andes_45_vpu_permut")
+
+(define_insn_reservation "andes_45_vpu_shift" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vshift,vnshift,vsshift"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_minmax" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "viminmax"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_cmp" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vicmp"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_mul" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vimul,viwmul,vsmul"))
+ "andes_45_vpu_pipe + andes_45_vpu_mac")
+
+(define_insn_reservation "andes_45_vpu_div" 36
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vidiv"))
+ "andes_45_vpu_pipe + andes_45_vpu_div * 35")
+
+(define_insn_reservation "andes_45_vpu_madd" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vimuladd,viwmuladd"))
+ "andes_45_vpu_pipe + andes_45_vpu_mac")
+
+(define_insn_reservation "andes_45_vpu_merge" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vimerge"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_move" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vimov,vimovvx,vimovxv,vmov,vslideup,vslidedown,vislide1up,vislide1down"))
+ "andes_45_vpu_pipe + andes_45_vpu_permut")
+
+(define_insn_reservation "andes_45_vpu_clip" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vnclip"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_falu" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfalu,vfwalu,vfmul,vfwmul"))
+ "andes_45_vpu_pipe + andes_45_vpu_mac")
+
+(define_insn_reservation "andes_45_vpu_fdiv" 38
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfdiv,vfsqrt"))
+ "andes_45_vpu_pipe + andes_45_vpu_fdiv")
+
+(define_insn_reservation "andes_45_vpu_fmadd" 5
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfmuladd,vfwmuladd"))
+ "andes_45_vpu_pipe + andes_45_vpu_mac")
+
+(define_insn_reservation "andes_45_vpu_fminmax" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfminmax"))
+ "andes_45_vpu_pipe + andes_45_vpu_fmis")
+
+(define_insn_reservation "andes_45_vpu_fcmp" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfcmp,vfrecp"))
+ "andes_45_vpu_pipe + andes_45_vpu_fmis")
+
+(define_insn_reservation "andes_45_vpu_fsgnj" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfsgnj"))
+ "andes_45_vpu_pipe + andes_45_vpu_fmis")
+
+(define_insn_reservation "andes_45_vpu_fclass" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfclass"))
+ "andes_45_vpu_pipe + andes_45_vpu_fmis")
+
+(define_insn_reservation "andes_45_vpu_fmerge" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfmerge"))
+ "andes_45_vpu_pipe + andes_45_vpu_fmis")
+
+(define_insn_reservation "andes_45_vpu_fmove" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfmov,vfmovvf,vfmovfv,vfslide1up,vfslide1down"))
+ "andes_45_vpu_pipe + andes_45_vpu_permut")
+
+(define_insn_reservation "andes_45_vpu_fcvt" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,
+ vfncvtitof,vfncvtftoi,vfncvtftof,vfwcvtbf16,vfncvtbf16"))
+ "andes_45_vpu_pipe + andes_45_vpu_fmis")
+
+(define_insn_reservation "andes_45_vpu_red" 9
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vired,viwred"))
+ "andes_45_vpu_pipe + andes_45_vpu_alu")
+
+(define_insn_reservation "andes_45_vpu_fredu" 6
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfredu,vfwredu"))
+ "andes_45_vpu_pipe + andes_45_vpu_mac")
+
+(define_insn_reservation "andes_45_vpu_fredo" 34
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vfredo,vfwredo"))
+ "andes_45_vpu_pipe + andes_45_vpu_mac")
+
+(define_insn_reservation "andes_45_vpu_malu" 3
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vmalu"))
+ "andes_45_vpu_pipe + andes_45_vpu_mask")
+
+(define_insn_reservation "andes_45_vpu_mask" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vmpop,vmffs,vmsfs,vmiota,vmidx"))
+ "andes_45_vpu_pipe + andes_45_vpu_mask")
+
+(define_insn_reservation "andes_45_vpu_gather" 2
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vgather"))
+ "andes_45_vpu_pipe + andes_45_vpu_permut")
+
+(define_insn_reservation "andes_45_vpu_compress" 4
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vcompress"))
+ "andes_45_vpu_pipe + andes_45_vpu_permut")
+
+(define_insn_reservation "andes_45_vcpu_csr" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "wrvxrm,wrfrm,rdvlenb,rdvl,vsetvl,vsetvl_pre"))
+ "andes_45_vpu_pipe")
+
+(define_bypass 1
+ "andes_45_fpu_alu_s, andes_45_fpu_mul_s, andes_45_fpu_mac_s"
+ "andes_45_load_wd, andes_45_load_bh, andes_45_store,
+ andes_45_fpu_load, andes_45_fpu_store")
+
+(define_bypass 2
+ "andes_45_fpu_alu_d, andes_45_fpu_mul_d, andes_45_fpu_mac_d"
+ "andes_45_load_wd, andes_45_load_bh, andes_45_store,
+ andes_45_fpu_load, andes_45_fpu_store")
+
+(define_bypass 1
+ "andes_45_fpu_cmp, andes_45_fpu_cvt"
+ "andes_45_load_wd, andes_45_load_bh, andes_45_store,
+ andes_45_fpu_load, andes_45_fpu_store, andes_45_alu_insn_s,
+ andes_45_alu_insn_l, andes_45_xfer")
+
+(define_insn_reservation "andes_45_unknown" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "ghost,cpop,clz,ctz,zicond,mvpair,sfb_alu,minu,maxu,
+ min,max,clmul,rotate,crypto,condmove,rdfrm"))
+ "andes_45_dummies")
+
+(define_insn_reservation "andes_45_vector_unknown" 1
+ (and (eq_attr "tune" "andes_45_series")
+ (eq_attr "type" "vclz,vror,vsha2ch,vsm4k,vaesef,vghsh,vsm4r,vsm3c,
+ vaeskf1,vandn,vaesdm,vclmul,vclmulh,vrol,vcpop,vbrev8,
+ vsm3me,vbrev,vctz,vgmul,vsha2ms,vaesz,vrev8,
+ vaeskf2,vsha2cl,vwsll,vaesdf,vaesem,vfwmaccbf16,
+ sf_vqmacc,sf_vc,sf_vc_se,sf_vfnrclip,vlsegde"))
+ "andes_45_vector_dummies")
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d2705cf..40627fa 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1951,6 +1951,61 @@
}
[(set_attr "type" "viwalu")])
+(define_insn_and_split "*widen_mul_plus_vx_<mode>"
+ [(set (match_operand:VWEXTI 0 "register_operand")
+ (plus:VWEXTI
+ (mult:VWEXTI
+ (zero_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+ (vec_duplicate:VWEXTI
+ (zero_extend:<VEL>
+ (match_operand:<VSUBEL> 1 "register_operand"))))
+ (match_operand:VWEXTI 3 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_widen_mul_plus_u_vx (<MODE>mode);
+ rtx v_undef = RVV_VUNDEF(<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3], v_undef};
+
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "viwmuladd")])
+
+(define_insn_and_split "*pred_cmp_swapped<mode>_scalar"
+ [(set (match_operand:<VM> 0 "register_operand")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 3 "comparison_swappable_operator"
+ [(vec_duplicate:V_VLSI
+ (match_operand:<VEL> 4 "register_operand"))
+ (match_operand:V_VLSI 5 "register_operand")])
+ (unspec:<VM>
+ [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::expand_vx_cmp_vec_dup_vec (operands[0], operands[4],
+ operands[5],
+ GET_CODE (operands[3]),
+ <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vicmp")])
+
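+;; The two patterns above exist only for combine: they match the
+;; intermediate RTL combine builds, emit no assembly themselves ("#"),
+;; and split immediately into the equivalent predicated RVV insns
+;; (a vwmaccu.vx and a swapped-operand vector-scalar compare,
+;; respectively).
+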
;; =============================================================================
;; Combine vec_duplicate + op.vv to op.vf
;; Include
@@ -2399,3 +2454,44 @@
}
[(set_attr "type" "vfalu")]
)
+
+;; Combine vsext.vf + vsll.vi into vwsll.vi; this depends on ZVBB.
+;; vwsll.vi zero-extends its source, so the combination is only valid
+;; when the shift amount is at least the precision of the
+;; double-truncated inner mode: the extension bits are then all
+;; shifted out and sign and zero extension give the same result.
+;; Appears in the satd function of x264.
+(define_insn_and_split "*vwsll_sign_extend_<mode>"
+ [(set (match_operand:VWEXTI 0 "register_operand")
+ (ashift:VWEXTI
+ (sign_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int imm = INTVAL (operands[2]);
+ int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode));
+
+ if (imm >= trunc_prec)
+ {
+ insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+ emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+ }
+ else
+ {
+ insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode);
+ rtx extend = gen_reg_rtx (<MODE>mode);
+ rtx unary_ops[] = {extend, operands[1]};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP,
+ unary_ops);
+
+ icode = code_for_pred_scalar (ASHIFT, <MODE>mode);
+ rtx binary_ops[] = {operands[0], extend, operands[2]};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+ binary_ops);
+ }
+
+ DONE;
+ }
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 48de5ef..c694684 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -55,8 +55,8 @@
[(match_operand:RATIO64 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO64I 2 "register_operand")
- (match_operand 3 "<RATIO64:gs_extension>")
- (match_operand 4 "<RATIO64:gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<RATIO64:VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -71,8 +71,8 @@
[(match_operand:RATIO32 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO32I 2 "register_operand")
- (match_operand 3 "<RATIO32:gs_extension>")
- (match_operand 4 "<RATIO32:gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<RATIO32:VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -87,8 +87,8 @@
[(match_operand:RATIO16 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO16I 2 "register_operand")
- (match_operand 3 "<RATIO16:gs_extension>")
- (match_operand 4 "<RATIO16:gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<RATIO16:VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -103,8 +103,8 @@
[(match_operand:RATIO8 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO8I 2 "register_operand")
- (match_operand 3 "<RATIO8:gs_extension>")
- (match_operand 4 "<RATIO8:gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<RATIO8:VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -119,8 +119,8 @@
[(match_operand:RATIO4 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO4I 2 "register_operand")
- (match_operand 3 "<RATIO4:gs_extension>")
- (match_operand 4 "<RATIO4:gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<RATIO4:VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -135,8 +135,8 @@
[(match_operand:RATIO2 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO2I 2 "register_operand")
- (match_operand 3 "<RATIO2:gs_extension>")
- (match_operand 4 "<RATIO2:gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<RATIO2:VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -155,8 +155,8 @@
[(match_operand:RATIO1 0 "register_operand")
(match_operand 1 "pmode_reg_or_0_operand")
(match_operand:RATIO1 2 "register_operand")
- (match_operand 3 "<gs_extension>")
- (match_operand 4 "<gs_scale>")
+ (match_operand 3 "const_1_operand")
+ (match_operand 4 "const_1_operand")
(match_operand:<VM> 5 "vector_mask_operand")
(match_operand 6 "maskload_else_operand")
(match_operand 7 "autovec_length_operand")
@@ -174,8 +174,8 @@
(define_expand "mask_len_scatter_store<RATIO64:mode><RATIO64I:mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO64I 1 "register_operand")
- (match_operand 2 "<RATIO64:gs_extension>")
- (match_operand 3 "<RATIO64:gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO64 4 "register_operand")
(match_operand:<RATIO64:VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -189,8 +189,8 @@
(define_expand "mask_len_scatter_store<RATIO32:mode><RATIO32I:mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO32I 1 "register_operand")
- (match_operand 2 "<RATIO32:gs_extension>")
- (match_operand 3 "<RATIO32:gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO32 4 "register_operand")
(match_operand:<RATIO32:VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -204,8 +204,8 @@
(define_expand "mask_len_scatter_store<RATIO16:mode><RATIO16I:mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO16I 1 "register_operand")
- (match_operand 2 "<RATIO16:gs_extension>")
- (match_operand 3 "<RATIO16:gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO16 4 "register_operand")
(match_operand:<RATIO16:VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -219,8 +219,8 @@
(define_expand "mask_len_scatter_store<RATIO8:mode><RATIO8I:mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO8I 1 "register_operand")
- (match_operand 2 "<RATIO8:gs_extension>")
- (match_operand 3 "<RATIO8:gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO8 4 "register_operand")
(match_operand:<RATIO8:VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -234,8 +234,8 @@
(define_expand "mask_len_scatter_store<RATIO4:mode><RATIO4I:mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO4I 1 "register_operand")
- (match_operand 2 "<RATIO4:gs_extension>")
- (match_operand 3 "<RATIO4:gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO4 4 "register_operand")
(match_operand:<RATIO4:VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -249,8 +249,8 @@
(define_expand "mask_len_scatter_store<RATIO2:mode><RATIO2I:mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO2I 1 "register_operand")
- (match_operand 2 "<RATIO2:gs_extension>")
- (match_operand 3 "<RATIO2:gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO2 4 "register_operand")
(match_operand:<RATIO2:VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -268,8 +268,8 @@
(define_expand "mask_len_scatter_store<mode><mode>"
[(match_operand 0 "pmode_reg_or_0_operand")
(match_operand:RATIO1 1 "register_operand")
- (match_operand 2 "<gs_extension>")
- (match_operand 3 "<gs_scale>")
+ (match_operand 2 "const_1_operand")
+ (match_operand 3 "const_1_operand")
(match_operand:RATIO1 4 "register_operand")
(match_operand:<VM> 5 "vector_mask_operand")
(match_operand 6 "autovec_length_operand")
@@ -1335,10 +1335,11 @@
;; == SELECT_VL
;; =========================================================================
-(define_expand "select_vl<mode>"
+(define_expand "select_vl<V:mode><P:mode>"
[(match_operand:P 0 "register_operand")
(match_operand:P 1 "vector_length_operand")
- (match_operand:P 2 "immediate_operand")]
+ (match_operand:P 2 "immediate_operand")
+ (match_operand:V 3)]
"TARGET_VECTOR"
{
riscv_vector::expand_select_vl (operands);
@@ -1350,9 +1351,9 @@
;; -------------------------------------------------------------------------
(define_expand "vec_set<mode>"
- [(match_operand:V_VLS 0 "register_operand")
- (match_operand:<VEL> 1 "register_operand")
- (match_operand 2 "nonmemory_operand")]
+ [(match_operand:V_VLS_ZVFH 0 "register_operand")
+ (match_operand:<VEL> 1 "register_operand")
+ (match_operand 2 "nonmemory_operand")]
"TARGET_VECTOR"
{
/* If we set the first element, emit an v(f)mv.s.[xf]. */
@@ -2301,6 +2302,37 @@
})
;; -------------------------------------------------------------------------
+;; ---- [INT] Mask reductions
+;; -------------------------------------------------------------------------
+
+(define_expand "reduc_sbool_and_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, AND);
+ DONE;
+})
+
+(define_expand "reduc_sbool_ior_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, IOR);
+ DONE;
+})
+
+(define_expand "reduc_sbool_xor_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, XOR);
+ DONE;
+})
+
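+;; A sketch of the identities behind these reductions (not necessarily
+;; how expand_mask_reduction implements them): with c = vcpop.m (mask),
+;; the AND reduction is c == vl, the IOR reduction is c != 0, and the
+;; XOR reduction is c & 1.
+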
+;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
;; Includes:
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 59b71ed..166ddd9 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1,4 +1,4 @@
-;); Machine description for RISC-V Bit Manipulation operations.
+;; Machine description for RISC-V Bit Manipulation operations.
;; Copyright (C) 2021-2025 Free Software Foundation, Inc.
;; This file is part of GCC.
@@ -237,19 +237,20 @@
[(set_attr "type" "bitmanip")
(set_attr "mode" "<X:MODE>")])
-(define_insn_and_split "*<optab>_not_const<mode>"
- [(set (match_operand:X 0 "register_operand" "=r")
- (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r"))
- (match_operand:X 2 "const_arith_operand" "I")))
- (clobber (match_scratch:X 3 "=&r"))]
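+;; A NOT feeding a logical op with a constant can become a single
+;; andn/orn once the constant is materialized in a register; the
+;; peephole below performs that rewrite when a scratch register is
+;; available.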
+(define_peephole2
+ [(match_scratch:X 4 "r")
+ (set (match_operand:X 0 "register_operand")
+ (not:X (match_operand:X 1 "register_operand")))
+ (set (match_operand:X 2 "register_operand")
+ (bitmanip_bitwise:X (match_dup 0)
+ (match_operand 3 "const_int_operand")))
+ (match_dup 4)]
"(TARGET_ZBB || TARGET_ZBKB) && !TARGET_ZCB
- && !optimize_function_for_size_p (cfun)"
- "#"
- "&& reload_completed"
- [(set (match_dup 3) (match_dup 2))
- (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))]
- ""
- [(set_attr "type" "bitmanip")])
+ && !optimize_function_for_size_p (cfun)
+ && rtx_equal_p (operands[0], operands[2])
+ && riscv_const_insns (operands[3], false) == 1"
+ [(set (match_dup 4) (match_dup 3))
+ (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 4)))])
;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion). Without a
;; bit of extra help for combine (i.e., the below split), we end up emitting
@@ -356,7 +357,7 @@
{
if (TARGET_XTHEADBB && !immediate_operand (operands[2], VOIDmode))
FAIL;
- if (TARGET_64BIT && register_operand (operands[2], QImode))
+ if (TARGET_64BIT)
{
rtx t = gen_reg_rtx (DImode);
emit_insn (gen_rotrsi3_sext (t, operands[1], operands[2]));
diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index 37ab5c3..98bb4d6 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -173,6 +173,40 @@
(zero_extend:SI (match_dup 2)))))]
"operands[1] = gen_lowpart (SImode, operands[1]);")
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (ior:DI (zero_extend:DI (match_operand:HI 1 "register_operand"))
+ (ashift:DI
+ (sign_extend:DI (match_operand:HI 2 "register_operand"))
+ (const_int 16))))]
+ "TARGET_ZBKB && TARGET_64BIT"
+ [(set (match_dup 0)
+ (sign_extend:DI (ior:SI (ashift:SI (match_dup 2) (const_int 16))
+ (zero_extend:SI (match_dup 1)))))]
+ "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (ior:DI (sign_extend:DI
+ (ashift:SI (match_operand:SI 1 "register_operand")
+ (const_int 16)))
+ (zero_extend:DI (match_operand:HI 2 "register_operand"))))]
+ "TARGET_ZBKB && TARGET_64BIT"
+ [(set (match_dup 0)
+ (sign_extend:DI (ior:SI (ashift:SI (match_dup 1) (const_int 16))
+ (zero_extend:SI (match_dup 2)))))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (ior:DI (zero_extend:DI (match_operand:HI 1 "register_operand"))
+ (sign_extend:DI
+ (ashift:SI (match_operand:SI 2 "register_operand")
+ (const_int 16)))))]
+ "TARGET_ZBKB && TARGET_64BIT"
+ [(set (match_dup 0)
+ (sign_extend:DI (ior:SI (ashift:SI (match_dup 2) (const_int 16))
+ (zero_extend:SI (match_dup 1)))))])
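+;; (On RV64, Zbkb's packw places the low 16 bits of rs2 above the low
+;; 16 bits of rs1 and sign-extends the 32-bit result, i.e. exactly the
+;; sign_extend:DI (ior:SI (ashift:SI ...) ...) form all three splits
+;; above canonicalize to.)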
+
;; And this patches the result of the splitter above.
(define_insn "*riscv_packw_2"
[(set (match_operand:DI 0 "register_operand" "=r")
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f811a4e..5b44165 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -611,6 +611,9 @@
(define_predicate "comparison_except_ge_operator"
(match_code "eq,ne,le,leu,gt,gtu,lt,ltu"))
+(define_predicate "comparison_swappable_operator"
+ (match_code "gtu,gt"))
+
(define_predicate "ge_operator"
(match_code "ge,geu"))
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index b8547a7..a42764e 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
+#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "insn-attr.h"
#include "tm-constrs.h"
@@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const
&& def1->insn ()->compare_with (insn) >= 0)
return NULL_RTX;
}
+ else
+ {
+ /* If the use is in a subreg e.g. in a store it is possible that
+ we punned the vector mode with a larger mode like
+ (subreg:V1SI (reg:V4QI 123)).
+ For an AVL of 1 that means we actually store one SImode
+ element and not 1 QImode elements. But the latter is what we
+ would propagate if we took the AVL operand literally.
+ Instead we scale it by the ratio of inner and outer mode
+ (4 in the example above). */
+ int factor = 1;
+ if (use->includes_subregs ())
+ {
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST)
+ {
+ const_rtx x = *iter;
+ if (x
+ && SUBREG_P (x)
+ && REG_P (SUBREG_REG (x))
+ && REGNO (SUBREG_REG (x)) == use->regno ()
+ && known_eq (GET_MODE_SIZE (use->mode ()),
+ GET_MODE_SIZE (GET_MODE (x))))
+ {
+ if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()),
+ GET_MODE_NUNITS (GET_MODE (x)),
+ &factor))
+ {
+ gcc_assert (factor > 0);
+ break;
+ }
+ else
+ return NULL_RTX;
+ }
+ }
+ }
+
+ if (factor > 1)
+ new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor);
+ }
if (!use_avl)
use_avl = new_use_avl;
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index d497326..24537d5 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -165,15 +165,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
if (!subset_list)
return;
- /* Define profile macro if a profile was used. */
- const char *profile_name = subset_list->get_profile_name ();
- if (profile_name)
- {
- char *profile_macro = (char *)alloca (strlen (profile_name) + 10);
- sprintf (profile_macro, "__riscv_%s", profile_name);
- builtin_define (profile_macro);
- }
-
size_t max_ext_len = 0;
/* Figure out the max length of extension name for reserving buffer. */
@@ -222,6 +213,49 @@ riscv_pragma_intrinsic (cpp_reader *)
error ("unknown %<#pragma riscv intrinsic%> option %qs", name);
}
+/* Implement TARGETM.TARGET_OPTION.PRAGMA_PARSE. */
+
+static bool
+riscv_pragma_target_parse (tree args, tree pop_target)
+{
+ /* If args is not NULL then process it and set up the target-specific
+ information that it specifies. */
+ if (args)
+ {
+ if (!riscv_process_target_attr_for_pragma (args))
+ return false;
+
+ riscv_override_options_internal (&global_options);
+ }
+ /* args is NULL, restore to the state described in pop_target. */
+ else
+ {
+ pop_target = pop_target ? pop_target : target_option_default_node;
+ cl_target_option_restore (&global_options, &global_options_set,
+ TREE_TARGET_OPTION (pop_target));
+ }
+
+ target_option_current_node
+ = build_target_option_node (&global_options, &global_options_set);
+
+ riscv_reset_previous_fndecl ();
+
+ /* For the definitions, ensure all newly defined macros are considered
+ as used for -Wunused-macros. There is no point warning about the
+ compiler predefined macros. */
+ cpp_options *cpp_opts = cpp_get_options (parse_in);
+ unsigned char saved_warn_unused_macros = cpp_opts->warn_unused_macros;
+ cpp_opts->warn_unused_macros = 0;
+
+ cpp_force_token_locations (parse_in, BUILTINS_LOCATION);
+ riscv_cpu_cpp_builtins (parse_in);
+ cpp_stop_forcing_token_locations (parse_in);
+
+ cpp_opts->warn_unused_macros = saved_warn_unused_macros;
+
+ return true;
+}
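+
+/* A user-level sketch of what this hook enables (assuming the usual
+   RISC-V target-attribute strings, e.g. "arch=+zbb"):
+
+     #pragma GCC push_options
+     #pragma GCC target ("arch=+zbb")
+     ... functions compiled with Zbb enabled ...
+     #pragma GCC pop_options
+
+   The args == NULL path above performs the restore that pop_options
+   requests.  */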
+
/* Implement TARGET_CHECK_BUILTIN_CALL. */
static bool
riscv_check_builtin_call (location_t loc, vec<location_t> arg_loc, tree fndecl,
@@ -281,5 +315,6 @@ riscv_register_pragmas (void)
{
targetm.resolve_overloaded_builtin = riscv_resolve_overloaded_builtin;
targetm.check_builtin_call = riscv_check_builtin_call;
+ targetm.target_option.pragma_parse = riscv_pragma_target_parse;
c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic);
}
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index cc9d5c0..7266b5e 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -50,9 +50,13 @@ RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info)
RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info)
+RISCV_TUNE("spacemit-x60", spacemit_x60, spacemit_x60_tune_info)
RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
RISCV_TUNE("size", generic, optimize_size_tune_info)
RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info)
+RISCV_TUNE("andes-25-series", andes_25_series, andes_25_tune_info)
+RISCV_TUNE("andes-23-series", andes_23_series, andes_23_tune_info)
+RISCV_TUNE("andes-45-series", andes_45_series, andes_45_tune_info)
#undef RISCV_TUNE
@@ -171,4 +175,27 @@ RISCV_CORE("xiangshan-kunminghu", "rv64imafdcbvh_sdtrig_sha_shcounterenw_"
RISCV_CORE("mips-p8700", "rv64imfd_zicsr_zifencei_zalrsc_zba_zbb",
"mips-p8700")
+
+RISCV_CORE("andes-n22", "rv32imc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-n25", "rv32imc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-a25", "rv32imafdc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-nx25", "rv64imc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-ax25", "rv64imafdc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-a27", "rv32imafdc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-ax27", "rv64imafdc_zicsr_zifencei_xandesperf", "andes-25-series")
+RISCV_CORE("andes-n225", "rv32im_zicsr_zifencei_zca_zcb_zcmp_zcmt_"
+ "zba_zbb_zbc_zbs_xandesperf",
+ "andes-23-series")
+RISCV_CORE("andes-d23", "rv32im_zicsr_zifencei_zicbop_zicbom_zicboz_"
+ "zca_zcb_zcmp_zcmt_zba_zbb_zbc_zbs_xandesperf",
+ "andes-23-series")
+RISCV_CORE("andes-n45", "rv32imc_zicsr_zifencei_xandesperf", "andes-45-series")
+RISCV_CORE("andes-nx45", "rv64imc_zicsr_zifencei_xandesperf", "andes-45-series")
+RISCV_CORE("andes-a45", "rv32imafdc_zicsr_zifencei_xandesperf", "andes-45-series")
+RISCV_CORE("andes-ax45", "rv64imafdc_zicsr_zifencei_xandesperf", "andes-45-series")
+
+RISCV_CORE("spacemit-x60", "rv64imafdcv_zba_zbb_zbc_zbs_zicboz_zicond_"
+ "zbkc_zfh_zvfh_zvkt_zvl256b_sscofpmf_xsmtvdot",
+ "spacemit-x60")
+
#undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-ext-spacemit.def b/gcc/config/riscv/riscv-ext-spacemit.def
new file mode 100644
index 0000000..3482384
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-spacemit.def
@@ -0,0 +1,36 @@
+/* SpacemiT extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything here.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsmtvdot,
+ /* UPPERCASE_NAME */ XSMTVDOT,
+ /* FULL_NAME */ "SpacemiT vector dot product extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsmt,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
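+
+/* Illustrative: once defined here, the extension becomes acceptable in
+   -march strings, e.g. -march=rv64imafdcv_xsmtvdot (the zve32x dependency
+   is satisfied by v).  */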
+
diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def
index 80f534c..62d6380 100644
--- a/gcc/config/riscv/riscv-ext.def
+++ b/gcc/config/riscv/riscv-ext.def
@@ -2084,3 +2084,4 @@ DEFINE_RISCV_EXT(
#include "riscv-ext-ventana.def"
#include "riscv-ext-mips.def"
#include "riscv-ext-andes.def"
+#include "riscv-ext-spacemit.def"
diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt
index 2036c16..af8e556 100644
--- a/gcc/config/riscv/riscv-ext.opt
+++ b/gcc/config/riscv/riscv-ext.opt
@@ -56,6 +56,9 @@ TargetVariable
int riscv_xsf_subext
TargetVariable
+int riscv_xsmt_subext
+
+TargetVariable
int riscv_xthead_subext
TargetVariable
@@ -403,18 +406,6 @@ Mask(SVADE) Var(riscv_sv_subext)
Mask(SVBARE) Var(riscv_sv_subext)
-Mask(XANDESPERF) Var(riscv_xandes_subext)
-
-Mask(XANDESBFHCVT) Var(riscv_xandes_subext)
-
-Mask(XANDESVBFHCVT) Var(riscv_xandes_subext)
-
-Mask(XANDESVSINTLOAD) Var(riscv_xandes_subext)
-
-Mask(XANDESVPACKFPH) Var(riscv_xandes_subext)
-
-Mask(XANDESVDOT) Var(riscv_xandes_subext)
-
Mask(XCVALU) Var(riscv_xcv_subext)
Mask(XCVBI) Var(riscv_xcv_subext)
@@ -466,3 +457,18 @@ Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
Mask(XMIPSCMOV) Var(riscv_xmips_subext)
Mask(XMIPSCBOP) Var(riscv_xmips_subext)
+
+Mask(XANDESPERF) Var(riscv_xandes_subext)
+
+Mask(XANDESBFHCVT) Var(riscv_xandes_subext)
+
+Mask(XANDESVBFHCVT) Var(riscv_xandes_subext)
+
+Mask(XANDESVSINTLOAD) Var(riscv_xandes_subext)
+
+Mask(XANDESVPACKFPH) Var(riscv_xandes_subext)
+
+Mask(XANDESVDOT) Var(riscv_xandes_subext)
+
+Mask(XSMTVDOT) Var(riscv_xsmt_subext)
+
diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index 55f7fd0..f3d987e 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -331,7 +331,7 @@ RVV_NF4_MODES (4)
\
ADJUST_ALIGNMENT (RVVM4x##NF##QI, 1); \
ADJUST_ALIGNMENT (RVVM4x##NF##HI, 2); \
- ADJUST_ALIGNMENT (RVVM4x##NF##BF, 2); \
+ ADJUST_ALIGNMENT (RVVM4x##NF##BF, 2); \
ADJUST_ALIGNMENT (RVVM4x##NF##HF, 2); \
ADJUST_ALIGNMENT (RVVM4x##NF##SI, 4); \
ADJUST_ALIGNMENT (RVVM4x##NF##SF, 4); \
@@ -395,6 +395,7 @@ ADJUST_PRECISION (V4096BI, 4096);
VECTOR_MODE_WITH_PREFIX (V, INT, SI, NBYTES / 4, 1); \
VECTOR_MODE_WITH_PREFIX (V, INT, DI, NBYTES / 8, 1); \
VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, NBYTES / 2, 1); \
+ VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, NBYTES / 2, 1); \
VECTOR_MODE_WITH_PREFIX (V, FLOAT, SF, NBYTES / 4, 1); \
VECTOR_MODE_WITH_PREFIX (V, FLOAT, DF, NBYTES / 8, 1);
@@ -403,6 +404,7 @@ VECTOR_MODE_WITH_PREFIX (V, INT, HI, 1, 1); /* V1HI */
VECTOR_MODE_WITH_PREFIX (V, INT, SI, 1, 1); /* V1SI */
VECTOR_MODE_WITH_PREFIX (V, INT, DI, 1, 1); /* V1DI */
VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, 1, 1); /* V1HF */
+VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, 1, 1); /* V1BF */
VECTOR_MODE_WITH_PREFIX (V, FLOAT, SF, 1, 1); /* V1SF */
VECTOR_MODE_WITH_PREFIX (V, FLOAT, DF, 1, 1); /* V1DF */
VECTOR_MODE_WITH_PREFIX (V, INT, QI, 2, 1); /* V2QI */
@@ -411,18 +413,20 @@ VECTOR_MODE_WITH_PREFIX (V, INT, QI, 8, 1); /* V8QI */
VECTOR_MODE_WITH_PREFIX (V, INT, HI, 2, 1); /* V2HI */
VECTOR_MODE_WITH_PREFIX (V, INT, HI, 4, 1); /* V4HI */
VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, 2, 1); /* V2HF */
+VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, 2, 1); /* V2BF */
VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, 4, 1); /* V4HF */
+VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, 4, 1); /* V4BF */
VECTOR_MODE_WITH_PREFIX (V, INT, SI, 2, 1); /* V2SI */
VECTOR_MODE_WITH_PREFIX (V, FLOAT, SF, 2, 1); /* V2SF */
-VLS_MODES (16); /* V16QI V8HI V4SI V2DI V8HF V4SF V2DF */
-VLS_MODES (32); /* V32QI V16HI V8SI V4DI V16HF V8SF V4DF */
-VLS_MODES (64); /* V64QI V32HI V16SI V8DI V32HF V16SF V8DF */
-VLS_MODES (128); /* V128QI V64HI V32SI V16DI V64HF V32SF V16DF */
-VLS_MODES (256); /* V256QI V128HI V64SI V32DI V128HF V64SF V32DF */
-VLS_MODES (512); /* V512QI V256HI V128SI V64DI V256HF V128SF V64DF */
-VLS_MODES (1024); /* V1024QI V512HI V256SI V128DI V512HF V256SF V128DF */
-VLS_MODES (2048); /* V2048QI V1024HI V512SI V256DI V1024HF V512SF V256DF */
-VLS_MODES (4096); /* V4096QI V2048HI V1024SI V512DI V2048HF V1024SF V512DF */
+VLS_MODES (16); /* V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF */
+VLS_MODES (32); /* V32QI V16HI V8SI V4DI V16HF V16BF V8SF V4DF */
+VLS_MODES (64); /* V64QI V32HI V16SI V8DI V32HF V32BF V16SF V8DF */
+VLS_MODES (128); /* V128QI V64HI V32SI V16DI V64HF V64BF V32SF V16DF */
+VLS_MODES (256); /* V256QI V128HI V64SI V32DI V128HF V128BF V64SF V32DF */
+VLS_MODES (512); /* V512QI V256HI V128SI V64DI V256HF V256BF V128SF V64DF */
+VLS_MODES (1024); /* V1024QI V512HI V256SI V128DI V512HF V512BF V256SF V128DF */
+VLS_MODES (2048); /* V2048QI V1024HI V512SI V256DI V1024HF V1024BF V512SF V256DF */
+VLS_MODES (4096); /* V4096QI V2048HI V1024SI V512DI V2048HF V2048BF V1024SF V512DF */
/* TODO: According to RISC-V 'V' ISA spec, the maximum vector length can
be 65536 for a single vector register which means the vector mode in
diff --git a/gcc/config/riscv/riscv-opt-popretz.cc b/gcc/config/riscv/riscv-opt-popretz.cc
new file mode 100644
index 0000000..43b2d5e
--- /dev/null
+++ b/gcc/config/riscv/riscv-opt-popretz.cc
@@ -0,0 +1,294 @@
+/* RISC-V cm.popretz optimization pass.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+   This pass combines "li a0, 0" + "cm.popret" into a single "cm.popretz"
+   instruction for the RISC-V Zcmp extension.
+
+ Rationale:
+ ---------
+ Ideally, cm.popretz should be generated during prologue/epilogue expansion.
+ However, as documented in PR113715 [1], this approach causes shrink-wrapping
+ analysis to fail, resulting in incorrect code generation.
+
+ To address this issue, we use a dedicated RTL pass to combine these
+ instructions later in the compilation pipeline, after shrink-wrapping has
+ completed.
+
+ Why not use peephole2?
+ ----------------------
+ An alternative approach would be to use a peephole2 pattern to perform this
+ optimization. However, between "li a0, 0" and "cm.popret", there can be
+ STACK_TIE and other instructions that make it difficult to write a robust
+ peephole pattern that handles all cases.
+
+ For example, in RV32, when the return value is in DImode but the low part
+ (a0) is zero, this pattern is hard to describe effectively in peephole2.
+ Using a dedicated pass gives us more flexibility to handle these cases.
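+
+   As an illustrative example (the exact register list and stack
+   adjustment depend on the function's frame):
+
+     li          a0,0
+     cm.popret   {ra,s0},16
+   =>
+     cm.popretz  {ra,s0},16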
+
+ [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715 */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "emit-rtl.h"
+#include "dumpfile.h"
+#include "tree-pass.h"
+#include "insn-config.h"
+#include "insn-opinit.h"
+#include "recog.h"
+
+namespace {
+
+const pass_data pass_data_combine_popretz =
+{
+ RTL_PASS, /* type. */
+ "popretz", /* name. */
+ OPTGROUP_NONE, /* optinfo_flags. */
+ TV_MACH_DEP, /* tv_id. */
+ 0, /* properties_required. */
+ 0, /* properties_provided. */
+ 0, /* properties_destroyed. */
+ 0, /* todo_flags_start. */
+ 0, /* todo_flags_finish. */
+};
+
+class pass_combine_popretz : public rtl_opt_pass
+{
+public:
+ pass_combine_popretz (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_combine_popretz, ctxt)
+ {}
+
+ virtual bool gate (function *)
+ {
+ return TARGET_ZCMP && !frame_pointer_needed;
+ }
+
+ virtual unsigned int execute (function *);
+}; // class pass_combine_popretz
+
+
+/* Check if the given instruction code is a cm.popret instruction.
+ Returns true if the code corresponds to any variant of gpr_multi_popret
+ (for different register bounds and modes). */
+static bool
+riscv_popret_insn_p (int code)
+{
+#define CASE_CODE_FOR_POPRET_(REG_BOUND, MODE) \
+ case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE:
+#define CASE_CODE_FOR_POPRET(REG_BOUND) \
+ CASE_CODE_FOR_POPRET_(REG_BOUND, si) \
+ CASE_CODE_FOR_POPRET_(REG_BOUND, di)
+#define ALL_CASE_CODE_FOR_POPRET \
+ CASE_CODE_FOR_POPRET(ra) \
+ CASE_CODE_FOR_POPRET(s0) \
+ CASE_CODE_FOR_POPRET(s1) \
+ CASE_CODE_FOR_POPRET(s2) \
+ CASE_CODE_FOR_POPRET(s3) \
+ CASE_CODE_FOR_POPRET(s4) \
+ CASE_CODE_FOR_POPRET(s5) \
+ CASE_CODE_FOR_POPRET(s6) \
+ CASE_CODE_FOR_POPRET(s7) \
+ CASE_CODE_FOR_POPRET(s8) \
+ CASE_CODE_FOR_POPRET(s9) \
+  CASE_CODE_FOR_POPRET(s11)
+
+ switch (code)
+ {
+ ALL_CASE_CODE_FOR_POPRET
+ return true;
+ default:
+ return false;
+ }
+
+#undef CASE_CODE_FOR_POPRET_
+#undef CASE_CODE_FOR_POPRET
+#undef ALL_CASE_CODE_FOR_POPRET
+}
+
+/* Convert a cm.popret instruction code to its corresponding cm.popretz code.
+ Given an instruction code for gpr_multi_popret, returns the equivalent
+ gpr_multi_popretz instruction code. Returns CODE_FOR_nothing if the
+ input is not a valid popret instruction. */
+static int
+riscv_code_for_popretz (int code)
+{
+#define CASE_CODE_FOR_POPRETZ_(REG_BOUND, MODE) \
+ case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: \
+ return CODE_FOR_gpr_multi_popretz_up_to_##REG_BOUND##_##MODE;
+
+#define CASE_CODE_FOR_POPRETZ(REG_BOUND) \
+ CASE_CODE_FOR_POPRETZ_(REG_BOUND, si) \
+ CASE_CODE_FOR_POPRETZ_(REG_BOUND, di)
+
+#define ALL_CASE_CODE_FOR_POPRETZ \
+ CASE_CODE_FOR_POPRETZ(ra) \
+ CASE_CODE_FOR_POPRETZ(s0) \
+ CASE_CODE_FOR_POPRETZ(s1) \
+ CASE_CODE_FOR_POPRETZ(s2) \
+ CASE_CODE_FOR_POPRETZ(s3) \
+ CASE_CODE_FOR_POPRETZ(s4) \
+ CASE_CODE_FOR_POPRETZ(s5) \
+ CASE_CODE_FOR_POPRETZ(s6) \
+ CASE_CODE_FOR_POPRETZ(s7) \
+ CASE_CODE_FOR_POPRETZ(s8) \
+ CASE_CODE_FOR_POPRETZ(s9) \
+  CASE_CODE_FOR_POPRETZ(s11)
+
+ switch (code)
+ {
+ ALL_CASE_CODE_FOR_POPRETZ
+ default:
+ return CODE_FOR_nothing;
+ }
+
+#undef CASE_CODE_FOR_POPRETZ_
+#undef CASE_CODE_FOR_POPRETZ
+#undef ALL_CASE_CODE_FOR_POPRETZ
+}
+
+/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz".
+
+ This pass scans basic blocks that precede the exit block, looking for
+ the following pattern:
+ 1. A cm.popret instruction (function epilogue with return)
+ 2. A (use a0) pseudo-instruction before the cm.popret
+ 3. A "li a0, 0" instruction (set a0 to zero) before the use
+
+ When this pattern is found AND a0 is not referenced by any other
+ instructions between the "li a0, 0" and the (use a0), we can safely
+ combine them into a single cm.popretz instruction, which performs
+ the same operations more efficiently.
+
+ This is a late RTL pass that runs before branch shortening. */
+unsigned int
+pass_combine_popretz::execute (function *fn)
+{
+ timevar_push (TV_MACH_DEP);
+ edge e;
+ edge_iterator ei;
+
+  /* Only visit the exit block's preds, since popret can only appear there. */
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds)
+ {
+ basic_block bb = e->src;
+ rtx_insn *popret_insn = BB_END (bb);
+ if (!JUMP_P (popret_insn))
+ continue;
+ int code = recog_memoized (popret_insn);
+ if (!riscv_popret_insn_p (code))
+ continue;
+
+ rtx_insn *def_a0_insn = NULL;
+ rtx_insn *use_a0_insn = NULL;
+ rtx a0_reg = NULL;
+ /* Scan backwards from popret to find the pattern:
+ 1. First, find the (use a0) pseudo-instruction
+ 2. Continue scanning to find "li a0, 0" (set a0 to const0_rtx)
+ 3. Ensure a0 is not referenced by any instructions between them
+ 4. Stop at the first definition of a0 (to ensure we have the
+ last/most recent def before the use). */
+ for (rtx_insn *def_insn = PREV_INSN (popret_insn);
+ def_insn && def_insn != PREV_INSN (BB_HEAD (bb));
+ def_insn = PREV_INSN (def_insn))
+ {
+ if (!INSN_P (def_insn))
+ continue;
+ rtx def_pat = PATTERN (def_insn);
+ if (GET_CODE (def_pat) == USE
+ && REG_P (XEXP (def_pat, 0))
+ && REGNO (XEXP (def_pat, 0)) == A0_REGNUM)
+ {
+ a0_reg = XEXP (def_pat, 0);
+ use_a0_insn = def_insn;
+ continue;
+ }
+
+ if (use_a0_insn && reg_referenced_p (a0_reg, def_pat))
+ {
+ /* a0 is used by another instruction before its use in popret. */
+ use_a0_insn = NULL;
+ break;
+ }
+
+ if (use_a0_insn
+ && GET_CODE (def_pat) == SET
+ && REG_P (SET_DEST (def_pat))
+ && REGNO (SET_DEST (def_pat)) == A0_REGNUM)
+ {
+ if (SET_SRC (def_pat) == CONST0_RTX (GET_MODE (SET_SRC (def_pat))))
+ def_a0_insn = def_insn;
+ /* Stop the search regardless of the value assigned to a0,
+ because we only want to match the last (most recent)
+ definition of a0 before the (use a0). */
+ break;
+ }
+ }
+
+      /* We can replace the popret with popretz only if we found both the
+	 (use a0) and a def of a0 that sets it to zero. */
+ if (!def_a0_insn || !use_a0_insn)
+ continue;
+
+ int code_for_popretz = riscv_code_for_popretz (code);
+ gcc_assert (code_for_popretz != CODE_FOR_nothing);
+
+ /* Extract the stack adjustment value from the popret instruction.
+ The popret pattern is a PARALLEL, and the first element is the
+ stack pointer adjustment: (set sp (plus sp const_int)). */
+ rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0);
+ gcc_assert (GET_CODE (stack_adj_rtx) == SET
+ && REG_P (SET_DEST (stack_adj_rtx))
+ && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM
+ && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS
+ && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1)));
+
+ rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1);
+
+ /* Generate and insert the popretz instruction at the position of
+ the original popret. emit_insn_after places the new instruction
+ after PREV_INSN(popret_insn). */
+ rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val);
+ emit_insn_after (popretz, PREV_INSN (popret_insn));
+
+ /* Clean up those instructions. */
+ remove_insn (popret_insn);
+ remove_insn (use_a0_insn);
+ remove_insn (def_a0_insn);
+ }
+
+ timevar_pop (TV_MACH_DEP);
+ return 0;
+}
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_combine_popretz (gcc::context *ctxt)
+{
+ return new pass_combine_popretz (ctxt);
+}
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 4e4e9d8..9b92a96 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -61,6 +61,10 @@ enum riscv_microarchitecture_type {
generic_ooo,
mips_p8700,
tt_ascalon_d8,
+ andes_25_series,
+ andes_23_series,
+ andes_45_series,
+ spacemit_x60,
};
extern enum riscv_microarchitecture_type riscv_microarchitecture;
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index 5aa4122..d41cc58 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
+INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz);
INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
diff --git a/gcc/config/riscv/riscv-profiles.def b/gcc/config/riscv/riscv-profiles.def
index 741c471..ea1c235 100644
--- a/gcc/config/riscv/riscv-profiles.def
+++ b/gcc/config/riscv/riscv-profiles.def
@@ -61,7 +61,7 @@ RISCV_PROFILE("rva23s64", "rv64imafdcbv_zicsr_zicntr_zihpm_ziccif_ziccrse"
"_zicbom_zicbop_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl"
"_zicond_zimop_zcmop_zcb_zfa_zawrs_svbare_svade_ssccptr"
"_sstvecd_sstvala_sscounterenw_svpbmt_svinval_svnapot_sstc"
- "_sscofpmf_ssnpm_ssu64xl_sha_supm")
+ "_sscofpmf_ssnpm_ssu64xl_sha_supm_zifencei")
/* RVB23 contains all mandatory base ISA for RVA22U64 and the new extension
'zihintntl,zicond,zimop,zcmop,zfa,zawrs' as mandatory extensions. */
@@ -77,6 +77,6 @@ RISCV_PROFILE("rvb23s64", "rv64imafdcb_zicsr_zicntr_zihpm_ziccif_ziccrse"
"_zicbom_zicbop_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt"
"_zihintntl_zicond_zimop_zcmop_zcb_zfa_zawrs_svbare_svade"
"_ssccptr_sstvecd_sstvala_sscounterenw_svpbmt_svinval_svnapot"
- "_sstc_sscofpmf_ssu64xl_supm")
+ "_sstc_sscofpmf_ssu64xl_supm_zifencei")
#undef RISCV_PROFILE
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 013b1dd..abf9df7 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -208,7 +208,13 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt);
+rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt);
+/* Routines implemented in riscv-vsetvl.cc. */
+extern bool has_vtype_op (rtx_insn *);
+extern bool mask_agnostic_p (rtx_insn *);
+extern rtx get_avl (rtx_insn *);
+extern bool vsetvl_insn_p (rtx_insn *);
/* Routines implemented in riscv-string.c. */
extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
@@ -658,6 +664,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
+void expand_mask_reduction (rtx *, rtx_code);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
@@ -685,6 +692,7 @@ void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode);
void expand_vx_binary_vxrm_vec_vec_dup (rtx, rtx, rtx, int, int, machine_mode);
void expand_vx_binary_vxrm_vec_dup_vec (rtx, rtx, rtx, int, int, machine_mode);
+void expand_vx_cmp_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
@@ -834,18 +842,22 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
extern bool strided_load_broadcast_p (void);
extern bool riscv_prefer_agnostic_p (void);
extern bool riscv_use_divmod_expander (void);
-void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
+void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree,
+ rtx, tree, int, bool);
extern bool
riscv_option_valid_attribute_p (tree, tree, tree, int);
extern bool
riscv_option_valid_version_attribute_p (tree, tree, tree, int);
extern bool
+riscv_process_target_attr_for_pragma (tree);
+extern bool
riscv_process_target_version_attr (tree, location_t *);
extern bool
riscv_process_target_version_str (string_slice, location_t *);
extern void
riscv_override_options_internal (struct gcc_options *);
extern void riscv_option_override (void);
+extern void riscv_reset_previous_fndecl (void);
extern rtx riscv_prefetch_cookie (rtx, rtx);
extern bool riscv_prefetch_offset_address_p (rtx, machine_mode);
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 61c4a09..c5710e4 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -923,6 +923,10 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
unsigned HOST_WIDE_INT factor, align;
+ if (riscv_memcpy_size_threshold >= 0
+ && hwi_length > (unsigned HOST_WIDE_INT) riscv_memcpy_size_threshold)
+ return false;
+
if (riscv_slow_unaligned_access_p)
{
align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
@@ -1233,6 +1237,21 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p)
if (!use_vector_stringop_p (info, potential_ew, length_in))
return false;
+ if (CONST_INT_P (length_in))
+ {
+ HOST_WIDE_INT length = INTVAL (length_in);
+ if (movmem_p
+ && riscv_memmove_size_threshold >= 0
+ && length > riscv_memmove_size_threshold)
+ return false;
+ else if (!movmem_p
+	       && riscv_memcpy_size_threshold >= 0
+ && length > riscv_memcpy_size_threshold)
+ return false;
+ }
+ else
+ return false;
+
/* Inlining general memmove is a pessimisation: we can't avoid having to
decide which direction to go at runtime, which is costly in instruction
count however for situations where the entire move fits in one vector
@@ -1615,6 +1634,16 @@ expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop)
return false;
+ if (CONST_INT_P (length_in))
+ {
+ HOST_WIDE_INT length = INTVAL (length_in);
+ if (riscv_memset_size_threshold >= 0
+ && length > riscv_memset_size_threshold)
+ return false;
+ }
+ else
+ return false;
+
rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
rtx dst = change_address (dst_in, info.vmode, dst_addr);
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index 1887ed7..4cd860f 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -105,8 +105,6 @@ public:
unsigned xlen () const {return m_xlen;};
- const char *get_profile_name () const;
-
riscv_subset_list *clone () const;
static riscv_subset_list *parse (const char *, location_t *);
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index 54edeeb..eb3e688 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -44,6 +44,7 @@ public:
, m_cpu_info (nullptr)
, m_tune (nullptr)
, m_priority (0)
+ , m_max_vect (false)
{
}
@@ -51,6 +52,7 @@ public:
bool handle_cpu (const char *);
bool handle_tune (const char *);
bool handle_priority (const char *);
+ bool handle_max_vect (const char *);
void update_settings (struct gcc_options *opts) const;
private:
@@ -66,31 +68,35 @@ private:
const riscv_cpu_info *m_cpu_info;
const char *m_tune;
int m_priority;
+ bool m_max_vect;
};
}
/* All the information needed to handle a target attribute.
NAME is the name of the attribute.
- HANDLER is the function that takes the attribute string as an argument. */
+ HANDLER is the function that takes the attribute string as an argument.
+ REQUIRES_ARG indicates whether this attribute requires an argument value. */
struct riscv_attribute_info
{
const char *name;
bool (riscv_target_attr_parser::*handler) (const char *);
+ bool requires_arg;
};
/* The target attributes that we support. */
static const struct riscv_attribute_info riscv_target_attrs[]
- = {{"arch", &riscv_target_attr_parser::handle_arch},
- {"cpu", &riscv_target_attr_parser::handle_cpu},
- {"tune", &riscv_target_attr_parser::handle_tune},
- {NULL, NULL}};
+ = {{"arch", &riscv_target_attr_parser::handle_arch, true},
+ {"cpu", &riscv_target_attr_parser::handle_cpu, true},
+ {"tune", &riscv_target_attr_parser::handle_tune, true},
+ {"max-vectorization", &riscv_target_attr_parser::handle_max_vect, false},
+ {NULL, NULL, false}};
static const struct riscv_attribute_info riscv_target_version_attrs[]
- = {{"arch", &riscv_target_attr_parser::handle_arch},
- {"priority", &riscv_target_attr_parser::handle_priority},
- {NULL, NULL}};
+ = {{"arch", &riscv_target_attr_parser::handle_arch, true},
+ {"priority", &riscv_target_attr_parser::handle_priority, true},
+ {NULL, NULL, false}};
bool
riscv_target_attr_parser::parse_arch (const char *str)
@@ -254,6 +260,17 @@ riscv_target_attr_parser::handle_priority (const char *str)
return true;
}
+/* Handle max-vectorization.  There are no further options; just
+   enable it. */
+
+bool
+riscv_target_attr_parser::handle_max_vect (const char *str ATTRIBUTE_UNUSED)
+{
+ m_max_vect = true;
+
+ return true;
+}
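+
+/* Illustrative usage (a sketch, not taken from this patch's tests):
+
+     __attribute__ ((target ("max-vectorization")))
+     void f (int *a, int *b, int n);
+
+   This sets riscv_max_vectorization for F only; it can be combined with
+   other attributes using the usual ';'-separated syntax, e.g.
+   target ("arch=+v;max-vectorization").  */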
+
void
riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
{
@@ -279,6 +296,9 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
if (m_priority)
opts->x_riscv_fmv_priority = m_priority;
+
+ if (m_max_vect)
+ opts->x_riscv_max_vectorization = true;
}
/* Parse ARG_STR which contains the definition of one target attribute.
@@ -303,33 +323,50 @@ riscv_process_one_target_attr (char *arg_str,
char *str_to_check = buf.get();
strcpy (str_to_check, arg_str);
+ /* Split attribute name from argument (if present). */
char *arg = strchr (str_to_check, '=');
-
- if (!arg)
+ if (arg)
{
- if (loc)
- error_at (*loc, "attribute %<target(\"%s\")%> does not "
- "accept an argument", str_to_check);
- return false;
+ *arg = '\0';
+ ++arg;
+ /* Check for empty argument after '='. */
+ if (*arg == '\0')
+ {
+ if (loc)
+	    error_at (*loc, "attribute %<target(\"%s\")%> has an empty argument",
+ str_to_check);
+ return false;
+ }
}
- arg[0] = '\0';
- ++arg;
- for (const auto *attr = attrs;
- attr->name;
- ++attr)
+ /* Find matching attribute. */
+ for (const auto *attr = attrs; attr->name; ++attr)
{
- /* If the names don't match up, or the user has given an argument
- to an attribute that doesn't accept one, or didn't give an argument
- to an attribute that expects one, fail to match. */
- if (strncmp (str_to_check, attr->name, strlen (attr->name)) != 0)
+ if (strcmp (str_to_check, attr->name) != 0)
continue;
+ /* Validate argument presence matches expectations. */
+ if (attr->requires_arg && !arg)
+ {
+ if (loc)
+ error_at (*loc, "attribute %<target(\"%s\")%> expects "
+ "an argument", str_to_check);
+ return false;
+ }
+
+ if (!attr->requires_arg && arg)
+ {
+ if (loc)
+ error_at (*loc, "attribute %<target(\"%s\")%> does not "
+ "accept an argument", str_to_check);
+ return false;
+ }
+
return (&attr_parser->*attr->handler) (arg);
}
if (loc)
- error_at (*loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
+ error_at (*loc, "unknown attribute %<target(\"%s\")%>", str_to_check);
return false;
}
@@ -371,6 +408,7 @@ riscv_process_target_str (string_slice args,
std::unique_ptr<char[]> buf (new char[len+1]);
char *str_to_check = buf.get ();
+ str_to_check[len] = '\0';
strncpy (str_to_check, args.begin (), args.size ());
/* Used to catch empty spaces between semi-colons i.e.
@@ -488,6 +526,17 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
return ret;
}
+/* Public wrapper for pragma processing.
+ Parse ARGS (a TREE_LIST of target attributes) and update global_options.
+ This is used by #pragma GCC target. */
+
+bool
+riscv_process_target_attr_for_pragma (tree args)
+{
+ location_t loc = UNKNOWN_LOCATION;
+ return riscv_process_target_attr (args, &loc, riscv_target_attrs);
+}
+
/* Parse the tree in ARGS that contains the target_version attribute
information and update the global target options space. If LOC is nonnull,
report diagnostics against *LOC, otherwise remain silent. */
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 707924d..f3c4431 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1811,7 +1811,8 @@ expand_const_vector_onestep (rtx target, rvv_builder &builder)
rtx dest = gen_reg_rtx (mode);
insn_code icode = code_for_pred_mov (mode);
rtx ops3[] = {dest, tmp3, tmp1};
- emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n));
+ emit_nonvlmax_insn (icode, (unsigned) __MASK_OP_TUMA | UNARY_OP_P,
+ ops3, GEN_INT (n));
emit_move_insn (target, dest);
}
@@ -4747,8 +4748,6 @@ void
expand_gather_scatter (rtx *ops, bool is_load)
{
rtx ptr, vec_offset, vec_reg;
- bool zero_extend_p;
- int shift;
rtx mask = ops[5];
rtx len = ops[6];
if (is_load)
@@ -4758,78 +4757,18 @@ expand_gather_scatter (rtx *ops, bool is_load)
vec_reg = ops[0];
ptr = ops[1];
vec_offset = ops[2];
- zero_extend_p = INTVAL (ops[3]);
- shift = exact_log2 (INTVAL (ops[4]));
}
else
{
vec_reg = ops[4];
ptr = ops[0];
vec_offset = ops[1];
- zero_extend_p = INTVAL (ops[2]);
- shift = exact_log2 (INTVAL (ops[3]));
}
machine_mode vec_mode = GET_MODE (vec_reg);
machine_mode idx_mode = GET_MODE (vec_offset);
- scalar_mode inner_idx_mode = GET_MODE_INNER (idx_mode);
- unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode);
- poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
bool is_vlmax = is_vlmax_len_p (vec_mode, len);
- bool use_widening_shift = false;
-
- /* Extend the offset element to address width. */
- if (inner_offsize < BITS_PER_WORD)
- {
- use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1;
- /* 7.2. Vector Load/Store Addressing Modes.
- If the vector offset elements are narrower than XLEN, they are
- zero-extended to XLEN before adding to the ptr effective address. If
- the vector offset elements are wider than XLEN, the least-significant
- XLEN bits are used in the address calculation. An implementation must
- raise an illegal instruction exception if the EEW is not supported for
- offset elements.
-
- RVV spec only refers to the shift == 0 case. */
- if (!zero_extend_p || shift)
- {
- if (zero_extend_p)
- inner_idx_mode
- = int_mode_for_size (inner_offsize * 2, 0).require ();
- else
- inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require ();
- machine_mode new_idx_mode
- = get_vector_mode (inner_idx_mode, nunits).require ();
- if (!use_widening_shift)
- {
- rtx tmp = gen_reg_rtx (new_idx_mode);
- emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
- zero_extend_p ? true : false));
- vec_offset = tmp;
- }
- idx_mode = new_idx_mode;
- }
- }
-
- if (shift)
- {
- rtx tmp;
- if (!use_widening_shift)
- tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
- gen_int_mode (shift, Pmode), NULL_RTX, 0,
- OPTAB_DIRECT);
- else
- {
- tmp = gen_reg_rtx (idx_mode);
- insn_code icode = code_for_pred_vwsll_scalar (idx_mode);
- rtx ops[] = {tmp, vec_offset, const1_rtx};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
-
- vec_offset = tmp;
- }
-
insn_code icode = prepare_gather_scatter (vec_mode, idx_mode, is_load);
if (is_vlmax)
{
@@ -4946,6 +4885,54 @@ expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe,
emit_insn (gen_pred_extract_first (m1_mode, scalar_dest, m1_tmp2));
}
+/* Expand mask reductions. OPS are {dest, src} where DEST's mode
+ is QImode and SRC's mode is a mask mode.
+ CODE is one of AND, IOR, XOR. */
+
+void
+expand_mask_reduction (rtx *ops, rtx_code code)
+{
+ machine_mode mode = GET_MODE (ops[1]);
+ rtx dest = ops[0];
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ rtx tmp = gen_reg_rtx (Xmode);
+ rtx cpop_ops[] = {tmp, ops[1]};
+ emit_vlmax_insn (code_for_pred_popcount (mode, Xmode), CPOP_OP, cpop_ops);
+
+ bool eq_zero = false;
+
+  /* An AND reduction is popcount (mask) == len,
+     an IOR reduction is popcount (mask) != 0,
+     and an XOR reduction is (popcount (mask) & 1) != 0. */
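+  /* E.g., an IOR reduction expands to something like (illustrative)
+       vcpop.m  t0,v0
+     followed by a branch on t0 != 0 that moves 1 or 0 into DEST. */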
+ if (code == AND)
+ {
+ rtx len = gen_int_mode (GET_MODE_NUNITS (mode), HImode);
+ tmp = expand_binop (Xmode, sub_optab, tmp, len, NULL, true,
+ OPTAB_DIRECT);
+ eq_zero = true;
+ }
+ else if (code == IOR)
+ ;
+ else if (code == XOR)
+ tmp = expand_binop (Xmode, and_optab, tmp, GEN_INT (1), NULL, true,
+ OPTAB_DIRECT);
+ else
+ gcc_unreachable ();
+
+ rtx els = gen_label_rtx ();
+ rtx end = gen_label_rtx ();
+
+ riscv_expand_conditional_branch (els, eq_zero ? EQ : NE, tmp, const0_rtx);
+ emit_move_insn (dest, const0_rtx);
+ emit_jump_insn (gen_jump (end));
+ emit_barrier ();
+
+ emit_label (els);
+ emit_move_insn (dest, const1_rtx);
+ emit_label (end);
+}
+
/* Prepare ops for ternary operations.
It can be called before or after RA. */
void
@@ -5327,7 +5314,7 @@ emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
{
insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
- if (type & USE_VUNDEF_MERGE_P)
+ if (type & (insn_type) USE_VUNDEF_MERGE_P)
{
rtx cvt_x_ops[] = {op_dest, mask, op_src};
emit_vlmax_insn (icode, type, cvt_x_ops);
@@ -5395,7 +5382,7 @@ emit_vec_cvt_x_f_rtz (rtx op_dest, rtx op_src, rtx mask,
{
insn_code icode = code_for_pred (FIX, vec_mode);
- if (type & USE_VUNDEF_MERGE_P)
+ if (type & (insn_type) USE_VUNDEF_MERGE_P)
{
rtx cvt_x_ops[] = {op_dest, mask, op_src};
emit_vlmax_insn (icode, type, cvt_x_ops);
@@ -5923,6 +5910,40 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2,
emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
}
+static rtx_code
+get_swapped_cmp_rtx_code (rtx_code code)
+{
+ switch (code)
+ {
+ case GTU:
+ return LTU;
+ case GT:
+ return LT;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expand a binary vx comparison of the form v2 = vec_dup(x) > v1, i.e. the
+   first operand comes from the vec_duplicate and the second is the vector
+   register.  The RVV vms* instructions only support the form
+   v2 = v1 < vec_dup(x), so swap op_1 and op_2 and emit the insn with the
+   swapped comparison code (e.g. GTU becomes LTU) instead. */
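+
+/* E.g. (illustrative), for an unsigned greater-than comparison,
+   v2 = vec_dup(x) > v1 is emitted as vmsltu.vx v2,v1,x. */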
+
+void
+expand_vx_cmp_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, rtx_code code,
+ machine_mode mode)
+{
+ machine_mode mask_mode = get_mask_mode (mode);
+ rtx_code swapped_code = get_swapped_cmp_rtx_code (code);
+
+ insn_code icode = code_for_pred_cmp_scalar (mode);
+ rtx cmp = gen_rtx_fmt_ee (swapped_code, mask_mode, op_2, op_1);
+ rtx ops[] = {op_0, cmp, op_2, op_1};
+
+ emit_vlmax_insn (icode, COMPARE_OP, ops);
+}
+
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
@@ -6133,6 +6154,10 @@ can_be_broadcast_p (rtx op)
if (mode == HFmode && !TARGET_ZVFH)
return false;
+  /* There is no BF16 variant of vfmv.v.f. */
+ if (mode == BFmode)
+ return false;
+
/* Same for float, just that we can always handle 64-bit doubles
even on !TARGET_64BIT. We have ruled out 16-bit HF already
above. */
@@ -6172,6 +6197,10 @@ strided_broadcast_p (rtx op)
if (!TARGET_ZVFH && mode == HFmode)
return true;
+  /* There is no BF16 variant of vfmv.v.f. */
+ if (mode == BFmode)
+ return true;
+
return false;
}
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 7e4d396..d00403a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1793,12 +1793,13 @@ public:
The fold routines expect the replacement statement to have the
same lhs as the original call, so return the copy statement
rather than the field update. */
- gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);
+ gassign *copy = gimple_build_assign (f.lhs, rhs_tuple);
/* Get a reference to the individual vector. */
tree field = tuple_type_field (TREE_TYPE (f.lhs));
tree lhs_array
- = build3 (COMPONENT_REF, TREE_TYPE (field), f.lhs, field, NULL_TREE);
+ = build3 (COMPONENT_REF, TREE_TYPE (field), unshare_expr (f.lhs),
+ field, NULL_TREE);
tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), lhs_array,
index, NULL_TREE, NULL_TREE);
gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
@@ -2129,7 +2130,8 @@ public:
rtx expand (function_expander &e) const override
{
- return e.use_exact_insn (code_for_pred_fault_load (e.vector_mode ()));
+ return e.use_contiguous_load_insn
+ (code_for_pred_fault_load (e.vector_mode ()));
}
};
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index a3e596a..f92e94b 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4443,7 +4443,7 @@ function_builder::get_attributes (const function_instance &instance)
registered_function &
function_builder::add_function (const function_instance &instance,
const char *name, tree fntype, tree attrs,
- bool placeholder_p, const char *overload_name,
+ const char *overload_name,
const vec<tree> &argument_types,
enum required_ext required,
bool overloaded_p = false)
@@ -4464,7 +4464,7 @@ function_builder::add_function (const function_instance &instance,
nodes and remove the target hook. For now, however, we need to appease the
validation and return a non-NULL, non-error_mark_node node, so we
arbitrarily choose integer_zero_node. */
- tree decl = placeholder_p || in_lto_p
+ tree decl = in_lto_p
? integer_zero_node
: simulate_builtin_function_decl (input_location, name, fntype,
code, NULL, attrs);
@@ -4508,7 +4508,7 @@ function_builder::add_unique_function (const function_instance &instance,
argument_types.address ());
tree attrs = get_attributes (instance);
registered_function &rfn
- = add_function (instance, name, fntype, attrs, false, overload_name,
+ = add_function (instance, name, fntype, attrs, overload_name,
argument_types.copy (), required);
/* Enter the function into the hash table. */
@@ -4523,7 +4523,7 @@ function_builder::add_unique_function (const function_instance &instance,
/* Attribute lists shouldn't be shared. */
tree attrs = get_attributes (instance);
if (m_direct_overloads)
- add_function (instance, overload_name, fntype, attrs, false, NULL,
+ add_function (instance, overload_name, fntype, attrs, NULL,
vNULL, required);
else
{
@@ -4562,7 +4562,7 @@ function_builder::add_overloaded_function (const function_instance &instance,
/* To avoid API conflicting, take void return type and void argument
for the overloaded function. */
tree fntype = build_function_type (void_type_node, void_list_node);
- add_function (instance, name, fntype, NULL_TREE, false, name,
+ add_function (instance, name, fntype, NULL_TREE, name,
vNULL, required, true);
obstack_free (&m_string_obstack, name);
}
@@ -4709,7 +4709,8 @@ function_expander::use_exact_insn (insn_code icode)
/* The RVV floating-point only support dynamic rounding mode in the
FRM register. */
- if (opno != insn_data[icode].n_generator_args)
+ if (base->may_require_frm_p ()
+ && opno < insn_data[icode].n_generator_args)
add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode));
return generate_insn (icode);
@@ -4894,7 +4895,8 @@ function_expander::use_ternop_insn (bool vd_accum_p, insn_code icode)
/* The RVV floating-point only support dynamic rounding mode in the
FRM register. */
- if (opno != insn_data[icode].n_generator_args)
+ if (base->may_require_frm_p ()
+ && opno < insn_data[icode].n_generator_args)
add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode));
return generate_insn (icode);
@@ -4938,7 +4940,8 @@ function_expander::use_widen_ternop_insn (insn_code icode)
/* The RVV floating-point only support dynamic rounding mode in the
FRM register. */
- if (opno != insn_data[icode].n_generator_args)
+ if (base->may_require_frm_p ()
+ && opno < insn_data[icode].n_generator_args)
add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode));
return generate_insn (icode);
diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h
index 8cb243b..9e1a474 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -435,7 +435,7 @@ private:
tree get_attributes (const function_instance &);
registered_function &add_function (const function_instance &, const char *,
- tree, tree, bool, const char *,
+ tree, tree, const char *,
const vec<tree> &, enum required_ext,
bool);
diff --git a/gcc/config/riscv/riscv-vector-switch.def b/gcc/config/riscv/riscv-vector-switch.def
index 1b0d619..6b71ab6 100644
--- a/gcc/config/riscv/riscv-vector-switch.def
+++ b/gcc/config/riscv/riscv-vector-switch.def
@@ -401,6 +401,18 @@ VLS_ENTRY (V256HF, riscv_vector::vls_mode_valid_p (V256HFmode) && TARGET_VECTOR_
VLS_ENTRY (V512HF, riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 1024)
VLS_ENTRY (V1024HF, riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 2048)
VLS_ENTRY (V2048HF, riscv_vector::vls_mode_valid_p (V2048HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 4096)
+VLS_ENTRY (V1BF, riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16)
+VLS_ENTRY (V2BF, riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16)
+VLS_ENTRY (V4BF, riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16)
+VLS_ENTRY (V8BF, riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16)
+VLS_ENTRY (V16BF, riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16)
+VLS_ENTRY (V32BF, riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64)
+VLS_ENTRY (V64BF, riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128)
+VLS_ENTRY (V128BF, riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256)
+VLS_ENTRY (V256BF, riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512)
+VLS_ENTRY (V512BF, riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024)
+VLS_ENTRY (V1024BF, riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048)
+VLS_ENTRY (V2048BF, riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096)
VLS_ENTRY (V1SF, riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32)
VLS_ENTRY (V2SF, riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32)
VLS_ENTRY (V4SF, riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32)
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 3586d0c..127187b 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -258,7 +258,7 @@ policy_to_str (bool agnostic_p)
/* Return true if it is an RVV instruction depends on VTYPE global
status register. */
-static bool
+bool
has_vtype_op (rtx_insn *rinsn)
{
return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
@@ -306,7 +306,7 @@ vector_config_insn_p (rtx_insn *rinsn)
}
/* Return true if it is vsetvldi or vsetvlsi. */
-static bool
+bool
vsetvl_insn_p (rtx_insn *rinsn)
{
if (!rinsn || !vector_config_insn_p (rinsn))
@@ -386,7 +386,7 @@ get_vl (rtx_insn *rinsn)
}
/* Helper function to get AVL operand. */
-static rtx
+rtx
get_avl (rtx_insn *rinsn)
{
if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
@@ -411,7 +411,7 @@ get_default_ma ()
}
/* Helper function to get MA operand. */
-static bool
+bool
mask_agnostic_p (rtx_insn *rinsn)
{
/* If it doesn't have MA, we return agnostic by default. */
@@ -1176,7 +1176,7 @@ public:
if (fault_first_load_p (insn->rtl ()))
{
for (insn_info *i = insn->next_nondebug_insn ();
- i->bb () == insn->bb (); i = i->next_nondebug_insn ())
+ i && i->bb () == insn->bb (); i = i->next_nondebug_insn ())
{
if (find_access (i->defs (), VL_REGNUM))
break;
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d5de76c..96519c9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -735,11 +735,111 @@ static const struct riscv_tune_param mips_p8700_tune_info = {
true, /* prefer-agnostic. */
};
+/* Costs to use when optimizing for Andes 25 series. */
+static const struct riscv_tune_param andes_25_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
+ {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */
+ 1, /* issue_rate */
+ 3, /* branch_cost */
+ 3, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+  NULL,						/* loop_align */
+ true, /* prefer-agnostic. */
+};
+
+/* Costs to use when optimizing for the SpacemiT X60. */
+static const struct riscv_tune_param spacemit_x60_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (15), COSTS_N_INSNS (22)}, /* fp_div */
+ {COSTS_N_INSNS (3), COSTS_N_INSNS (6)}, /* int_mul */
+ {COSTS_N_INSNS (12), COSTS_N_INSNS (20)}, /* int_div */
+ 2, /* issue_rate */
+ 3, /* branch_cost */
+ 5, /* memory_cost */
+ 6, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ true, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+ true, /* prefer-agnostic. */
+};
+
+/* Costs to use when optimizing for Andes 23 series. */
+static const struct riscv_tune_param andes_23_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
+ {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */
+ 2, /* issue_rate */
+ 3, /* branch_cost */
+ 3, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+ true, /* prefer-agnostic. */
+};
+
+/* Costs to use when optimizing for Andes 45 series. */
+static const struct riscv_tune_param andes_45_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
+ {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */
+ 2, /* issue_rate */
+ 3, /* branch_cost */
+ 3, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+ true, /* prefer-agnostic. */
+};
+
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
bool *);
+static tree riscv_handle_rvv_vls_cc_attribute (tree *, tree, tree, int, bool *);
/* Defining target-specific uses of __attribute__. */
static const attribute_spec riscv_gnu_attributes[] =
@@ -763,6 +863,8 @@ static const attribute_spec riscv_gnu_attributes[] =
standard vector calling convention variant. Syntax:
__attribute__((riscv_vector_cc)). */
{"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
+ {"riscv_vls_cc", 0, 1, false, true, true, true,
+ riscv_handle_rvv_vls_cc_attribute, NULL},
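+  /* The riscv_vls_cc attribute above marks a function to use the VLS
+     (fixed-length vector) calling-convention variant; the optional
+     argument selects the ABI_VLEN.  Illustrative syntax:
+     __attribute__((riscv_vls_cc)) or __attribute__((riscv_vls_cc(256))). */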
/* This attribute is used to declare a new type, to appoint the exactly
bits size of the type. For example:
@@ -790,6 +892,8 @@ static const attribute_spec riscv_attributes[] =
standard vector calling convention variant. Syntax:
[[riscv::vector_cc]]. */
{"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
+ {"vls_cc", 0, 1, false, true, true, true, riscv_handle_rvv_vls_cc_attribute,
+ NULL},
/* This attribute is used to declare a new type, to appoint the exactly
bits size of the type. For example:
@@ -1736,8 +1840,19 @@ riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
/* Nonzero offsets are only valid for references that don't use the GOT. */
switch (*symbol_type)
{
- case SYMBOL_ABSOLUTE:
case SYMBOL_PCREL:
+ /* In 64-bit mode, PC-relative offsets beyond +/-1GiB are more likely
+ than not to end up out of range for an auipc instruction placed
+ anywhere within the 2GiB range usable by medany, and such offsets are
+ quite unlikely to come up by chance.  Be conservative and split out
+ the offset in that case; in 64-bit mode addresses don't wrap around. */
+ if (TARGET_64BIT)
+ return sext_hwi (INTVAL (offset), 30) == INTVAL (offset);
+
+ /* Fall through. */
+
+ case SYMBOL_ABSOLUTE:
case SYMBOL_TLS_LE:
/* GAS rejects offsets outside the range [-2^31, 2^31-1]. */
return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
@@ -2760,7 +2875,7 @@ riscv_unspec_address_offset (rtx base, rtx offset,
enum riscv_symbol_type symbol_type)
{
base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
- UNSPEC_ADDRESS_FIRST + symbol_type);
+ UNSPEC_ADDRESS_FIRST + (int) symbol_type);
if (offset != const0_rtx)
base = gen_rtx_PLUS (Pmode, base, offset);
return gen_rtx_CONST (Pmode, base);
@@ -3723,6 +3838,11 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
riscv_vector::emit_vec_extract (result, v,
gen_int_mode (index + i, Pmode));
+ /* The low part must be zero-extended when ELEN == 32 and the
+ mode is 64 bits wide. */
+ if (num == 2 && i == 0)
+ int_reg = convert_modes (mode, smode, result, true);
+
if (i == 1)
{
if (UNITS_PER_WORD < mode_size)
@@ -3768,6 +3888,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
temp_reg = gen_reg_rtx (word_mode);
zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
+ /* SRC is a MEM, so we can always extend it directly; there is
+ no need to go through convert_modes. */
emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
zero_extend_p));
riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
@@ -3822,9 +3944,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
{
rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
rtx temp = gen_reg_rtx (word_mode);
- emit_insn (gen_extend_insn (temp,
- gen_lowpart (HImode, src),
- word_mode, HImode, 1));
+ temp = convert_modes (word_mode, HImode,
+ gen_lowpart (HImode, src), true);
if (word_mode == SImode)
emit_insn (gen_iorsi3 (temp, mask, temp));
else
@@ -4711,6 +4832,13 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq,
if (last_dest)
last_dest = dest;
}
+ else if (REG_P (dest) && src == CONST0_RTX (GET_MODE (dest)))
+ {
+ /* A GPR set to zero can always be replaced with x0, so any
+ insn that sets a GPR to zero will eventually be eliminated. */
+ riscv_if_info.original_cost += COSTS_N_INSNS (1);
+ riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
+ }
else
last_dest = NULL_RTX;
@@ -4897,7 +5025,7 @@ riscv_output_move (rtx dest, rtx src)
if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
return "fmv.x.h\t%0,%1";
/* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
- return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
+ return "fmv.x.s\t%0,%1\n\tslli\t%0,%0,16\n\tsrai\t%0,%0,16";
case 4:
return "fmv.x.s\t%0,%1";
case 8:
@@ -5872,13 +6000,50 @@ typedef struct {
floating-point registers. */
static int
-riscv_flatten_aggregate_field (const_tree type,
- riscv_aggregate_field fields[2],
+riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields,
int n, HOST_WIDE_INT offset,
- bool ignore_zero_width_bit_field_p)
+ bool ignore_zero_width_bit_field_p,
+ bool ignore_empty_union_and_zero_len_array_p,
+ bool vls_p = false, unsigned abi_vlen = 0)
{
+ int max_aggregate_field = vls_p ? 8 : 2;
switch (TREE_CODE (type))
{
+ case UNION_TYPE:
+ {
+ if (!ignore_empty_union_and_zero_len_array_p)
+ return -1;
+	/* An empty union should be ignored. */
+ if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type)))
+ return n;
+	/* Likewise when all union members are empty unions or empty structs. */
+ for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+ {
+ if (TREE_CODE (f) != FIELD_DECL)
+ continue;
+ int m;
+ HOST_WIDE_INT pos = offset + int_byte_position (f);
+ switch (TREE_CODE (TREE_TYPE (f)))
+ {
+ case ARRAY_TYPE:
+ case UNION_TYPE:
+ case RECORD_TYPE:
+ m = riscv_flatten_aggregate_field (
+ TREE_TYPE (f), fields, n, pos,
+ ignore_zero_width_bit_field_p,
+ true);
+		/* Any non-empty struct/union/array stops the flattening. */
+ if (m != n)
+ return -1;
+ break;
+ default:
+	      /* Any member that is not a struct, union or array stops the
+		 flattening. */
+ return -1;
+ }
+ }
+ return n;
+ }
case RECORD_TYPE:
/* Can't handle incomplete types nor sizes that are not fixed. */
if (!COMPLETE_TYPE_P (type)
@@ -5903,9 +6068,11 @@ riscv_flatten_aggregate_field (const_tree type,
else
{
HOST_WIDE_INT pos = offset + int_byte_position (f);
- n = riscv_flatten_aggregate_field (TREE_TYPE (f),
- fields, n, pos,
- ignore_zero_width_bit_field_p);
+ n = riscv_flatten_aggregate_field (
+ TREE_TYPE (f), fields, n, pos,
+ ignore_zero_width_bit_field_p,
+ ignore_empty_union_and_zero_len_array_p,
+ vls_p, abi_vlen);
}
if (n < 0)
return -1;
@@ -5915,16 +6082,23 @@ riscv_flatten_aggregate_field (const_tree type,
case ARRAY_TYPE:
{
HOST_WIDE_INT n_elts;
- riscv_aggregate_field subfields[2];
+ riscv_aggregate_field subfields[8];
tree index = TYPE_DOMAIN (type);
tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
- int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
- subfields, 0, offset,
- ignore_zero_width_bit_field_p);
+	/* An array whose elements have zero size should be ignored. */
+ if (ignore_empty_union_and_zero_len_array_p && integer_zerop (elt_size))
+ return n;
+
+ int n_subfields
+ = riscv_flatten_aggregate_field (
+ TREE_TYPE (type), subfields, 0,
+ offset,
+ ignore_zero_width_bit_field_p,
+ ignore_empty_union_and_zero_len_array_p,
+ vls_p, abi_vlen);
/* Can't handle incomplete types nor sizes that are not fixed. */
- if (n_subfields <= 0
- || !COMPLETE_TYPE_P (type)
+ if (!COMPLETE_TYPE_P (type)
|| TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
|| !index
|| !TYPE_MAX_VALUE (index)
@@ -5934,6 +6108,15 @@ riscv_flatten_aggregate_field (const_tree type,
|| !tree_fits_uhwi_p (elt_size))
return -1;
+      /* A zero-length array of empty unions/structs should be ignored. */
+ if (ignore_empty_union_and_zero_len_array_p && n_subfields == 0
+ && integer_zerop (TYPE_MIN_VALUE (index))
+ && integer_all_onesp (TYPE_MAX_VALUE (index)))
+ return n;
+
+ if (n_subfields <= 0)
+ return -1;
+
n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
- tree_to_uhwi (TYPE_MIN_VALUE (index));
gcc_assert (n_elts >= 0);
@@ -5941,7 +6124,7 @@ riscv_flatten_aggregate_field (const_tree type,
for (HOST_WIDE_INT i = 0; i < n_elts; i++)
for (int j = 0; j < n_subfields; j++)
{
- if (n >= 2)
+ if (n >= max_aggregate_field)
return -1;
fields[n] = subfields[j];
@@ -5973,18 +6156,36 @@ riscv_flatten_aggregate_field (const_tree type,
}
default:
- if (n < 2
- && ((SCALAR_FLOAT_TYPE_P (type)
- && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
- || (INTEGRAL_TYPE_P (type)
- && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
+ poly_uint64 mode_size = GET_MODE_SIZE (TYPE_MODE (type));
+ if (vls_p)
{
- fields[n].type = type;
- fields[n].offset = offset;
- return n + 1;
+ gcc_assert (abi_vlen != 0);
+ if (n < max_aggregate_field
+ && (VECTOR_TYPE_P (type) && mode_size.is_constant ()
+ && (mode_size.to_constant () <= abi_vlen * 8)))
+ {
+ fields[n].type = type;
+ fields[n].offset = offset;
+ return n + 1;
+ }
+ else
+ return -1;
}
else
- return -1;
+ {
+ if (n < max_aggregate_field
+ && ((SCALAR_FLOAT_TYPE_P (type)
+ && mode_size.to_constant () <= UNITS_PER_FP_ARG)
+ || (INTEGRAL_TYPE_P (type)
+ && mode_size.to_constant () <= UNITS_PER_WORD)))
+ {
+ fields[n].type = type;
+ fields[n].offset = offset;
+ return n + 1;
+ }
+ else
+ return -1;
+ }
}
}
@@ -5993,14 +6194,27 @@ riscv_flatten_aggregate_field (const_tree type,
static int
riscv_flatten_aggregate_argument (const_tree type,
- riscv_aggregate_field fields[2],
- bool ignore_zero_width_bit_field_p)
+ riscv_aggregate_field *fields,
+ bool ignore_zero_width_bit_field_p,
+ bool ignore_empty_union_and_zero_len_array_p,
+ bool vls_p = false, unsigned abi_vlen = 0)
{
if (!type || TREE_CODE (type) != RECORD_TYPE)
return -1;
return riscv_flatten_aggregate_field (type, fields, 0, 0,
- ignore_zero_width_bit_field_p);
+ ignore_zero_width_bit_field_p,
+ ignore_empty_union_and_zero_len_array_p,
+ vls_p, abi_vlen);
+}
+
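+/* Return true if any of the first N flattened FIELDS has a type other
+ than a scalar floating-point type. */
+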
+static bool
+riscv_any_non_float_type_field (riscv_aggregate_field *fields, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
+ return true;
+ return false;
}
/* See whether TYPE is a record whose fields should be returned in one or
@@ -6011,24 +6225,18 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
riscv_aggregate_field fields[2])
{
static int warned = 0;
+ if (!type)
+ return 0;
/* This is the old ABI, which differs for C++ and C. */
- int n_old = riscv_flatten_aggregate_argument (type, fields, false);
- for (int i = 0; i < n_old; i++)
- if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
- {
- n_old = -1;
- break;
- }
+ int n_old = riscv_flatten_aggregate_argument (type, fields, false, false);
+ if (riscv_any_non_float_type_field (fields, n_old))
+ n_old = -1;
/* This is the new ABI, which is the same for C++ and C. */
- int n_new = riscv_flatten_aggregate_argument (type, fields, true);
- for (int i = 0; i < n_new; i++)
- if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
- {
- n_new = -1;
- break;
- }
+ int n_new = riscv_flatten_aggregate_argument (type, fields, true, false);
+ if (riscv_any_non_float_type_field (fields, n_new))
+ n_new = -1;
if ((n_old != n_new) && (warned == 0))
{
@@ -6037,7 +6245,58 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
warned = 1;
}
- return n_new > 0 ? n_new : 0;
+ /* The ABI after fixing the flattening of empty unions. */
+ int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true);
+ if (riscv_any_non_float_type_field (fields, n_new2))
+ n_new2 = -1;
+
+ bool num_fpr = riscv_pass_mode_in_fpr_p (TYPE_MODE (type));
+
+ /* There is a special case for a struct containing a zero-length array
+ of (empty) structs plus a floating point member.
+ e.g.:
+ struct S0ae_1f {
+ struct {
+ } e1[0];
+ float f;
+ };
+
+ Here the new logic gets 1, but the legacy ABI gets -1. However, the
+ legacy ABI still ends up with 1 in later logic, so we should treat
+ this case as compatible. */
+ bool compatible_p = n_new2 == 1 && n_new == -1 && num_fpr == 1;
+
+ if ((n_new2 != n_new)
+ && !compatible_p && (warned == 0))
+ {
+ warning (OPT_Wpsabi, "ABI for flattened empty union and zero "
+ "length array changed in GCC 16");
+ warned = 1;
+ }
+
+ return n_new2 > 0 ? n_new2 : 0;
+}
+
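+/* Field counts gathered from a flattened aggregate: the number of
+ FPR-class (scalar float) and GPR-class (integral) fields. */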
+struct riscv_aggregate_field_info_t {
+ unsigned num_fpr;
+ unsigned num_gpr;
+
+ riscv_aggregate_field_info_t ()
+ : num_fpr (0), num_gpr (0)
+ {}
+};
+
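+/* Count the scalar floating-point and integral fields among the first
+ N entries of FIELDS. */
+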
+static riscv_aggregate_field_info_t
+riscv_parse_aggregate_field_info (riscv_aggregate_field *fields, int n)
+{
+ riscv_aggregate_field_info_t info;
+ for (int i = 0; i < n; i++)
+ {
+ info.num_fpr += SCALAR_FLOAT_TYPE_P (fields[i].type);
+ info.num_gpr += INTEGRAL_TYPE_P (fields[i].type);
+ }
+
+ return info;
}
/* See whether TYPE is a record whose fields should be returned in one or
@@ -6051,35 +6310,48 @@ riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
static int warned = 0;
/* This is the old ABI, which differs for C++ and C. */
- unsigned num_int_old = 0, num_float_old = 0;
- int n_old = riscv_flatten_aggregate_argument (type, fields, false);
- for (int i = 0; i < n_old; i++)
- {
- num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
- num_int_old += INTEGRAL_TYPE_P (fields[i].type);
- }
+ int n_old = riscv_flatten_aggregate_argument (type, fields, false, false);
+ riscv_aggregate_field_info_t old_info;
+ old_info = riscv_parse_aggregate_field_info (fields, n_old);
/* This is the new ABI, which is the same for C++ and C. */
- unsigned num_int_new = 0, num_float_new = 0;
- int n_new = riscv_flatten_aggregate_argument (type, fields, true);
- for (int i = 0; i < n_new; i++)
- {
- num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
- num_int_new += INTEGRAL_TYPE_P (fields[i].type);
- }
+ int n_new = riscv_flatten_aggregate_argument (type, fields, true, false);
+ riscv_aggregate_field_info_t new_info;
+ new_info = riscv_parse_aggregate_field_info (fields, n_new);
+
+ bool values_changed = old_info.num_fpr != new_info.num_fpr
+ || old_info.num_gpr != new_info.num_gpr;
+ bool old_is_one_one = old_info.num_fpr == 1 && old_info.num_gpr == 1;
+ bool new_is_one_one = new_info.num_fpr == 1 && new_info.num_gpr == 1;
- if (((num_int_old == 1 && num_float_old == 1
- && (num_int_old != num_int_new || num_float_old != num_float_new))
- || (num_int_new == 1 && num_float_new == 1
- && (num_int_old != num_int_new || num_float_old != num_float_new)))
- && (warned == 0))
+ if (values_changed
+ && (old_is_one_one || new_is_one_one)
+ && warned == 0)
{
warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
"bit-fields changed in GCC 10");
warned = 1;
}
- return num_int_new == 1 && num_float_new == 1;
+ /* The ABI after fixing the flattening of empty unions. */
+ int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true);
+ riscv_aggregate_field_info_t new2_info;
+ new2_info = riscv_parse_aggregate_field_info (fields, n_new2);
+
+ values_changed = new_info.num_fpr != new2_info.num_fpr
+ || new_info.num_gpr != new2_info.num_gpr;
+ bool new2_is_one_one = new2_info.num_fpr == 1 && new2_info.num_gpr == 1;
+
+ if (values_changed
+ && (new_is_one_one || new2_is_one_one)
+ && warned == 0)
+ {
+ warning (OPT_Wpsabi, "ABI for flattened empty union and zero "
+ "length array changed in GCC 16");
+ warned = 1;
+ }
+
+ return new2_is_one_one;
}
/* Return the representation of an argument passed or returned in an FPR
@@ -6163,18 +6435,22 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
}
+static const predefined_function_abi &
+riscv_fntype_abi_1 (const_tree fntype, bool check_only);
+
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
void
riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
- rtx, tree, int)
+ rtx, tree, int, bool check_only)
{
memset (cum, 0, sizeof (*cum));
if (fntype)
- cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
+ cum->variant_cc =
+ (riscv_cc) riscv_fntype_abi_1 (fntype, check_only).id ();
else
cum->variant_cc = RISCV_CC_BASE;
}
@@ -6197,7 +6473,7 @@ riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
static rtx
riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
- machine_mode mode, bool return_p)
+ machine_mode mode, bool return_p, bool vls_p = false)
{
gcc_assert (riscv_v_ext_mode_p (mode));
@@ -6233,8 +6509,9 @@ riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
- /* For scalable data and scalable tuple return value. */
- if (return_p)
+ /* For scalable data and scalable tuple return values.
+ For the VLS CC we may pass a struct like a tuple, so we need to defer
+ the handling. */
+ if (return_p && !vls_p)
return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
/* Iterate through the USED_VRS array to find vector register groups that have
@@ -6271,6 +6548,224 @@ riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
return NULL_RTX;
}
+
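+/* X-macro listing every ABI_VLEN accepted by the VLS calling
+ convention: the powers of two from 32 to 16384. */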
+#define RISCV_ALL_VALID_ABI_VLEN(F) \
+ F (32) \
+ F (64) \
+ F (128) \
+ F (256) \
+ F (512) \
+ F (1024) \
+ F (2048) \
+ F (4096) \
+ F (8192) \
+ F (16384)
+
+/* Return true if CC is a variant of VLS CC. */
+
+static bool
+riscv_vls_cc_p (riscv_cc cc)
+{
+ switch (cc)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case RISCV_CC_VLS_V_##ABI_VLEN:
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Return the ABI_VLEN encoded in CC. */
+
+static unsigned int
+riscv_get_cc_abi_vlen (riscv_cc cc)
+{
+ switch (cc)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case RISCV_CC_VLS_V_##ABI_VLEN: \
+ return ABI_VLEN;
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if ABI_VLEN is valid for the VLS CC. */
+
+static bool
+riscv_valid_abi_vlen_vls_cc_p (unsigned abi_vlen)
+{
+ switch (abi_vlen)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case ABI_VLEN:
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ return true;
+ default:
+ return false;
+ }
+}
+
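+/* Return the RISCV_CC_VLS_V_* calling-convention variant corresponding
+ to ABI_VLEN. */
+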
+static riscv_cc
+riscv_get_riscv_cc_by_abi_vlen (unsigned abi_vlen)
+{
+ switch (abi_vlen)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case ABI_VLEN: \
+ return RISCV_CC_VLS_V_##ABI_VLEN;
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Get a VLA mode with the same size as MODE under ABI_VLEN, but whose
+ element is always an integer mode. */
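+
+/* For example, a 128-bit V4SF mode under an ABI_VLEN of 64 becomes the
+ LMUL-2 VLA mode with SImode elements (an illustrative instance of the
+ mapping implemented below). */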
+
+static machine_mode
+riscv_get_vls_container_type (machine_mode mode, unsigned abi_vlen)
+{
+ machine_mode element_mode = GET_MODE_INNER (mode);
+ unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
+ unsigned int lmul = ROUND_UP (mode_size * 8, abi_vlen) / abi_vlen;
+
+ /* Always use an integer element mode to simplify the logic - we allow
+ passing unsupported vector types in vector registers, e.g. float16x4_t
+ even without vector fp16 support. */
+ switch (GET_MODE_SIZE (element_mode).to_constant ())
+ {
+ case 1:
+ element_mode = QImode;
+ break;
+ case 2:
+ element_mode = HImode;
+ break;
+ case 4:
+ element_mode = SImode;
+ break;
+ case 8:
+ element_mode = DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ scalar_mode smode = as_a<scalar_mode> (element_mode);
+ return get_lmul_mode (smode, lmul).require ();
+}
+
+/* Pass a VLS type argument in vector argument registers. */
+
+static rtx
+riscv_pass_vls_in_vr (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
+ machine_mode mode, bool return_p)
+{
+ gcc_assert (riscv_v_ext_vls_mode_p (mode));
+
+ unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc);
+ unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
+ unsigned int lmul = ROUND_UP (mode_size * 8, abi_vlen) / abi_vlen;
+
+ /* Pass in memory if it needs more than 8 registers (> LMUL 8). */
+ if (lmul > 8)
+ return NULL_RTX;
+
+ machine_mode vla_mode = riscv_get_vls_container_type (mode, abi_vlen);
+ rtx reg = riscv_get_vector_arg (info, cum, vla_mode,
+ return_p, /* vls_p */ true);
+
+ /* Can't get a vector register to pass it in; pass by memory. */
+ if (!reg)
+ return NULL_RTX;
+
+ PUT_MODE (reg, mode);
+
+ return reg;
+}
+
+/* Pass an aggregate of VLS type fields in vector argument registers. */
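+/* For example (illustrative), with an ABI_VLEN of 128:
+
+ typedef int32_t v4si __attribute__ ((vector_size (16)));
+ struct pair { v4si a; v4si b; };
+
+ each 128-bit field occupies one vector register, so the struct is
+ passed in two consecutive V argument registers. */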
+
+static rtx
+riscv_pass_aggregate_in_vr (struct riscv_arg_info *info,
+ const CUMULATIVE_ARGS *cum, const_tree type,
+ bool return_p)
+{
+ riscv_aggregate_field fields[8];
+ unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc);
+ int i;
+ int n = riscv_flatten_aggregate_argument (type, fields, true, true,
+ /* vls_p */ true, abi_vlen);
+
+ if (n <= 0)
+ return NULL_RTX;
+
+ /* Check that all fields have the same size. */
+ unsigned int mode_size
+ = GET_MODE_SIZE (TYPE_MODE (fields[0].type)).to_constant ();
+ for (int i = 1; i < n; i++)
+ if (GET_MODE_SIZE (TYPE_MODE (fields[i].type)).to_constant () != mode_size)
+ return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
+
+ /* Check the total size is <= abi_vlen * 8; we use up to 8 vector
+ registers to pass the argument. */
+ if (mode_size * 8 > abi_vlen)
+ return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
+
+ /* Backup cum->used_vrs since we will defer the update until
+ riscv_function_arg_advance. */
+ CUMULATIVE_ARGS local_cum;
+ memcpy (&local_cum, cum, sizeof (local_cum));
+
+ unsigned num_vrs = 0;
+
+ /* Allocate vector registers for the arguments. */
+ rtx expr_list[8];
+ for (i = 0; i < n; i++)
+ {
+ machine_mode mode = TYPE_MODE (fields[i].type);
+ machine_mode vla_mode = riscv_get_vls_container_type (mode, abi_vlen);
+ /* Use riscv_get_vector_arg with a VLA mode to simplify the calling
+ convention implementation. */
+ rtx reg
+ = riscv_get_vector_arg (info, &local_cum, vla_mode,
+ return_p, /* vls_p */true);
+
+ /* Can't get a vector register to pass it in; pass by memory. */
+ if (!reg)
+ return NULL_RTX;
+
+ PUT_MODE (reg, mode);
+
+ expr_list[i]
+ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (fields[i].offset));
+
+ num_vrs += info->num_vrs;
+
+ /* Set the corresponding register in USED_VRS to used status. */
+ for (unsigned int i = 0; i < info->num_vrs; i++)
+ {
+ gcc_assert (!local_cum.used_vrs[info->vr_offset + i]);
+ local_cum.used_vrs[info->vr_offset + i] = true;
+ }
+ }
+
+ info->num_vrs = num_vrs;
+
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (n, expr_list));
+}
+
/* Fill INFO with information about a single argument, and return an RTL
pattern to pass or return the argument. Return NULL_RTX if argument cannot
pass or return in registers, then the argument may be passed by reference or
@@ -6363,7 +6858,17 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
return riscv_get_vector_arg (info, cum, mode, return_p);
- /* For vls mode aggregated in gpr. */
+ if (riscv_vls_cc_p (cum->variant_cc))
+ {
+ if (riscv_v_ext_vls_mode_p (mode))
+ return riscv_pass_vls_in_vr (info, cum, mode, return_p);
+
+ rtx ret = riscv_pass_aggregate_in_vr (info, cum, type, return_p);
+ if (ret)
+ return ret;
+ }
+
+ /* For VLS modes aggregated in GPRs (when not using the VLS CC). */
if (riscv_v_ext_vls_mode_p (mode))
return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
}
@@ -6420,7 +6925,8 @@ riscv_function_arg_advance (cumulative_args_t cum_v,
cum->used_vrs[info.vr_offset + i] = true;
}
- if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
+ if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V
+ && !riscv_vls_cc_p (cum->variant_cc))
{
error ("RVV type %qT cannot be passed to an unprototyped function",
arg.type);
@@ -6463,7 +6969,8 @@ riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
{
const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
- riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0,
+ /* check_only */true);
}
else
memset (&args, 0, sizeof args);
@@ -6532,14 +7039,20 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
/* Implement TARGET_RETURN_IN_MEMORY. */
static bool
-riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+riscv_return_in_memory (const_tree type, const_tree fntype)
{
CUMULATIVE_ARGS args;
+
+ if (fntype)
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0,
+ /* check_only */true);
+ else
+ /* The rules for returning in memory are the same as for passing the
+ first named argument by reference. */
+ memset (&args, 0, sizeof args);
+
cumulative_args_t cum = pack_cumulative_args (&args);
- /* The rules for returning in memory are the same as for passing the
- first named argument by reference. */
- memset (&args, 0, sizeof args);
function_arg_info arg (const_cast<tree> (type), /*named=*/true);
return riscv_pass_by_reference (cum, arg);
}
@@ -6583,9 +7096,9 @@ riscv_setup_incoming_varargs (cumulative_args_t cum,
/* Return the descriptor of the Standard Vector Calling Convention Variant. */
static const predefined_function_abi &
-riscv_v_abi ()
+riscv_v_abi (riscv_cc abi)
{
- predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
+ predefined_function_abi &v_abi = function_abis[abi];
if (!v_abi.initialized_p ())
{
HARD_REG_SET full_reg_clobbers
@@ -6595,7 +7108,7 @@ riscv_v_abi ()
CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
- v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
+ v_abi.initialize (abi, full_reg_clobbers);
}
return v_abi;
}
@@ -6756,13 +7269,14 @@ riscv_validate_vector_type (const_tree type, const char *hint)
RISC-V V registers. */
static bool
-riscv_return_value_is_vector_type_p (const_tree fntype)
+riscv_return_value_is_vector_type_p (const_tree fntype, bool check_only)
{
tree return_type = TREE_TYPE (fntype);
if (riscv_vector_type_p (return_type))
{
- riscv_validate_vector_type (return_type, "return type");
+ if (!check_only)
+ riscv_validate_vector_type (return_type, "return type");
return true;
}
else
@@ -6773,7 +7287,7 @@ riscv_return_value_is_vector_type_p (const_tree fntype)
RISC-V V registers. */
static bool
-riscv_arguments_is_vector_type_p (const_tree fntype)
+riscv_arguments_is_vector_type_p (const_tree fntype, bool check_only)
{
for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
chain = TREE_CHAIN (chain))
@@ -6781,7 +7295,8 @@ riscv_arguments_is_vector_type_p (const_tree fntype)
tree arg_type = TREE_VALUE (chain);
if (riscv_vector_type_p (arg_type))
{
- riscv_validate_vector_type (arg_type, "argument type");
+ if (!check_only)
+ riscv_validate_vector_type (arg_type, "argument type");
return true;
}
}
@@ -6792,14 +7307,15 @@ riscv_arguments_is_vector_type_p (const_tree fntype)
/* Return true if FUNC is a riscv_vector_cc function.
For more details please reference the below link.
https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
+
static bool
-riscv_vector_cc_function_p (const_tree fntype)
+riscv_vector_cc_function_p (const_tree fntype, bool check_only)
{
tree attr = TYPE_ATTRIBUTES (fntype);
bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
|| lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
- if (vector_cc_p && !TARGET_VECTOR)
+ if (vector_cc_p && !TARGET_VECTOR && !check_only)
error_at (input_location,
"function attribute %qs requires the V ISA extension",
"riscv_vector_cc");
@@ -6807,26 +7323,91 @@ riscv_vector_cc_function_p (const_tree fntype)
return vector_cc_p;
}
-/* Implement TARGET_FNTYPE_ABI. */
+/* Return the riscv_cc value according to the attribute arguments.
+ If the attribute arguments are invalid, return RISCV_CC_UNKNOWN
+ and emit an error message. */
+
+static riscv_cc
+riscv_get_vls_cc_attr (const_tree args, bool check_only = false)
+{
+ /* Default ABI_VLEN is 128. */
+ int abi_vlen = 128;
+
+ if (args && TREE_CODE (args) == TREE_LIST)
+ {
+ tree vlen_arg = TREE_VALUE (args);
+ if (vlen_arg && TREE_CODE (vlen_arg) == INTEGER_CST)
+ abi_vlen = TREE_INT_CST_LOW (vlen_arg);
+ }
+
+ if (!riscv_valid_abi_vlen_vls_cc_p (abi_vlen) && !check_only)
+ {
+ error_at (input_location,
+ "unsupported %<ABI_VLEN%> value %d for %qs attribute;"
+ "%<ABI_VLEN must%> be in the range [32, 16384] and must be "
+ "a power of 2",
+ abi_vlen, "riscv_vls_cc");
+ return RISCV_CC_UNKNOWN;
+ }
+
+ return riscv_get_riscv_cc_by_abi_vlen (abi_vlen);
+}
+
+/* Return the riscv_cc variant for FNTYPE if it is a riscv_vls_cc
+ function, otherwise RISCV_CC_UNKNOWN. For more details please
+ reference the below link.
+ https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
+static riscv_cc
+riscv_vls_cc_function_abi (const_tree fntype, bool check_only)
+{
+ tree attr = TYPE_ATTRIBUTES (fntype);
+ bool vls_cc_p = lookup_attribute ("vls_cc", attr) != NULL_TREE
+ || lookup_attribute ("riscv_vls_cc", attr) != NULL_TREE;
+
+ if (!vls_cc_p)
+ return RISCV_CC_UNKNOWN;
+
+ if (!TARGET_VECTOR && !check_only)
+ error_at (input_location,
+ "function attribute %qs requires the vector ISA extension",
+ "riscv_vls_cc");
+
+ tree args = TREE_VALUE (attr);
+ return riscv_get_vls_cc_attr (args, check_only);
+}
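+
+/* For example (an illustrative sketch, not taken from this patch):
+
+ typedef int32_t v8si __attribute__ ((vector_size (32)));
+ __attribute__ ((riscv_vls_cc (256))) v8si add_one (v8si v);
+
+ selects RISCV_CC_VLS_V_256, i.e. an ABI_VLEN of 256 bits; without an
+ attribute argument the default ABI_VLEN of 128 is used. */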
+
+/* Implementation of TARGET_FNTYPE_ABI, but with one extra parameter
+ CHECK_ONLY to suppress diagnostic messages. */
static const predefined_function_abi &
-riscv_fntype_abi (const_tree fntype)
+riscv_fntype_abi_1 (const_tree fntype, bool check_only)
{
/* Implement the vector calling convention. For more details please
reference the below link.
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
bool validate_v_abi_p = false;
- validate_v_abi_p |= riscv_return_value_is_vector_type_p (fntype);
- validate_v_abi_p |= riscv_arguments_is_vector_type_p (fntype);
- validate_v_abi_p |= riscv_vector_cc_function_p (fntype);
+ validate_v_abi_p |= riscv_return_value_is_vector_type_p (fntype, check_only);
+ validate_v_abi_p |= riscv_arguments_is_vector_type_p (fntype, check_only);
+ validate_v_abi_p |= riscv_vector_cc_function_p (fntype, check_only);
if (validate_v_abi_p)
- return riscv_v_abi ();
+ return riscv_v_abi (RISCV_CC_V);
+
+ riscv_cc abi = riscv_vls_cc_function_abi (fntype, check_only);
+ if (abi != RISCV_CC_UNKNOWN)
+ return riscv_v_abi (abi);
return default_function_abi;
}
+/* Implement TARGET_FNTYPE_ABI. */
+
+static const predefined_function_abi &
+riscv_fntype_abi (const_tree fntype)
+{
+ return riscv_fntype_abi_1 (fntype, /* check_only */true);
+}
+
/* Return riscv calling convention of call_insn. */
riscv_cc
get_riscv_cc (const rtx use)
@@ -6916,6 +7497,25 @@ riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
}
static tree
+riscv_handle_rvv_vls_cc_attribute (tree *, tree name, tree args,
+ ATTRIBUTE_UNUSED int flags,
+ bool *no_add_attrs)
+{
+ bool vls_cc_p = is_attribute_p ("vls_cc", name)
+ || is_attribute_p ("riscv_vls_cc", name);
+
+ if (!vls_cc_p)
+ return NULL_TREE;
+
+ riscv_cc cc = riscv_get_vls_cc_attr (args);
+
+ if (cc == RISCV_CC_UNKNOWN)
+ *no_add_attrs = true;
+
+ return NULL_TREE;
+}
+
+static tree
riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
ATTRIBUTE_UNUSED int flags,
bool *no_add_attrs)
@@ -10215,6 +10815,71 @@ riscv_issue_rate (void)
return tune_param->issue_rate;
}
+/* Structure for very basic vector configuration tracking in the scheduler. */
+struct last_vconfig
+{
+ bool valid;
+ bool ta;
+ bool ma;
+ uint8_t sew;
+ uint8_t vlmul;
+ rtx avl;
+} last_vconfig;
+
+/* Clear LAST_VCONFIG so we have no known state. */
+static void
+clear_vconfig (void)
+{
+ memset (&last_vconfig, 0, sizeof (last_vconfig));
+}
+
+/* Return TRUE if INSN is a vector insn needing a particular
+ vector configuration that is trivially equal to that of the last
+ vector insn issued. Return FALSE otherwise. */
+static bool
+compatible_with_last_vconfig (rtx_insn *insn)
+{
+ /* We might be able to extract the data from a preexisting vsetvl. */
+ if (vsetvl_insn_p (insn))
+ return false;
+
+ /* Nothing to do for these cases. */
+ if (!NONDEBUG_INSN_P (insn) || !has_vtype_op (insn))
+ return false;
+
+ extract_insn_cached (insn);
+
+ rtx avl = get_avl (insn);
+ if (avl != last_vconfig.avl)
+ return false;
+
+ if (get_sew (insn) != last_vconfig.sew)
+ return false;
+
+ if (get_vlmul (insn) != last_vconfig.vlmul)
+ return false;
+
+ if (tail_agnostic_p (insn) != last_vconfig.ta)
+ return false;
+
+ if (mask_agnostic_p (insn) != last_vconfig.ma)
+ return false;
+
+ /* No differences found, they're trivially compatible. */
+ return true;
+}
+
+/* Implement TARGET_SCHED_INIT, we use this to track the vector configuration
+ of the last issued vector instruction. We can then use that information
+ to potentially adjust the ready queue to issue instructions of a compatible
+ vector configuration instead of a conflicting configuration. That will
+ reduce the number of vsetvl instructions we ultimately emit. */
+static void
+riscv_sched_init (FILE *, int, int)
+{
+ clear_vconfig ();
+}
+
/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
static int
riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
@@ -10239,9 +10904,88 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
an assert so we can find and fix this problem. */
gcc_assert (insn_has_dfa_reservation_p (insn));
+ /* If this is a vector insn with vl/vtype info, then record the last
+ vector configuration. */
+ if (vsetvl_insn_p (insn))
+ clear_vconfig ();
+ else if (NONDEBUG_INSN_P (insn) && has_vtype_op (insn))
+ {
+ extract_insn_cached (insn);
+
+ rtx avl = get_avl (insn);
+ if (avl == RVV_VLMAX)
+ avl = const0_rtx;
+
+ if (!avl || !CONST_INT_P (avl))
+ clear_vconfig ();
+ else
+ {
+ last_vconfig.valid = true;
+ last_vconfig.avl = avl;
+ last_vconfig.sew = get_sew (insn);
+ last_vconfig.vlmul = get_vlmul (insn);
+ last_vconfig.ta = tail_agnostic_p (insn);
+ last_vconfig.ma = mask_agnostic_p (insn);
+ }
+ }
+
return more - 1;
}
+/* Implement TARGET_SCHED_REORDER. The goal here is to look at the ready
+ queue and reorder it ever so slightly to encourage issing an insn with
+ the same vector configuration as the most recently issued vector
+ instruction. That will reduce vsetvl instructions. */
+static int
+riscv_sched_reorder (FILE *, int, rtx_insn **ready, int *nreadyp, int)
+{
+ /* If we don't have a valid prior vector configuration, then there is
+ no point in reordering the ready queue, similarly if there is
+ just one entry in the queue. */
+ if (!last_vconfig.valid || *nreadyp == 1)
+ return riscv_issue_rate ();
+
+ int nready = *nreadyp;
+ int priority = INSN_PRIORITY (ready[nready - 1]);
+ for (int i = nready - 1; i >= 0; i--)
+ {
+ rtx_insn *insn = ready[i];
+
+ /* On a high performance core, vsetvl instructions should be
+ inexpensive. Removing them is very much a secondary concern, so
+ be extremely conservative with reordering, essentially only
+ allowing reordering within the highest priority value.
+
+ Lower end cores may benefit from more flexibility here. That
+ tuning is left to those who understand their core's behavior
+ and can thoroughly benchmark the result. Assuming such
+ designs appear, we can probably put an entry in the tuning
+ structure to indicate how much difference in priority to allow. */
+ if (INSN_PRIORITY (insn) < priority)
+ break;
+
+ if (compatible_with_last_vconfig (insn))
+ {
+ /* This entry is compatible with the last vconfig and has
+ the same priority as the most important insn. So swap
+ it so that we keep the vector configuration as-is and
+ ultimately eliminate a vsetvl.
+
+ Note no need to swap if this is the first entry in the
+ queue. */
+ if (i == nready - 1)
+ break;
+
+ std::swap (ready[i], ready[nready - 1]);
+ break;
+ }
+ }
+
+ return riscv_issue_rate ();
+}
+
+
/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
instruction fusion of some sort. */
@@ -11082,7 +11826,7 @@ riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
if (TREE_CODE (decl) == FUNCTION_DECL)
{
riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
- if (cc == RISCV_CC_V)
+ if (cc == RISCV_CC_V || riscv_vls_cc_p (cc))
{
fprintf (stream, "\t.variant_cc\t");
assemble_name (stream, name);
@@ -11481,6 +12225,12 @@ riscv_override_options_internal (struct gcc_options *opts)
/* Convert -march and -mrvv-vector-bits to a chunks count. */
riscv_vector_chunks = riscv_convert_vector_chunks (opts);
+ /* Set scalar costing to a high value such that we essentially always
+ pick vectorization, by increasing scalar costs 100x. */
+ if (opts->x_riscv_max_vectorization)
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_scalar_cost_multiplier, 10000);
+
if (opts->x_flag_cf_protection != CF_NONE)
{
if ((opts->x_flag_cf_protection & CF_RETURN) == CF_RETURN
@@ -11718,6 +12468,39 @@ riscv_option_restore (struct gcc_options *opts,
static GTY (()) tree riscv_previous_fndecl;
+/* Reset the previous function declaration. */
+
+void
+riscv_reset_previous_fndecl (void)
+{
+ riscv_previous_fndecl = NULL;
+}
+
+/* Implement TARGET_OPTION_SAVE. */
+
+static void
+riscv_option_save (struct cl_target_option *ptr,
+ struct gcc_options *opts,
+ struct gcc_options * /* opts_set */)
+{
+ ptr->x_riscv_arch_string = opts->x_riscv_arch_string;
+ ptr->x_riscv_tune_string = opts->x_riscv_tune_string;
+ ptr->x_riscv_cpu_string = opts->x_riscv_cpu_string;
+}
+
+/* Implement TARGET_OPTION_PRINT. */
+
+static void
+riscv_option_print (FILE *file, int indent, struct cl_target_option *ptr)
+{
+ fprintf (file, "%*sarch = %s\n", indent, "",
+ ptr->x_riscv_arch_string ? ptr->x_riscv_arch_string : "default");
+ fprintf (file, "%*stune = %s\n", indent, "",
+ ptr->x_riscv_tune_string ? ptr->x_riscv_tune_string : "default");
+ if (ptr->x_riscv_cpu_string)
+ fprintf (file, "%*scpu = %s\n", indent, "", ptr->x_riscv_cpu_string);
+}
+
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
static void
@@ -12054,7 +12837,7 @@ riscv_get_interrupt_type (tree decl)
/* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
of the function, if such exists. This function may be called multiple
- times on a single function so use aarch64_previous_fndecl to avoid
+ times on a single function so use riscv_previous_fndecl to avoid
setting up identical state. */
/* Sanity checking for above function attributes. */
@@ -13510,84 +14293,14 @@ riscv_get_raw_result_mode (int regno)
return default_get_reg_raw_mode (regno);
}
-/* Generate a REG rtx of Xmode from the given rtx and mode.
- The rtx x can be REG (QI/HI/SI/DI) or const_int.
- The machine_mode mode is the original mode from define pattern.
- The rtx_code can be ZERO_EXTEND or SIGN_EXTEND.
-
- If rtx is REG:
-
- 1. If rtx Xmode, the RTX x will be returned directly.
- 2. If rtx non-Xmode, the value extended into a new REG of Xmode will be
- returned.
-
- The scalar ALU like add don't support non-Xmode like QI/HI. Then the
- gen_lowpart will have problem here. For example, when we would like
- to add -1 (0xff if QImode) and 2 (0x2 if QImode). The 0xff and 0x2 will
- be loaded to register for adding. Aka:
-
- 0xff + 0x2 = 0x101 instead of -1 + 2 = 1.
-
- Thus we need to sign extend 0xff to 0xffffffffffffffff if Xmode is DImode
- for correctness. Similar the unsigned also need zero extend.
-
- If rtx is const_int:
-
- 1. A new REG rtx will be created to hold the value of const_int.
-
- According to the gccint doc, the constants generated for modes with fewer
- bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
- will be two cases here, take QImode as example.
-
- For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
- mov from const_int to the new REG rtx is good enough here.
-
- For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
- Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
- of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved
- from the (const_int -2).
-
- Then the underlying expanding can perform the code generation based on
- the REG rtx of Xmode, instead of taking care of these in expand func. */
-
+/* Force X, of mode MODE, into an Xmode register, zero- or
+ sign-extending it according to RCODE. */
static rtx
riscv_extend_to_xmode_reg (rtx x, machine_mode mode, enum rtx_code rcode)
{
gcc_assert (rcode == ZERO_EXTEND || rcode == SIGN_EXTEND);
- rtx xmode_reg = gen_reg_rtx (Xmode);
-
- if (CONST_INT_P (x))
- {
- if (mode == Xmode)
- emit_move_insn (xmode_reg, x);
- else if (rcode == ZERO_EXTEND)
- {
- /* Combine deliberately does not simplify extensions of constants
- (long story). So try to generate the zero extended constant
- efficiently.
-
- First extract the constant and mask off all the bits not in
- MODE. */
- HOST_WIDE_INT val = INTVAL (x);
- val &= GET_MODE_MASK (mode);
-
- /* X may need synthesis, so do not blindly copy it. */
- xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode));
- }
- else /* SIGN_EXTEND. */
- {
- rtx x_reg = gen_reg_rtx (mode);
- emit_move_insn (x_reg, x);
- riscv_emit_unary (rcode, xmode_reg, x_reg);
- }
- }
- else if (mode == Xmode)
- return x;
- else
- riscv_emit_unary (rcode, xmode_reg, x);
-
- return xmode_reg;
+ rtx t = convert_modes (Xmode, mode, x, rcode == ZERO_EXTEND);
+ return force_reg (Xmode, t);
}
/* Implements the unsigned saturation add standard name usadd for int mode.
@@ -13934,7 +14647,7 @@ riscv_expand_ustrunc (rtx dest, rtx src)
gcc_assert (precision < 64);
uint64_t max = ((uint64_t)1u << precision) - 1u;
- rtx xmode_src = gen_lowpart (Xmode, src);
+ rtx xmode_src = riscv_extend_to_xmode_reg (src, GET_MODE (src), ZERO_EXTEND);
rtx xmode_dest = gen_reg_rtx (Xmode);
rtx xmode_lt = gen_reg_rtx (Xmode);
@@ -15237,7 +15950,8 @@ synthesize_and (rtx operands[3])
if (tmode != VOIDmode)
{
rtx tmp = gen_lowpart (tmode, operands[1]);
- emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1));
+ emit_move_insn (operands[0], convert_modes (word_mode, tmode,
+ tmp, true));
return true;
}
}
@@ -15634,9 +16348,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE riscv_option_override
+#undef TARGET_OPTION_SAVE
+#define TARGET_OPTION_SAVE riscv_option_save
+
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE riscv_option_restore
+#undef TARGET_OPTION_PRINT
+#define TARGET_OPTION_PRINT riscv_option_print
+
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
@@ -15650,9 +16370,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT riscv_sched_init
+
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER riscv_sched_reorder
+
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 9146571..6a3e537 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -60,17 +60,18 @@ extern const char *riscv_arch_help (int argc, const char **argv);
{ "riscv_arch_help", riscv_arch_help },
/* Support for a compile-time default CPU, et cetera. The rules are:
- --with-arch is ignored if -march or -mcpu is specified.
+ --with-arch and --with-cpu are ignored if -march or -mcpu is specified.
--with-abi is ignored if -mabi is specified.
--with-tune is ignored if -mtune or -mcpu is specified.
--with-isa-spec is ignored if -misa-spec is specified.
--with-tls is ignored if -mtls-dialect is specified.
- But using default -march/-mtune value if -mcpu don't have valid option. */
+ Uses default values if -mcpu doesn't have a valid option. */
#define OPTION_DEFAULT_SPECS \
{"tune", "%{!mtune=*:" \
" %{!mcpu=*:-mtune=%(VALUE)}" \
" %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \
+ {"cpu", "%{!march=*:%{!mcpu=*:%:riscv_expand_arch_from_cpu(%(VALUE))}}" }, \
{"arch", "%{!march=*|march=unset:" \
" %{!mcpu=*:-march=%(VALUE)}" \
" %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \
@@ -779,6 +780,17 @@ enum riscv_cc
{
RISCV_CC_BASE = 0, /* Base standard RISC-V ABI. */
RISCV_CC_V, /* For functions that pass or return values in V registers. */
+ /* VLS calling-convention variants, one per supported ABI_VLEN. */
+ RISCV_CC_VLS_V_32,
+ RISCV_CC_VLS_V_64,
+ RISCV_CC_VLS_V_128,
+ RISCV_CC_VLS_V_256,
+ RISCV_CC_VLS_V_512,
+ RISCV_CC_VLS_V_1024,
+ RISCV_CC_VLS_V_2048,
+ RISCV_CC_VLS_V_4096,
+ RISCV_CC_VLS_V_8192,
+ RISCV_CC_VLS_V_16384,
RISCV_CC_UNKNOWN
};
@@ -786,6 +798,8 @@ typedef struct {
/* The calling convention that current function used. */
enum riscv_cc variant_cc;
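+
+ /* The ABI_VLEN in bits when VARIANT_CC is a VLS variant. */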
+ unsigned int abi_vlen;
+
/* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS. */
unsigned int num_gprs;
@@ -809,7 +823,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
riscv_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (INDIRECT), \
- (N_NAMED_ARGS) != -1)
+ (N_NAMED_ARGS) != -1, /* check_only */false)
#define EPILOGUE_USES(REGNO) riscv_epilogue_uses (REGNO)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 640ca5f..6f8cd26 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -273,6 +273,7 @@
V1SI,V2SI,V4SI,V8SI,V16SI,V32SI,V64SI,V128SI,V256SI,V512SI,V1024SI,
V1DI,V2DI,V4DI,V8DI,V16DI,V32DI,V64DI,V128DI,V256DI,V512DI,
V1HF,V2HF,V4HF,V8HF,V16HF,V32HF,V64HF,V128HF,V256HF,V512HF,V1024HF,V2048HF,
+ V1BF,V2BF,V4BF,V8BF,V16BF,V32BF,V64BF,V128BF,V256BF,V512BF,V1024BF,V2048BF,
V1SF,V2SF,V4SF,V8SF,V16SF,V32SF,V64SF,V128SF,V256SF,V512SF,V1024SF,
V1DF,V2DF,V4DF,V8DF,V16DF,V32DF,V64DF,V128DF,V256DF,V512DF,
V1BI,V2BI,V4BI,V8BI,V16BI,V32BI,V64BI,V128BI,V256BI,V512BI,V1024BI,V2048BI,V4096BI"
@@ -672,7 +673,8 @@
;; Microarchitectures we know how to tune for.
;; Keep this in sync with enum riscv_microarchitecture.
(define_attr "tune"
- "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8"
+ "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,
+ tt_ascalon_d8,andes_25_series,andes_23_series,andes_45_series,spacemit_x60"
(const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
;; Describe a user's asm statement.
@@ -790,14 +792,8 @@
rtx t6 = gen_reg_rtx (DImode);
emit_insn (gen_addsi3_extended (t6, operands[1], operands[2]));
- if (GET_CODE (operands[1]) != CONST_INT)
- emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
- else
- t4 = operands[1];
- if (GET_CODE (operands[2]) != CONST_INT)
- emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
- else
- t5 = operands[2];
+ t4 = convert_modes (DImode, SImode, operands[1], false);
+ t5 = convert_modes (DImode, SImode, operands[2], false);
emit_insn (gen_adddi3 (t3, t4, t5));
rtx t7 = gen_lowpart (SImode, t6);
SUBREG_PROMOTED_VAR_P (t7) = 1;
@@ -834,10 +830,7 @@
rtx t3 = gen_reg_rtx (DImode);
rtx t4 = gen_reg_rtx (DImode);
- if (GET_CODE (operands[1]) != CONST_INT)
- emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
- else
- t3 = operands[1];
+ t3 = convert_modes (DImode, SImode, operands[1], false);
emit_insn (gen_addsi3_extended (t4, operands[1], operands[2]));
rtx t5 = gen_lowpart (SImode, t4);
SUBREG_PROMOTED_VAR_P (t5) = 1;
@@ -981,14 +974,8 @@
rtx t6 = gen_reg_rtx (DImode);
emit_insn (gen_subsi3_extended (t6, operands[1], operands[2]));
- if (GET_CODE (operands[1]) != CONST_INT)
- emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
- else
- t4 = operands[1];
- if (GET_CODE (operands[2]) != CONST_INT)
- emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
- else
- t5 = operands[2];
+ t4 = convert_modes (DImode, SImode, operands[1], false);
+ t5 = convert_modes (DImode, SImode, operands[2], false);
emit_insn (gen_subdi3 (t3, t4, t5));
rtx t7 = gen_lowpart (SImode, t6);
SUBREG_PROMOTED_VAR_P (t7) = 1;
@@ -1028,10 +1015,7 @@
rtx t3 = gen_reg_rtx (DImode);
rtx t4 = gen_reg_rtx (DImode);
- if (GET_CODE (operands[1]) != CONST_INT)
- emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
- else
- t3 = operands[1];
+ t3 = convert_modes (DImode, SImode, operands[1], false);
emit_insn (gen_subsi3_extended (t4, operands[1], operands[2]));
rtx t5 = gen_lowpart (SImode, t4);
SUBREG_PROMOTED_VAR_P (t5) = 1;
@@ -1191,18 +1175,12 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- if (GET_CODE (operands[1]) != CONST_INT)
- emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
- else
- t4 = operands[1];
- if (GET_CODE (operands[2]) != CONST_INT)
- emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
- else
- t5 = operands[2];
+ t4 = convert_modes (DImode, SImode, operands[1], false);
+ t5 = convert_modes (DImode, SImode, operands[2], false);
emit_insn (gen_muldi3 (t3, t4, t5));
emit_move_insn (operands[0], gen_lowpart (SImode, t3));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ t6 = convert_modes (DImode, SImode, operands[0], false);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -1238,14 +1216,8 @@
rtx t7 = gen_reg_rtx (DImode);
rtx t8 = gen_reg_rtx (DImode);
- if (GET_CODE (operands[1]) != CONST_INT)
- emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
- else
- t3 = operands[1];
- if (GET_CODE (operands[2]) != CONST_INT)
- emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0));
- else
- t4 = operands[2];
+ t3 = convert_modes (DImode, SImode, operands[1], false);
+ t4 = convert_modes (DImode, SImode, operands[2], false);
emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32)));
emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32)));
@@ -3752,6 +3724,57 @@
[(set_attr "type" "slt")
(set_attr "mode" "<X:MODE>")])
+;; We can sometimes do better for unsigned comparisons against
+;; values where there's a run of 1s in the LSBs.
+;;
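+;; For example, with a constant of 0x7ff (a run of eleven 1s) the GTU
+;; form becomes (x >> 11) != 0 and the LEU form becomes (x >> 11) == 0,
+;; i.e. a shift followed by a comparison against zero.
+;;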
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (gtu:X (match_operand:X 1 "register_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "exact_log2 (INTVAL (operands[2]) + 1) >= 0"
+ [(set (match_dup 3) (lshiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ne:X (match_dup 3) (const_int 0)))]
+{ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]) + 1)); })
+
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (leu:X (match_operand:X 1 "register_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "exact_log2 (INTVAL (operands[2]) + 1) >= 0"
+ [(set (match_dup 3) (lshiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (eq:X (match_dup 3) (const_int 0)))]
+{ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]) + 1)); })
+
+;; Alternate forms that are ultimately just sltiu
+(define_insn ""
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (eq:X (zero_extract:X (match_operand:X 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand"))
+ (const_int 0)))]
+ "(INTVAL (operands[3]) < 11
+ && INTVAL (operands[2]) + INTVAL (operands[3]) == BITS_PER_WORD)"
+{
+ operands[2] = GEN_INT (HOST_WIDE_INT_1U << INTVAL (operands[3]));
+ return "sltiu\t%0,%1,%2";
+}
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<X:MODE>")])
+
+(define_insn ""
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (eq:X (lshiftrt:X (match_operand:X 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand"))
+ (const_int 0)))]
+ "INTVAL (operands[2]) < 11"
+{
+ operands[2] = GEN_INT (HOST_WIDE_INT_1U << INTVAL (operands[2]));
+ return "sltiu\t%0,%1,%2";
+}
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<X:MODE>")])
;;
;; ....................
;;
@@ -4966,3 +4989,7 @@
(include "generic-vector-ooo.md")
(include "generic-ooo.md")
(include "tt-ascalon-d8.md")
+(include "andes-23-series.md")
+(include "andes-25-series.md")
+(include "andes-45-series.md")
+(include "spacemit-x60.md")
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 6543fd1..452062c 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -341,6 +341,10 @@ Target Undocumented RejectNegative Joined Enum(vsetvl_strategy) Var(vsetvl_strat
Target Undocumented Uinteger Var(riscv_two_source_permutes) Init(0)
-param=riscv-two-source-permutes Enable permutes with two source vectors.
+mmax-vectorization
+Target Var(riscv_max_vectorization) Save
+Override the scalar cost model such that vectorization is always profitable.
+
Enum
Name(stringop_strategy) Type(enum stringop_strategy_enum)
Valid arguments to -mstringop-strategy=:
@@ -361,6 +365,18 @@ mstringop-strategy=
Target RejectNegative Joined Enum(stringop_strategy) Var(stringop_strategy) Init(STRATEGY_AUTO)
Specify stringop expansion strategy.
+-param=memcpy-size-threshold=
+Target Joined UInteger Var(riscv_memcpy_size_threshold) Init(-1) Param
+Constant memcpy size in bytes above which to start using libcalls over inlining.
+
+-param=memmove-size-threshold=
+Target Joined UInteger Var(riscv_memmove_size_threshold) Init(-1) Param
+Constant memmove size in bytes above which to start using libcalls over inlining.
+
+-param=memset-size-threshold=
+Target Joined UInteger Var(riscv_memset_size_threshold) Init(-1) Param
+Constant memset size in bytes above which to start using libcalls over inlining.
+
Enum
Name(rvv_vector_bits) Type(enum rvv_vector_bits_enum)
The possible RVV vector register lengths:
diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls
index fe88ec8..bfb1a2d 100644
--- a/gcc/config/riscv/riscv.opt.urls
+++ b/gcc/config/riscv/riscv.opt.urls
@@ -96,6 +96,8 @@ UrlSuffix(gcc/RISC-V-Options.html#index-minline-strncmp)
minline-strlen
UrlSuffix(gcc/RISC-V-Options.html#index-minline-strlen)
+; skipping UrlSuffix for 'mmax-vectorization' due to finding no URLs
+
; skipping UrlSuffix for 'mtls-dialect=' due to finding no URLs
mfence-tso
diff --git a/gcc/config/riscv/spacemit-x60.md b/gcc/config/riscv/spacemit-x60.md
new file mode 100644
index 0000000..c991f89
--- /dev/null
+++ b/gcc/config/riscv/spacemit-x60.md
@@ -0,0 +1,190 @@
+;; spacemit_x60 DFA-based pipeline description for RISC-V targets.
+;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
+;; Contributed by Andrew Waterman (andrew@sifive.com).
+;; Based on MIPS target for GNU compiler.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; ----------------------------------------------------
+;; Spacemit-x60 Units
+;; 2*alu + 2*lsu + 1*fpalu + 1*fdivsqrt + 1*vxu
+;;
+;; There are actually two VXU units and ops get split across them
+;; to give the illusion of a single wider unit with higher
+;; performance. There are a few ops that can only be fed into
+;; one of the two units. Probably best to initially model as
+;; a single unit.
+;;
+;; The VXU is not currently modeled.
+;; Some ops, like shadd.uw, add.uw and cpop, take an extra cycle.
+;; Given everything is in-order, anti-dependencies probably matter.
+;; FP sign injection isn't handled correctly.
+;; ----------------------------------------------------
+
+(define_automaton "spacemit_x60")
+(define_cpu_unit "spacemit_x60_alu0,spacemit_x60_alu1" "spacemit_x60")
+(define_cpu_unit "spacemit_x60_lsu0,spacemit_x60_lsu1" "spacemit_x60")
+;;(define_cpu_unit "spacemit_x60_vxu0" "spacemit_x60")
+(define_cpu_unit "spacemit_x60_fpalu" "spacemit_x60")
+(define_cpu_unit "spacemit_x60_fdivsqrt" "spacemit_x60")
+
+(define_reservation "spacemit_x60_lsu" "spacemit_x60_lsu0, spacemit_x60_lsu1")
+(define_reservation "spacemit_x60_alu" "spacemit_x60_alu0, spacemit_x60_alu1")
+
+;; ----------------------------------------------------
+;; Memory (load/store)
+;; ----------------------------------------------------
+
+(define_insn_reservation "spacemit_x60_load" 5
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "load,fpload,atomic"))
+ "spacemit_x60_lsu")
+
+(define_insn_reservation "spacemit_x60_store" 3
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "store,fpstore"))
+ "spacemit_x60_lsu")
+
+;; ----------------------------------------------------
+;; Int
+;; ----------------------------------------------------
+
+;; alu0 handles div/rem and jumps
+(define_insn_reservation "spacemit_x60_jump" 1
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "branch,jump,call,jalr,ret,trap,sfb_alu"))
+ "spacemit_x60_alu0")
+
+(define_insn_reservation "spacemit_x60_idivsi" 12
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "spacemit_x60_alu0*12")
+
+(define_insn_reservation "spacemit_x60_idivdi" 20
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "spacemit_x60_alu0*20")
+
+(define_insn_reservation "spacemit_x60_imulsi" 3
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "SI")))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_imuldi" 5
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "DI")))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_clmul" 5
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "clmul"))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_mtc_mfc" 3
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "mtc,mfc"))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_fcvt_i2f" 4
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "fcvt_i2f"))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_fcvt_f2i" 6
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "fcvt_f2i"))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_alu" 1
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,\
+ move,bitmanip,min,max,minu,maxu,clz,ctz,rotate,\
+ condmove,crypto,mvpair,zicond,cpop"))
+ "spacemit_x60_alu")
+
+(define_insn_reservation "spacemit_x60_alu2c" 2
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "cpop"))
+ "spacemit_x60_alu")
+
+;; ----------------------------------------------------
+;; Float
+;; ----------------------------------------------------
+
+(define_insn_reservation "spacemit_x60_fcvt" 4
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "fcvt,fmove"))
+ "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fcmp" 6
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "fcmp"))
+ "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmul_half_single" 4
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fadd,fmul")
+ (ior (eq_attr "mode" "HF")
+ (eq_attr "mode" "SF"))))
+ "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmadd_half_single" 5
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fmadd")
+ (ior (eq_attr "mode" "HF")
+ (eq_attr "mode" "SF"))))
+ "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmul_double" 5
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fadd,fmul")
+ (eq_attr "mode" "DF")))
+ "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmadd_double" 5
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "DF")))
+ "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fdiv_half" 12
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "HF")))
+ "spacemit_x60_fdivsqrt*12")
+
+(define_insn_reservation "spacemit_x60_fdiv_single" 15
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "spacemit_x60_fdivsqrt*15")
+
+(define_insn_reservation "spacemit_x60_fdiv_double" 22
+ (and (eq_attr "tune" "spacemit_x60")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "spacemit_x60_fdivsqrt*22")
+
+(define_insn_reservation "spacemi6_x60_dummy" 1
+ (and (eq_attr "tune" "spacemit_x60")
+ (eq_attr "type" "viminmax,vfmuladd,vfmovvf,vssegte,vlsegds,rdvlenb,vaesef,vfcmp,vmpop,vwsll,vsha2cl,vfwcvtbf16,vfncvtftoi,vgather,vsha2ch,vsts,vldm,vmsfs,vfmul,vcompress,vaesz,vssegtox,vstox,vclmulh,vghsh,vaalu,vslideup,vfalu,vaeskf1,vfcvtitof,vaesdm,vmffs,vandn,vstm,vgmul,vlds,viwmul,vfmerge,vlsegdff,vshift,vaesem,vaesdf,vste,ghost,viwred,vsalu,vfwredu,vmidx,sf_vfnrclip,vstux,vfslide1down,vfcvtftoi,vfncvtitof,vnshift,vsm3me,vired,vlde,vfwalu,sf_vc_se,vlsegdux,vicmp,vfncvtftof,vror,vfwmaccbf16,vfminmax,vldff,vstr,vsm3c,vfwcvtftoi,vbrev,vaeskf2,vidiv,vfwcvtftof,rdvl,vimul,vfsgnj,vimovvx,vsha2ms,vialu,vfredo,vctz,vlsegde,viwmuladd,vcpop,vsetvl,vldux,vfwmuladd,vector,wrvxrm,vsshift,vfredu,vimerge,vlsegdox,vfrecp,vnclip,vfclass,vbrev8,vslidedown,vldox,vmalu,vext,vimuladd,sf_vqmacc,vldr,vrol,vmov,vsmul,vclmul,vfmov,vislide1up,vssegtux,vclz,rdfrm,vfwcvtitof,vfncvtbf16,vfmovfv,vislide1down,vfwmul,vfsqrt,vrev8,vicalu,vimov,wrfrm,vfdiv,sf_vc,vsm4k,vmiota,vsm4r,viwalu,vsetvl_pre,vimovxv,vfwredo,vfslide1up,vssegts"))
+ "nothing")
+
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 37f15d8..01eab1a 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -603,8 +603,7 @@
{
/* We don't have SI mode compare on RV64, so we need to make sure expected
value is sign-extended. */
- rtx tmp0 = gen_reg_rtx (word_mode);
- emit_insn (gen_extend_insn (tmp0, operands[3], word_mode, <MODE>mode, 0));
+ rtx tmp0 = convert_modes (word_mode, <MODE>mode, operands[3], false);
operands[3] = gen_lowpart (<MODE>mode, tmp0);
}
@@ -702,17 +701,8 @@
operands[6],
operands[7]));
- rtx val = gen_reg_rtx (SImode);
- if (operands[1] != const0_rtx)
- emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands[1]));
- else
- emit_move_insn (val, const0_rtx);
-
- rtx exp = gen_reg_rtx (SImode);
- if (operands[3] != const0_rtx)
- emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands[3]));
- else
- emit_move_insn (exp, const0_rtx);
+ rtx val = convert_modes (SImode, <SHORT:MODE>mode, operands[1], false);
+ rtx exp = convert_modes (SImode, <SHORT:MODE>mode, operands[3], false);
rtx compare = val;
if (exp != const0_rtx)
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index b53a2df..2761e5e 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-sr.cc
+riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h insn-opinit.h \
+ tree-pass.h emit-rtl.h insn-config.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/riscv/riscv-opt-popretz.cc
+
riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
@@ -216,7 +222,8 @@ RISCV_EXT_DEFS = \
$(srcdir)/config/riscv/riscv-ext-thead.def \
$(srcdir)/config/riscv/riscv-ext-ventana.def \
$(srcdir)/config/riscv/riscv-ext-mips.def \
- $(srcdir)/config/riscv/riscv-ext-andes.def
+ $(srcdir)/config/riscv/riscv-ext-andes.def \
+ $(srcdir)/config/riscv/riscv-ext-spacemit.def
$(srcdir)/config/riscv/riscv-ext.opt: $(RISCV_EXT_DEFS)
diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
index 20e82e6..42171a5 100644
--- a/gcc/config/riscv/thead.md
+++ b/gcc/config/riscv/thead.md
@@ -34,7 +34,7 @@
(define_insn "*th_srri<mode>3"
[(set (match_operand:GPR 0 "register_operand" "=r")
(rotatert:GPR (match_operand:GPR 1 "register_operand" "r")
- (match_operand 2 "const_int_operand" "n")))]
+ (match_operand 2 "const_int_operand" "n")))]
"TARGET_XTHEADBB && (TARGET_64BIT || <MODE>mode == SImode)"
{
bool wform = TARGET_64BIT && (<MODE>mode == SImode);
@@ -45,6 +45,22 @@
[(set_attr "type" "bitmanip")
(set_attr "mode" "<GPR:MODE>")])
+;; Version with explicit sign extension to facilitate sign extension
+;; removal.
+(define_insn "*th_srrisi3_extended"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (rotatert:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "n"))))]
+ "TARGET_XTHEADBB && TARGET_64BIT"
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2])
+ & (GET_MODE_BITSIZE (SImode) - 1));
+ return "th.srriw\t%0,%1,%2";
+ }
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
(define_insn "*th_ext<mode>4"
[(set (match_operand:GPR 0 "register_operand" "=r")
(sign_extract:GPR (match_operand:GPR 1 "register_operand" "r")
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 45af656..90865a3 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -173,6 +173,21 @@
(RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64")
])
+(define_mode_iterator VLSF_ZVFBF16 [
+ (V1BF "riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V2BF "riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V4BF "riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V8BF "riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V16BF "riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V32BF "riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64")
+ (V64BF "riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128")
+ (V128BF "riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256")
+ (V256BF "riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512")
+ (V512BF "riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024")
+ (V1024BF "riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048")
+ (V2048BF "riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096")
+])
+
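The TARGET_MIN_VLEN guards in the new VLSF_ZVFBF16 iterator follow one
rule: a fixed-length vector of N 16-bit elements must fit into the
largest register group, LMUL = 8, i.e. N * 16 <= 8 * VLEN.  A short
standalone sketch that reproduces the thresholds used above:

    #include <cstdio>

    int main ()
    {
      const unsigned elem_bits = 16;                // BFmode element size
      for (unsigned n = 32; n <= 2048; n *= 2)
        {
          // N elements of 16 bits must fit in LMUL = 8, i.e. 8 * VLEN bits.
          unsigned min_vlen = n * elem_bits / 8;
          printf ("V%uBF requires TARGET_MIN_VLEN >= %u\n", n, min_vlen);
        }
      return 0;
    }

For example, V32BF gives 32 * 16 / 8 = 64, matching the
TARGET_MIN_VLEN >= 64 guard above.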
(define_mode_iterator VF_ZVFHMIN [
(RVVM8HF "TARGET_VECTOR_ELEN_FP_16") (RVVM4HF "TARGET_VECTOR_ELEN_FP_16") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16")
(RVVM1HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16")
@@ -1646,6 +1661,18 @@
(V512HF "riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024")
(V1024HF "riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048")
(V2048HF "riscv_vector::vls_mode_valid_p (V2048HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096")
+ (V1BF "riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V2BF "riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V4BF "riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V8BF "riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V16BF "riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V32BF "riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64")
+ (V64BF "riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128")
+ (V128BF "riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256")
+ (V256BF "riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512")
+ (V512BF "riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024")
+ (V1024BF "riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048")
+ (V2048BF "riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096")
(V1SF "riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
@@ -1671,23 +1698,39 @@
(define_mode_iterator VB_VLS [VB VLSB])
-(define_mode_iterator VLS [VLSI VLSF_ZVFHMIN])
+;; VLSI + VLSF, but also including the 16-bit float modes available with
+;; just Zvfhmin (HFmode) or Zvfbf16 (BFmode); to be used for loads/stores.
+(define_mode_iterator VLS [VLSI VLSF_ZVFHMIN VLSF_ZVFBF16])
+;; VLSI + VLSF but including half-float modes iff TARGET_ZVFH.
(define_mode_iterator VLS_ZVFH [VLSI VLSF])
-(define_mode_iterator V [VI VF_ZVFBF16 VF_ZVFHMIN])
+;; VI + VF, but also including the 16-bit float modes available with
+;; just Zvfhmin (HFmode) or Zvfbf16 (BFmode); to be used for loads/stores.
+(define_mode_iterator V [VI VF_ZVFHMIN VF_ZVFBF16])
+;; VI + VF but including half-float modes iff TARGET_ZVFH.
(define_mode_iterator V_ZVFH [VI VF])
+;; Used for permutes and loads/stores, i.e. operations that are int/float
+;; agnostic.  Some loads/stores still use only V for now; that can be
+;; changed later.
(define_mode_iterator V_VLS [V VLS])
+;; Same as V_VLS but with ZVFH instead of ZVFHMIN.  Currently only used
+;; for vec_extract and vec_set because those use v(f)mv.  As those are
+;; just permutes, we could pun with an integer type when the actual mode
+;; is not supported (just as we already do for broadcasting unsupported
+;; modes, see V_VLSF_FALLBACK).
(define_mode_iterator V_VLS_ZVFH [V_ZVFH VLS_ZVFH])
(define_mode_iterator V_VLSI [VI VLSI])
(define_mode_iterator V_VLSF [VF VLSF])
-(define_mode_iterator V_VLSF_ZVFHMIN [VF_ZVFBF16 VF_ZVFHMIN VLSF_ZVFHMIN])
+;; All modes that cannot be broadcast directly; for these we fall back to
+;; either a strided broadcast or a gather broadcast.
+(define_mode_iterator V_VLSF_FALLBACK [VF_ZVFBF16 VLSF_ZVFBF16 VF_ZVFHMIN VLSF_ZVFHMIN])
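For the V_VLSF_FALLBACK modes, a broadcast is done with a zero-stride
vlse<sew> (or a gather) rather than a vfmv form.  A hedged C++ model of
the zero-stride case; broadcast_bf16 is a hypothetical name:

    #include <cstdint>
    #include <cstddef>

    // Stride 0 makes every lane read the same element, which is what
    // vlse16.v with a zero stride register does.
    static void broadcast_bf16 (uint16_t *dst, const uint16_t *src,
                                ptrdiff_t stride_bytes, size_t vl)
    {
      const char *p = (const char *) src;
      for (size_t i = 0; i < vl; i++)
        dst[i] = *(const uint16_t *) (p + i * stride_bytes);
    }

    int main ()
    {
      uint16_t v[4];
      uint16_t s = 0x3f80;                 // some bf16 bit pattern
      broadcast_bf16 (v, &s, 0, 4);        // zero stride => broadcast
      return (v[0] == s && v[3] == s) ? 0 : 1;
    }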
(define_mode_iterator VT [V1T V2T V4T V8T V16T V32T])
@@ -1842,6 +1885,18 @@
(V512HF "V512HI")
(V1024HF "V1024HI")
(V2048HF "V2048HI")
+ (V1BF "V1HI")
+ (V2BF "V2HI")
+ (V4BF "V4HI")
+ (V8BF "V8HI")
+ (V16BF "V16HI")
+ (V32BF "V32HI")
+ (V64BF "V64HI")
+ (V128BF "V128HI")
+ (V256BF "V256HI")
+ (V512BF "V512HI")
+ (V1024BF "V1024HI")
+ (V2048BF "V2048HI")
(V1SF "V1SI")
(V2SF "V2SI")
(V4SF "V4SI")
@@ -2064,6 +2119,9 @@
(V1HF "V1BI") (V2HF "V2BI") (V4HF "V4BI") (V8HF "V8BI") (V16HF "V16BI")
(V32HF "V32BI") (V64HF "V64BI") (V128HF "V128BI") (V256HF "V256BI")
(V512HF "V512BI") (V1024HF "V1024BI") (V2048HF "V2048BI")
+ (V1BF "V1BI") (V2BF "V2BI") (V4BF "V4BI") (V8BF "V8BI") (V16BF "V16BI")
+ (V32BF "V32BI") (V64BF "V64BI") (V128BF "V128BI") (V256BF "V256BI")
+ (V512BF "V512BI") (V1024BF "V1024BI") (V2048BF "V2048BI")
(V1SF "V1BI") (V2SF "V2BI") (V4SF "V4BI") (V8SF "V8BI")
(V16SF "V16BI") (V32SF "V32BI") (V64SF "V64BI")
(V128SF "V128BI") (V256SF "V256BI") (V512SF "V512BI") (V1024SF "V1024BI")
@@ -2175,6 +2233,9 @@
(V1HF "v1bi") (V2HF "v2bi") (V4HF "v4bi") (V8HF "v8bi") (V16HF "v16bi")
(V32HF "v32bi") (V64HF "v64bi") (V128HF "v128bi") (V256HF "v256bi")
(V512HF "v512bi") (V1024HF "v1024bi") (V2048HF "v2048bi")
+ (V1BF "v1bi") (V2BF "v2bi") (V4BF "v4bi") (V8BF "v8bi") (V16BF "v16bi")
+ (V32BF "v32bi") (V64BF "v64bi") (V128BF "v128bi") (V256BF "v256bi")
+ (V512BF "v512bi") (V1024BF "v1024bi") (V2048BF "v2048bi")
(V1SF "v1bi") (V2SF "v2bi") (V4SF "v4bi") (V8SF "v8bi")
(V16SF "v16bi") (V32SF "v32bi") (V64SF "v64bi")
(V128SF "v128bi") (V256SF "v256bi") (V512SF "v512bi") (V1024SF "v1024bi")
@@ -2209,6 +2270,8 @@
(V1DI "DI") (V2DI "DI") (V4DI "DI") (V8DI "DI") (V16DI "DI") (V32DI "DI") (V64DI "DI") (V128DI "DI") (V256DI "DI") (V512DI "DI")
(V1HF "HF") (V2HF "HF") (V4HF "HF") (V8HF "HF") (V16HF "HF") (V32HF "HF") (V64HF "HF") (V128HF "HF") (V256HF "HF")
(V512HF "HF") (V1024HF "HF") (V2048HF "HF")
+ (V1BF "BF") (V2BF "BF") (V4BF "BF") (V8BF "BF") (V16BF "BF") (V32BF "BF") (V64BF "BF") (V128BF "BF") (V256BF "BF")
+ (V512BF "BF") (V1024BF "BF") (V2048BF "BF")
(V1SF "SF") (V2SF "SF") (V4SF "SF") (V8SF "SF") (V16SF "SF") (V32SF "SF") (V64SF "SF") (V128SF "SF") (V256SF "SF")
(V512SF "SF") (V1024SF "SF")
(V1DF "DF") (V2DF "DF") (V4DF "DF") (V8DF "DF") (V16DF "DF") (V32DF "DF") (V64DF "DF") (V128DF "DF") (V256DF "DF") (V512DF "DF")
@@ -2308,6 +2371,8 @@
(V1DI "di") (V2DI "di") (V4DI "di") (V8DI "di") (V16DI "di") (V32DI "di") (V64DI "di") (V128DI "di") (V256DI "di") (V512DI "di")
(V1HF "hf") (V2HF "hf") (V4HF "hf") (V8HF "hf") (V16HF "hf") (V32HF "hf") (V64HF "hf") (V128HF "hf") (V256HF "hf")
(V512HF "hf") (V1024HF "hf") (V2048HF "hf")
+ (V1BF "BF") (V2BF "BF") (V4BF "BF") (V8BF "BF") (V16BF "BF") (V32BF "BF") (V64BF "BF") (V128BF "BF") (V256BF "BF")
+ (V512BF "BF") (V1024BF "BF") (V2048BF "BF")
(V1SF "sf") (V2SF "sf") (V4SF "sf") (V8SF "sf") (V16SF "sf") (V32SF "sf") (V64SF "sf") (V128SF "sf") (V256SF "sf")
(V512SF "sf") (V1024SF "sf")
(V1DF "df") (V2DF "df") (V4DF "df") (V8DF "df") (V16DF "df") (V32DF "df") (V64DF "df") (V128DF "df") (V256DF "df") (V512DF "df")
@@ -2689,6 +2754,8 @@
(V1DI "64") (V2DI "64") (V4DI "64") (V8DI "64") (V16DI "64") (V32DI "64") (V64DI "64") (V128DI "64") (V256DI "64") (V512DI "64")
(V1HF "16") (V2HF "16") (V4HF "16") (V8HF "16") (V16HF "16") (V32HF "16") (V64HF "16") (V128HF "16") (V256HF "16")
(V512HF "16") (V1024HF "16") (V2048HF "16")
+ (V1BF "16") (V2BF "16") (V4BF "16") (V8BF "16") (V16BF "16") (V32BF "16") (V64BF "16") (V128BF "16") (V256BF "16")
+ (V512BF "16") (V1024BF "16") (V2048BF "16")
(V1SF "32") (V2SF "32") (V4SF "32") (V8SF "32") (V16SF "32") (V32SF "32") (V64SF "32") (V128SF "32") (V256SF "32")
(V512SF "32") (V1024SF "32")
(V1DF "64") (V2DF "64") (V4DF "64") (V8DF "64") (V16DF "64") (V32DF "64") (V64DF "64") (V128DF "64") (V256DF "64") (V512DF "64")
@@ -3702,6 +3769,18 @@
(V512HF "vector_eew16_stride_operand")
(V1024HF "vector_eew16_stride_operand")
(V2048HF "vector_eew16_stride_operand")
+ (V1BF "vector_eew16_stride_operand")
+ (V2BF "vector_eew16_stride_operand")
+ (V4BF "vector_eew16_stride_operand")
+ (V8BF "vector_eew16_stride_operand")
+ (V16BF "vector_eew16_stride_operand")
+ (V32BF "vector_eew16_stride_operand")
+ (V64BF "vector_eew16_stride_operand")
+ (V128BF "vector_eew16_stride_operand")
+ (V256BF "vector_eew16_stride_operand")
+ (V512BF "vector_eew16_stride_operand")
+ (V1024BF "vector_eew16_stride_operand")
+ (V2048BF "vector_eew16_stride_operand")
(V1SF "vector_eew32_stride_operand")
(V2SF "vector_eew32_stride_operand")
(V4SF "vector_eew32_stride_operand")
@@ -3816,6 +3895,18 @@
(V512HF "rJ,rJ,rJ,k02,k02,k02")
(V1024HF "rJ,rJ,rJ,k02,k02,k02")
(V2048HF "rJ,rJ,rJ,k02,k02,k02")
+ (V1BF "rJ,rJ,rJ,k02,k02,k02")
+ (V2BF "rJ,rJ,rJ,k02,k02,k02")
+ (V4BF "rJ,rJ,rJ,k02,k02,k02")
+ (V8BF "rJ,rJ,rJ,k02,k02,k02")
+ (V16BF "rJ,rJ,rJ,k02,k02,k02")
+ (V32BF "rJ,rJ,rJ,k02,k02,k02")
+ (V64BF "rJ,rJ,rJ,k02,k02,k02")
+ (V128BF "rJ,rJ,rJ,k02,k02,k02")
+ (V256BF "rJ,rJ,rJ,k02,k02,k02")
+ (V512BF "rJ,rJ,rJ,k02,k02,k02")
+ (V1024BF "rJ,rJ,rJ,k02,k02,k02")
+ (V2048BF "rJ,rJ,rJ,k02,k02,k02")
(V1SF "rJ,rJ,rJ,k04,k04,k04")
(V2SF "rJ,rJ,rJ,k04,k04,k04")
(V4SF "rJ,rJ,rJ,k04,k04,k04")
@@ -3930,6 +4021,18 @@
(V512HF "rJ,k02")
(V1024HF "rJ,k02")
(V2048HF "rJ,k02")
+ (V1BF "rJ,k02")
+ (V2BF "rJ,k02")
+ (V4BF "rJ,k02")
+ (V8BF "rJ,k02")
+ (V16BF "rJ,k02")
+ (V32BF "rJ,k02")
+ (V64BF "rJ,k02")
+ (V128BF "rJ,k02")
+ (V256BF "rJ,k02")
+ (V512BF "rJ,k02")
+ (V1024BF "rJ,k02")
+ (V2048BF "rJ,k02")
(V1SF "rJ,k04")
(V2SF "rJ,k04")
(V4SF "rJ,k04")
@@ -4409,6 +4512,11 @@
(V4HF "riscv_vector::vls_mode_valid_p (V4HFmode) && TARGET_VECTOR_ELEN_FP_16")
(V8HF "riscv_vector::vls_mode_valid_p (V8HFmode) && TARGET_VECTOR_ELEN_FP_16")
(V16HF "riscv_vector::vls_mode_valid_p (V16HFmode) && TARGET_VECTOR_ELEN_FP_16")
+ (V1BF "riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V2BF "riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V4BF "riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V8BF "riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16")
+ (V16BF "riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16")
(V1SF "riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
@@ -4461,6 +4569,13 @@
(V512HF "riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 1024")
(V1024HF "riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 2048")
(V2048HF "riscv_vector::vls_mode_valid_p (V2048HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 4096")
+ (V32BF "riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64")
+ (V64BF "riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128")
+ (V128BF "riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256")
+ (V256BF "riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512")
+ (V512BF "riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024")
+ (V1024BF "riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048")
+ (V2048BF "riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096")
(V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
(V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256")
(V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3cb87bf..ba4a43b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -139,7 +139,8 @@
RVVM2x3HF,RVVM1x3HF,RVVMF2x3HF,RVVMF4x3HF,\
RVVM4x2HF,RVVM2x2HF,RVVM1x2HF,RVVMF2x2HF,RVVMF4x2HF,\
V1HI,V2HI,V4HI,V8HI,V16HI,V32HI,V64HI,V128HI,V256HI,V512HI,V1024HI,V2048HI,\
- V1HF,V2HF,V4HF,V8HF,V16HF,V32HF,V64HF,V128HF,V256HF,V512HF,V1024HF,V2048HF")
+ V1HF,V2HF,V4HF,V8HF,V16HF,V32HF,V64HF,V128HF,V256HF,V512HF,V1024HF,V2048HF,\
+ V1BF,V2BF,V4BF,V8BF,V16BF,V32BF,V64BF,V128BF,V256BF,V512BF,V1024BF,V2048BF")
(const_int 16)
(eq_attr "mode" "RVVM8SI,RVVM4SI,RVVM2SI,RVVM1SI,RVVMF2SI,\
RVVM8SF,RVVM4SF,RVVM2SF,RVVM1SF,RVVMF2SF,\
@@ -446,6 +447,18 @@
(eq_attr "mode" "V512HF") (symbol_ref "riscv_vector::get_vlmul(E_V512HFmode)")
(eq_attr "mode" "V1024HF") (symbol_ref "riscv_vector::get_vlmul(E_V1024HFmode)")
(eq_attr "mode" "V2048HF") (symbol_ref "riscv_vector::get_vlmul(E_V2048HFmode)")
+ (eq_attr "mode" "V1BF") (symbol_ref "riscv_vector::get_vlmul(E_V1BFmode)")
+ (eq_attr "mode" "V2BF") (symbol_ref "riscv_vector::get_vlmul(E_V2BFmode)")
+ (eq_attr "mode" "V4BF") (symbol_ref "riscv_vector::get_vlmul(E_V4BFmode)")
+ (eq_attr "mode" "V8BF") (symbol_ref "riscv_vector::get_vlmul(E_V8BFmode)")
+ (eq_attr "mode" "V16BF") (symbol_ref "riscv_vector::get_vlmul(E_V16BFmode)")
+ (eq_attr "mode" "V32BF") (symbol_ref "riscv_vector::get_vlmul(E_V32BFmode)")
+ (eq_attr "mode" "V64BF") (symbol_ref "riscv_vector::get_vlmul(E_V64BFmode)")
+ (eq_attr "mode" "V128BF") (symbol_ref "riscv_vector::get_vlmul(E_V128BFmode)")
+ (eq_attr "mode" "V256BF") (symbol_ref "riscv_vector::get_vlmul(E_V256BFmode)")
+ (eq_attr "mode" "V512BF") (symbol_ref "riscv_vector::get_vlmul(E_V512BFmode)")
+ (eq_attr "mode" "V1024BF") (symbol_ref "riscv_vector::get_vlmul(E_V1024BFmode)")
+ (eq_attr "mode" "V2048BF") (symbol_ref "riscv_vector::get_vlmul(E_V2048BFmode)")
(eq_attr "mode" "V1SF") (symbol_ref "riscv_vector::get_vlmul(E_V1SFmode)")
(eq_attr "mode" "V2SF") (symbol_ref "riscv_vector::get_vlmul(E_V2SFmode)")
(eq_attr "mode" "V4SF") (symbol_ref "riscv_vector::get_vlmul(E_V4SFmode)")
@@ -762,6 +775,18 @@
(eq_attr "mode" "V512HF") (symbol_ref "riscv_vector::get_ratio(E_V512HFmode)")
(eq_attr "mode" "V1024HF") (symbol_ref "riscv_vector::get_ratio(E_V1024HFmode)")
(eq_attr "mode" "V2048HF") (symbol_ref "riscv_vector::get_ratio(E_V2048HFmode)")
+ (eq_attr "mode" "V1BF") (symbol_ref "riscv_vector::get_ratio(E_V1BFmode)")
+ (eq_attr "mode" "V2BF") (symbol_ref "riscv_vector::get_ratio(E_V2BFmode)")
+ (eq_attr "mode" "V4BF") (symbol_ref "riscv_vector::get_ratio(E_V4BFmode)")
+ (eq_attr "mode" "V8BF") (symbol_ref "riscv_vector::get_ratio(E_V8BFmode)")
+ (eq_attr "mode" "V16BF") (symbol_ref "riscv_vector::get_ratio(E_V16BFmode)")
+ (eq_attr "mode" "V32BF") (symbol_ref "riscv_vector::get_ratio(E_V32BFmode)")
+ (eq_attr "mode" "V64BF") (symbol_ref "riscv_vector::get_ratio(E_V64BFmode)")
+ (eq_attr "mode" "V128BF") (symbol_ref "riscv_vector::get_ratio(E_V128BFmode)")
+ (eq_attr "mode" "V256BF") (symbol_ref "riscv_vector::get_ratio(E_V256BFmode)")
+ (eq_attr "mode" "V512BF") (symbol_ref "riscv_vector::get_ratio(E_V512BFmode)")
+ (eq_attr "mode" "V1024BF") (symbol_ref "riscv_vector::get_ratio(E_V1024BFmode)")
+ (eq_attr "mode" "V2048BF") (symbol_ref "riscv_vector::get_ratio(E_V2048BFmode)")
(eq_attr "mode" "V1SF") (symbol_ref "riscv_vector::get_ratio(E_V1SFmode)")
(eq_attr "mode" "V2SF") (symbol_ref "riscv_vector::get_ratio(E_V2SFmode)")
(eq_attr "mode" "V4SF") (symbol_ref "riscv_vector::get_ratio(E_V4SFmode)")
@@ -1437,6 +1462,8 @@
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<MODE>")
(set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
+ (set (attr "has_vl_op") (const_string "false"))
+ (set (attr "has_vtype_op") (const_string "false"))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -2402,19 +2429,19 @@
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*pred_strided_broadcast<mode>_zvfhmin"
- [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr")
- (if_then_else:V_VLSF_ZVFHMIN
+ [(set (match_operand:V_VLSF_FALLBACK 0 "register_operand" "=vr, vr, vr, vr")
+ (if_then_else:V_VLSF_FALLBACK
(unspec:<VM>
- [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm, Wc1, Wc1")
- (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSF_ZVFHMIN
- (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A"))
- (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (vec_duplicate:V_VLSF_FALLBACK
+ (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A"))
+ (match_operand:V_VLSF_FALLBACK 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
vlse<sew>.v\t%0,%3,zero,%1.t
@@ -2422,7 +2449,8 @@
vlse<sew>.v\t%0,%3,zero
vlse<sew>.v\t%0,%3,zero"
"&& !strided_load_broadcast_p ()
- && <VEL>mode == HFmode
+ && (<VEL>mode == HFmode
+ || <VEL>mode == BFmode)
&& can_create_pseudo_p ()"
[(const_int 0)]
{
@@ -4171,6 +4199,7 @@
"TARGET_VECTOR"
"vw<plus_minus:insn><any_extend:u>.wx\t%0,%3,%z4%p1"
[(set_attr "type" "vi<widen_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<V_DOUBLE_TRUNC>")])
(define_insn "@pred_single_widen_add<any_extend:su><mode>_extended_scalar"
@@ -4437,6 +4466,7 @@
"TARGET_VECTOR"
"v<insn>.vx\t%0,%3,%4%p1"
[(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<optab><mode>_scalar"
@@ -4458,6 +4488,7 @@
"TARGET_VECTOR"
"v<insn>.vx\t%0,%3,%4%p1"
[(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_expand "@pred_<optab><mode>_scalar"
@@ -4512,6 +4543,7 @@
"TARGET_VECTOR"
"v<insn>.vx\t%0,%3,%4%p1"
[(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_insn "*pred_<optab><mode>_extended_scalar"
@@ -4534,6 +4566,7 @@
"TARGET_VECTOR && !TARGET_64BIT"
"v<insn>.vx\t%0,%3,%4%p1"
[(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_expand "@pred_<optab><mode>_scalar"
@@ -4588,6 +4621,7 @@
"TARGET_VECTOR"
"v<insn>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_insn "*pred_<optab><mode>_extended_scalar"
@@ -4610,6 +4644,7 @@
"TARGET_VECTOR && !TARGET_64BIT"
"v<insn>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<int_binop_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<sat_op><mode>"
@@ -4655,6 +4690,7 @@
"TARGET_VECTOR"
"v<sat_op>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<sat_insn_type>")
+ (set_attr "mode_idx" "3")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<sat_op><mode>_scalar"
@@ -8641,7 +8677,7 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(unspec:VT
- [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ")
+ [(match_operand:VT 3 "memory_operand" " m, m, m")
(mem:BLK (scratch))] UNSPEC_VLEFF)
(match_operand:VT 2 "vector_merge_operand" " 0, vu, vu")))
(set (reg:SI VL_REGNUM)
@@ -8656,7 +8692,7 @@
[(match_dup 3) (mem:BLK (scratch))] UNSPEC_VLEFF)
(match_dup 2))] UNSPEC_MODIFY_VL))]
"TARGET_VECTOR"
- "vlseg<nf>e<sew>ff.v\t%0,(%z3)%p1"
+ "vlseg<nf>e<sew>ff.v\t%0,%3%p1"
[(set_attr "type" "vlsegdff")
(set_attr "mode" "<MODE>")])
@@ -9042,6 +9078,56 @@
riscv_vector::prepare_ternary_operands (operands);
})
+(define_insn "*pred_widen_mul_plus_u_vx<mode>_undef"
+ [(set (match_operand:VWEXTI 0 "register_operand" "=&vr")
+ (if_then_else:VWEXTI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
+ (match_operand 6 "vector_length_operand" " rvl")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (match_operand 9 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:VWEXTI
+ (mult:VWEXTI
+ (zero_extend:VWEXTI
+ (vec_duplicate:<V_DOUBLE_TRUNC>
+ (match_operand:<VSUBEL> 3 "register_operand" " rJ")))
+ (zero_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr")))
+ (match_operand:VWEXTI 5 "register_operand" " 0"))
+ (match_operand:VWEXTI 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "vwmaccu.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "@pred_widen_mul_plus_u_vx<mode>"
+ [(set (match_operand:VWEXTI 0 "register_operand")
+ (if_then_else:VWEXTI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:VWEXTI
+ (mult:VWEXTI
+ (zero_extend:VWEXTI
+ (vec_duplicate:<V_DOUBLE_TRUNC>
+ (match_operand:<VSUBEL> 2 "register_operand")))
+ (zero_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
+ (match_operand:VWEXTI 4 "register_operand"))
+ (match_operand:VWEXTI 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::prepare_ternary_operands (operands);
+ })
+
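The new pred_widen_mul_plus_u_vx patterns map to vwmaccu.vx, the widening
unsigned multiply-accumulate with a scalar operand.  A hedged scalar
model of that instruction (SEW = 8 chosen for illustration;
vwmaccu_vx_model is a hypothetical name):

    #include <cstdint>
    #include <cstddef>

    // Each 8-bit lane of vs2 and the scalar rs1 are zero-extended to
    // 16 bits, multiplied, and added into the 16-bit accumulator vd.
    static void vwmaccu_vx_model (uint16_t *vd, uint8_t rs1,
                                  const uint8_t *vs2, size_t vl)
    {
      for (size_t i = 0; i < vl; i++)
        vd[i] = (uint16_t) (vd[i] + (uint16_t) rs1 * (uint16_t) vs2[i]);
    }

    int main ()
    {
      uint16_t acc[4] = {1, 1, 1, 1};
      uint8_t v[4] = {10, 20, 30, 255};
      vwmaccu_vx_model (acc, 2, v, 4);     // acc[i] += 2 * v[i]
      return acc[3] == (uint16_t) (1 + 2 * 255) ? 0 : 1;
    }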
(include "autovec.md")
(include "autovec-opt.md")
(include "sifive-vector.md")