diff options
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-cores.def | 5 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-cost-tables.h | 103 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-tune.md | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 83 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/thunderx3t110.md | 686 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 2 |
8 files changed, 895 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bb017f2..7d8fb27 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2020-04-27 Anton Youdkevitch <anton.youdkevitch@bell-sw.com> + + * config/aarch64/aarch64-cores.def (thunderx3t110): Add the chip name. + * config/aarch64/aarch64-tune.md: Regenerate. + * config/aarch64/aarch64.c (thunderx3t110_addrcost_table): Define. + (thunderx3t110_regmove_cost): Likewise. + (thunderx3t110_vector_cost): Likewise. + (thunderx3t110_prefetch_tune): Likewise. + (thunderx3t110_tunings): Likewise. + * gcc/config/aarch64/aarch64-cost-tables.h (thunderx3t110_extra_costs): + Define. + * config/aarch64/thunderx3t110.md: New file. + * config/aarch64/aarch64.md: Include thunderx3t110.md. + * doc/invoke.texi (AArch64 options): Add thunderx3t110. + 2020-04-28 Jakub Jelinek <jakub@redhat.com> PR target/94704 diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index ea9b98b..31da488 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -122,6 +122,11 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ /* HiSilicon ('H') cores. */ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) +/* ARMv8.3-A Architecture Processors. */ + +/* Marvell cores (TX3). */ +AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + /* ARMv8.4-A Architecture Processors. */ /* Qualcomm ('Q') cores. */ diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index 65df55e..8a98bf4 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -334,6 +334,109 @@ const struct cpu_cost_table thunderx2t99_extra_costs = } }; +const struct cpu_cost_table thunderx3t110_extra_costs = +{ + /* ALU */ + { + 0, /* Arith. */ + 0, /* Logical. */ + 0, /* Shift. */ + 0, /* Shift_reg. */ + COSTS_N_INSNS (1), /* Arith_shift. */ + COSTS_N_INSNS (1), /* Arith_shift_reg. */ + COSTS_N_INSNS (1), /* Log_shift. */ + COSTS_N_INSNS (1), /* Log_shift_reg. */ + 0, /* Extend. */ + COSTS_N_INSNS (1), /* Extend_arith. */ + 0, /* Bfi. */ + 0, /* Bfx. */ + COSTS_N_INSNS (3), /* Clz. */ + 0, /* Rev. */ + 0, /* Non_exec. */ + true /* Non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (4), /* Simple. */ + COSTS_N_INSNS (4), /* Flag_setting. */ + COSTS_N_INSNS (4), /* Extend. */ + COSTS_N_INSNS (5), /* Add. */ + COSTS_N_INSNS (5), /* Extend_add. */ + COSTS_N_INSNS (18) /* Idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (4), /* Simple. */ + 0, /* Flag_setting. */ + COSTS_N_INSNS (4), /* Extend. */ + COSTS_N_INSNS (5), /* Add. */ + COSTS_N_INSNS (5), /* Extend_add. */ + COSTS_N_INSNS (26) /* Idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (4), /* Load. */ + COSTS_N_INSNS (4), /* Load_sign_extend. */ + COSTS_N_INSNS (5), /* Ldrd. */ + COSTS_N_INSNS (4), /* Ldm_1st. */ + 1, /* Ldm_regs_per_insn_1st. */ + 1, /* Ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* Loadf. */ + COSTS_N_INSNS (4), /* Loadd. */ + COSTS_N_INSNS (4), /* Load_unaligned. */ + 0, /* Store. */ + 0, /* Strd. */ + 0, /* Stm_1st. */ + 1, /* Stm_regs_per_insn_1st. */ + 1, /* Stm_regs_per_insn_subsequent. */ + 0, /* Storef. */ + 0, /* Stored. */ + 0, /* Store_unaligned. */ + COSTS_N_INSNS (1), /* Loadv. */ + COSTS_N_INSNS (1) /* Storev. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (4), /* Div. */ + COSTS_N_INSNS (1), /* Mult. */ + COSTS_N_INSNS (1), /* Mult_addsub. */ + COSTS_N_INSNS (1), /* Fma. */ + COSTS_N_INSNS (1), /* Addsub. */ + COSTS_N_INSNS (1), /* Fpconst. */ + COSTS_N_INSNS (1), /* Neg. */ + COSTS_N_INSNS (1), /* Compare. */ + COSTS_N_INSNS (2), /* Widen. */ + COSTS_N_INSNS (2), /* Narrow. */ + COSTS_N_INSNS (2), /* Toint. */ + COSTS_N_INSNS (2), /* Fromint. */ + COSTS_N_INSNS (2) /* Roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (6), /* Div. */ + COSTS_N_INSNS (1), /* Mult. */ + COSTS_N_INSNS (1), /* Mult_addsub. */ + COSTS_N_INSNS (1), /* Fma. */ + COSTS_N_INSNS (1), /* Addsub. */ + COSTS_N_INSNS (1), /* Fpconst. */ + COSTS_N_INSNS (1), /* Neg. */ + COSTS_N_INSNS (1), /* Compare. */ + COSTS_N_INSNS (2), /* Widen. */ + COSTS_N_INSNS (2), /* Narrow. */ + COSTS_N_INSNS (2), /* Toint. */ + COSTS_N_INSNS (2), /* Fromint. */ + COSTS_N_INSNS (2) /* Roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* Alu. */ + } +}; + const struct cpu_cost_table tsv110_extra_costs = { /* ALU */ diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 3cc1c4d..c2dd6c1 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,tsv110,thunderx3t110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a81b0b2..5316350 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -416,6 +416,22 @@ static const struct cpu_addrcost_table thunderx2t99_addrcost_table = 0, /* imm_offset */ }; +static const struct cpu_addrcost_table thunderx3t110_addrcost_table = +{ + { + 1, /* hi */ + 1, /* si */ + 1, /* di */ + 2, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 2, /* register_offset */ + 3, /* register_sextend */ + 3, /* register_zextend */ + 0, /* imm_offset */ +}; + static const struct cpu_addrcost_table tsv110_addrcost_table = { { @@ -524,6 +540,15 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost = 4 /* FP2FP */ }; +static const struct cpu_regmove_cost thunderx3t110_regmove_cost = +{ + 1, /* GP2GP */ + /* Avoid the use of int<->fp moves for spilling. */ + 4, /* GP2FP */ + 5, /* FP2GP */ + 4 /* FP2FP */ +}; + static const struct cpu_regmove_cost tsv110_regmove_cost = { 1, /* GP2GP */ @@ -692,6 +717,26 @@ static const struct cpu_vector_cost thunderx2t99_vector_cost = 1 /* cond_not_taken_branch_cost */ }; +static const struct cpu_vector_cost thunderx3t110_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 5, /* scalar_fp_stmt_cost */ + 4, /* scalar_load_cost */ + 1, /* scalar_store_cost */ + 5, /* vec_int_stmt_cost */ + 5, /* vec_fp_stmt_cost */ + 10, /* vec_permute_cost */ + 5, /* vec_to_scalar_cost */ + 5, /* scalar_to_vec_cost */ + 4, /* vec_align_load_cost */ + 4, /* vec_unalign_load_cost */ + 4, /* vec_unalign_store_cost */ + 4, /* vec_store_cost */ + 2, /* cond_taken_branch_cost */ + 1 /* cond_not_taken_branch_cost */ +}; + + /* Generic costs for branch instructions. */ static const struct cpu_branch_cost generic_branch_cost = { @@ -790,6 +835,17 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune = -1 /* default_opt_level */ }; +static const cpu_prefetch_tune thunderx3t110_prefetch_tune = +{ + 8, /* num_slots */ + 32, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + 256, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + static const cpu_prefetch_tune tsv110_prefetch_tune = { 0, /* num_slots */ @@ -1216,6 +1272,33 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_prefetch_tune }; +static const struct tune_params thunderx3t110_tunings = +{ + &thunderx3t110_extra_costs, + &thunderx3t110_addrcost_table, + &thunderx3t110_regmove_cost, + &thunderx3t110_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ + 4, /* memmov_cost. */ + 6, /* issue_rate. */ + (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ + "16", /* function_align. */ + "8", /* jump_align. */ + "16", /* loop_align. */ + 3, /* int_reassoc_width. */ + 2, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &thunderx3t110_prefetch_tune +}; + static const struct tune_params neoversen1_tunings = { &cortexa57_extra_costs, diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c7c4d1d..8c8be3c 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -438,6 +438,7 @@ (include "../arm/xgene1.md") (include "thunderx2t99.md") (include "tsv110.md") +(include "thunderx3t110.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns diff --git a/gcc/config/aarch64/thunderx3t110.md b/gcc/config/aarch64/thunderx3t110.md new file mode 100644 index 0000000..f8d6204 --- /dev/null +++ b/gcc/config/aarch64/thunderx3t110.md @@ -0,0 +1,686 @@ +;; Cavium ThunderX 3 CN11xx pipeline description +;; Copyright (C) 2020 Free Software Foundation, Inc. +;; +;; Contributed by Marvell + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "thunderx3t110, thunderx3t110_advsimd, thunderx3t110_ldst") +(define_automaton "thunderx3t110_mult") + +(define_cpu_unit "thunderx3t110_i0" "thunderx3t110") +(define_cpu_unit "thunderx3t110_i1" "thunderx3t110") +(define_cpu_unit "thunderx3t110_i2" "thunderx3t110") +(define_cpu_unit "thunderx3t110_i3" "thunderx3t110") + +(define_cpu_unit "thunderx3t110_ls0" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls1" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_sd" "thunderx3t110_ldst") + +; Pseudo-units for multiply pipeline. +; unchanged from TX2, occupies I1 for four (1 + 3 additional) slots + +(define_cpu_unit "thunderx3t110_i1m1" "thunderx3t110_mult") +(define_cpu_unit "thunderx3t110_i1m2" "thunderx3t110_mult") +(define_cpu_unit "thunderx3t110_i1m3" "thunderx3t110_mult") + +; Pseudo-units for load delay (assuming dcache hit). + +(define_cpu_unit "thunderx3t110_ls0d1" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls0d2" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls0d3" "thunderx3t110_ldst") + +(define_cpu_unit "thunderx3t110_ls1d1" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls1d2" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls1d3" "thunderx3t110_ldst") + +; Define FP units f0/f1/f2/f3. +(define_cpu_unit "thunderx3t110_f0" "thunderx3t110_advsimd") +(define_cpu_unit "thunderx3t110_f1" "thunderx3t110_advsimd") +(define_cpu_unit "thunderx3t110_f2" "thunderx3t110_advsimd") +(define_cpu_unit "thunderx3t110_f3" "thunderx3t110_advsimd") + +(define_reservation "thunderx3t110_i23" "thunderx3t110_i2|thunderx3t110_i3") +(define_reservation "thunderx3t110_i01" + "thunderx3t110_i0|thunderx3t110_i1") +(define_reservation "thunderx3t110_i012" + "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2") +(define_reservation "thunderx3t110_i0123" + "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2|thunderx3t110_i3") +(define_reservation "thunderx3t110_ls01" "thunderx3t110_ls0|thunderx3t110_ls1") +(define_reservation "thunderx3t110_f01" "thunderx3t110_f0|thunderx3t110_f1") +(define_reservation "thunderx3t110_f23" "thunderx3t110_f2|thunderx3t110_f3") +(define_reservation "thunderx3t110_f0123" + "thunderx3t110_f0|thunderx3t110_f1|thunderx3t110_f2|thunderx3t110_f3") + +; A load with delay in the ls0/ls1 pipes. +; this is always a delay of four +(define_reservation "thunderx3t110_l0delay" + "thunderx3t110_ls0,thunderx3t110_ls0d1,thunderx3t110_ls0d2,\ + thunderx3t110_ls0d3") +(define_reservation "thunderx3t110_l1delay" + "thunderx3t110_ls1,thunderx3t110_ls1d1,thunderx3t110_ls1d2,\ + thunderx3t110_ls1d3") +(define_reservation "thunderx3t110_l01delay" + "thunderx3t110_l0delay|thunderx3t110_l1delay") +;; Branch and call instructions. + +(define_insn_reservation "thunderx3t110_branch" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "call,branch,trap")) + "thunderx3t110_i23") + +;; Misc instructions. + +; Speculation barrier +(define_insn_reservation "thunderx3t110_nothing" 0 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "block")) + "nothing") + +(define_insn_reservation "thunderx3t110_mrs" 0 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mrs")) + "thunderx3t110_i2") + +(define_insn_reservation "thunderx3t110_multiple" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "multiple")) + "thunderx3t110_i0+thunderx3t110_i1+thunderx3t110_i3+thunderx3t110_ls0+\ + thunderx3t110_ls1+thunderx3t110_sd+thunderx3t110_i1m1+thunderx3t110_i1m2+\ + thunderx3t110_i1m3+thunderx3t110_f0+thunderx3t110_f1") + +;; Integer arithmetic/logic instructions. + +; Plain register moves are handled by renaming, +; and don't create any uops. +(define_insn_reservation "thunderx3t110_regmove" 0 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mov_reg")) + "nothing") + +(define_insn_reservation "thunderx3t110_alu_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\ + adc_reg,adc_imm,adcs_reg,adcs_imm,\ + logic_reg,logic_imm,logics_reg,logics_imm,\ + csel,adr,mov_imm,shift_reg,shift_imm,bfm,\ + bfx,rbit,rev,extend,rotate_imm")) + "thunderx3t110_i0123") + +; distinguish between latency 1|2 and throughput 1/4|2/4? +; is it actually 1,1/2,{i0,i1} vs 2,1/4,{i0,i1,i2,i3} +(define_insn_reservation "thunderx3t110_alu_shift" 2 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "alu_shift_imm,alu_ext,\ + alus_shift_imm,alus_ext,\ + logic_shift_imm,logics_shift_imm")) + "thunderx3t110_i0123") + +(define_insn_reservation "thunderx3t110_alu_shift1" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "alu_shift_imm,alu_ext,\ + alus_shift_imm,alus_ext,\ + logic_shift_imm,logics_shift_imm")) + "thunderx3t110_i01") + +; we are going for the the optimistic answer (13) +; for now, the worst case is 23 +(define_insn_reservation "thunderx3t110_div" 13 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "sdiv,udiv")) + "thunderx3t110_i1*3") + +(define_insn_reservation "thunderx3t110_madd" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mla,smlal,umlal")) + "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3,\ + thunderx3t110_i012") + +; NOTE: smull, umull are used for "high part" multiplies too. +; mul is alias for MADD +; it has to be distinguished between smulh, umulh (4,1) and +; other (5,1) but there is no such a type, so, we go for the +; conservative approach of (5,1) for now +; smulh, umulh only runs on I1 +(define_insn_reservation "thunderx3t110_mul" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mul,smull,umull")) + "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3") + +(define_insn_reservation "thunderx3t110_countbits" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "clz")) + "thunderx3t110_i1") + +;; Integer loads and stores. + +; load_4 matches prefetch, a multitude of move/str/dup variants, +; sign extend +(define_insn_reservation "thunderx3t110_load_basic" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "load_4")) + "thunderx3t110_ls01") + +; model use of I0/I1/I2 for index versions only, model 4|8 2nd on load +(define_insn_reservation "thunderx3t110_loadpair" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "load_8,load_16")) + "thunderx3t110_i012,thunderx3t110_ls01") + +(define_insn_reservation "thunderx3t110_store_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "store_4")) + "thunderx3t110_ls01,thunderx3t110_sd") + +; model use of I0/I1/I2/I3 for index versions, model differing +; throughputs +(define_insn_reservation "thunderx3t110_storepair_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "store_8,store_16")) + "thunderx3t110_ls01,thunderx3t110_sd") + +;; FP data processing instructions. + +(define_insn_reservation "thunderx3t110_fp_simple" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd")) + "thunderx3t110_f0123") + +; distinguish latency 3/4 throughput 1/2|1/4 +(define_insn_reservation "thunderx3t110_fp_addsub3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fadds,faddd")) + "thunderx3t110_f23") +(define_insn_reservation "thunderx3t110_fp_addsub4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fadds,faddd")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_fp_cmp" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd")) + "thunderx3t110_f0123") + +; need to split out latency 23 throughput 23/4: F64 from +; latency 16 throughput 16/4: FDIV F32 +(define_insn_reservation "thunderx3t110_fp_divsqrt_s" 16 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fdivs,fsqrts")) + "thunderx3t110_f0*3|thunderx3t110_f1*3|\ + thunderx3t110_f2*3|thunderx3t110_f3*3") + +(define_insn_reservation "thunderx3t110_fp_divsqrt_d" 23 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fdivd,fsqrtd")) + "thunderx3t110_f0*5|thunderx3t110_f1*5|\ + thunderx3t110_f2*5|thunderx3t110_f3*5") + +(define_insn_reservation "thunderx3t110_fp_mul_mac" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fmuls,fmuld,fmacs,fmacd")) + "thunderx3t110_f01") + +(define_insn_reservation "thunderx3t110_frint" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_rints,f_rintd")) + "thunderx3t110_f0123") + +; mimic latency 3|4 throughput 1/2|1/4 +(define_insn_reservation "thunderx3t110_fcsel3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fcsel")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_fcsel4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fcsel")) + "thunderx3t110_f0123") + +;; FP miscellaneous instructions. + +(define_insn_reservation "thunderx3t110_fp_cvt" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f")) + "thunderx3t110_f0123") + +; even though f_mrc has to belong to fp_mov_to_gen +; we retain this for the sake of legacy as codegen +; doesn't use it anyway +(define_insn_reservation "thunderx3t110_fp_mov3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fconsts,fconstd,fmov,f_mrc")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_fp_mov" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fconsts,fconstd,fmov,f_mrc")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_fp_mov_to_gen" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_mcr")) + "thunderx3t110_f0123") + +;; FP loads and stores. +; model use of I0/I1/I2 for post/pre index modes + +(define_insn_reservation "thunderx3t110_fp_load_basic" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_loads,f_loadd")) + "thunderx3t110_ls01") + +; model throughput 1 +(define_insn_reservation "thunderx3t110_fp_store_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_stores,f_stored")) + "thunderx3t110_ls01,thunderx3t110_sd") + +;; ASIMD integer instructions. + +(define_insn_reservation "thunderx3t110_asimd_int" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_abd,neon_abd_q,\ + neon_arith_acc,neon_arith_acc_q,\ + neon_abs,neon_abs_q,\ + neon_add,neon_add_q,\ + neon_sub,neon_sub_q,\ + neon_neg,neon_neg_q,\ + neon_add_long,neon_add_widen,\ + neon_add_halve,neon_add_halve_q,\ + neon_sub_long,neon_sub_widen,\ + neon_sub_halve,neon_sub_halve_q,\ + neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\ + neon_qabs,neon_qabs_q,\ + neon_qadd,neon_qadd_q,\ + neon_qneg,neon_qneg_q,\ + neon_qsub,neon_qsub_q,\ + neon_minmax,neon_minmax_q,\ + neon_reduc_minmax,neon_reduc_minmax_q,\ + neon_mul_b,neon_mul_h,neon_mul_s,\ + neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\ + neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\ + neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\ + neon_mla_b,neon_mla_h,neon_mla_s,\ + neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\ + neon_mul_b_long,neon_mul_h_long,\ + neon_mul_s_long,neon_mul_d_long,\ + neon_sat_mul_b_long,neon_sat_mul_h_long,\ + neon_sat_mul_s_long,\ + neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\ + neon_sat_mla_b_long,neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_shift_acc,neon_shift_acc_q,\ + neon_shift_imm,neon_shift_imm_q,\ + neon_shift_reg,neon_shift_reg_q,\ + neon_shift_imm_long,neon_shift_imm_narrow_q,\ + neon_sat_shift_imm,neon_sat_shift_imm_q,\ + neon_sat_shift_reg,neon_sat_shift_reg_q,\ + neon_sat_shift_imm_narrow_q")) + "thunderx3t110_f0123") + +; neon_reduc_add is used for both addp and [su]adalp +(define_insn_reservation "thunderx3t110_asimd_reduc_add" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_reduc_add,neon_reduc_add_q")) + "thunderx3t110_f01") + +(define_insn_reservation "thunderx3t110_asimd_cmp" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\ + neon_tst,neon_tst_q")) + "thunderx3t110_f0123") + +; neon_logic used in ldr, str, mov, umov, fmov, mov; orn; bic; and, +; simd mov immediate; orr, simd mov immediate; eor; not (mvn) +; latency 4 throughput 1/2 LS0/LS1: ldr +; latency 1 throughput 1 LS0/LS1,SDI,I0/I1/I2: str +; latency 3|4 throughput 1/2|1/4 F2/F3 F0/F1/F2/F3: fmov immed, orn, +; bic, and, orr, eor, not (mvn) +; latency 4 throughput 1/4 F0/F1/F2/F3: fmov register, fmov gen to vec +; latency 5 throughput 1/4 F0/F1/F2/F3: fmov vec to gen, umov, fmov +(define_insn_reservation "thunderx3t110_asimd_logic4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_logic,neon_logic_q")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_asimd_logic5" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_logic,neon_logic_q")) + "thunderx3t110_f0123") + +;; ASIMD floating-point instructions. + +; Distinguish between latency 5 throughput 1/4: fabs, fmax, fmin, fneg +; latency 4 throughput 1/4: fcmp +(define_insn_reservation "thunderx3t110_asimd_fp_simple" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\ + neon_fp_abs_s_q,neon_fp_abs_d_q,\ + neon_fp_compare_s,neon_fp_compare_d,\ + neon_fp_compare_s_q,neon_fp_compare_d_q,\ + neon_fp_minmax_s,neon_fp_minmax_d,\ + neon_fp_minmax_s_q,neon_fp_minmax_d_q,\ + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\ + neon_fp_neg_s,neon_fp_neg_d,\ + neon_fp_neg_s_q,neon_fp_neg_d_q")) + "thunderx3t110_f0123") + +; distinguish between latency 3 throughput 1/2, +; latency 4 throughput 1/4 +; neon_fp_reduc_add_<stype><q> is used for both faddp and +; vector reduction add. On TX3, faddp is 3|4 1/2|1/4 and reduction is 5 1/4 +(define_insn_reservation "thunderx3t110_asimd_fp_arith3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\ + neon_fp_abd_s_q,neon_fp_abd_d_q,\ + neon_fp_addsub_s,neon_fp_addsub_d,\ + neon_fp_addsub_s_q,neon_fp_addsub_d_q,\ + neon_fp_reduc_add_s,neon_fp_reduc_add_d,\ + neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_asimd_fp_arith4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\ + neon_fp_abd_s_q,neon_fp_abd_d_q,\ + neon_fp_addsub_s,neon_fp_addsub_d,\ + neon_fp_addsub_s_q,neon_fp_addsub_d_q,\ + neon_fp_reduc_add_s,neon_fp_reduc_add_d,\ + neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_fp_arith5" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_d,\ + neon_fp_mul_s_q,neon_fp_mul_d_q,\ + neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\ + neon_fp_mla_s,neon_fp_mla_d,\ + neon_fp_mla_s_q,neon_fp_mla_d_q")) + "thunderx3t110_f0123") + +; neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q: fcvtl,fctvl2,fcvtn,fcvtn2 +; neon_fp_to_int_s,neon_fp_to_int_d: fcvt{<frint_suffix><su>,z<su>} +; where frint_suffix: zpmixan, su: su (plus other sign/unsign/extract... +; neon_fp_to_int_s_q,neon_fp_to_int_d_q: fcvtz<su> other +; The int_to_fp* is complicated +; neon_int_to_fp_s,neon_int_to_fp_d: <su_optab>cvtf +; neon_int_to_fp_s_q,neon_int_to_fp_d_q +; Round matches single define_insn, frint<frint_suffix> +; neon_fp_round_s,neon_fp_round_d,neon_fp_round_s_q, +; neon_fp_round_d_q: frint<frint_suffix> +; FCVT*,VCVTAU,[SU]CVTF: latency 5 throughput 1/4 +; FRINT*: latency 5 throughput 1/4 +(define_insn_reservation "thunderx3t110_asimd_fp_conv" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\ + neon_fp_to_int_s,neon_fp_to_int_d,\ + neon_fp_to_int_s_q,neon_fp_to_int_d_q,\ + neon_int_to_fp_s,neon_int_to_fp_d,\ + neon_int_to_fp_s_q,neon_int_to_fp_d_q,\ + neon_fp_round_s,neon_fp_round_d,\ + neon_fp_round_s_q,neon_fp_round_d_q")) + "thunderx3t110_f0123") + +; model that pipeline is occupied the whole time D/F32, Q/F32: 16/4 +; Q/F64: 23/4 +(define_insn_reservation "thunderx3t110_asimd_fp_div_s" 16 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_fp_div_d" 23 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q")) + "thunderx3t110_f0123") + +;; ASIMD miscellaneous instructions. + +; divided out: +; rbit,bsl,bsl_q,cls,cls_q,cnt,cnt_q,move,move_q: 3|4 1/2 | 1/4 +; from_gp,from_gp_q : 4 | 1/4 +; dup,dup_q,ext,ext_q,ins,ins_q,all recpe forms, rev,rev_q: 5 1/4 +; permute,permute_q needs to depend on aarch64_expand_vec_perm_const does +; on TX3 +(define_insn_reservation "thunderx3t110_asimd_misc3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_rbit,\ + neon_bsl,neon_bsl_q,\ + neon_cls,neon_cls_q,\ + neon_cnt,neon_cnt_q,\ + neon_move,neon_move_q")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_asimd_misc4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_rbit,\ + neon_bsl,neon_bsl_q,\ + neon_cls,neon_cls_q,\ + neon_cnt,neon_cnt_q,\ + neon_from_gp,neon_from_gp_q,\ + neon_move,neon_move_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_misc" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" " + neon_dup,neon_dup_q,\ + neon_ext,neon_ext_q,\ + neon_ins,neon_ins_q,\ + neon_move,neon_move_q,\ + neon_fp_recpe_s,neon_fp_recpe_d,\ + neon_fp_recpe_s_q,neon_fp_recpe_d_q,\ + neon_fp_recpx_s,neon_fp_recpx_d,\ + neon_fp_recpx_s_q,neon_fp_recpx_d_q,\ + neon_rev,neon_rev_q,\ + neon_permute,neon_permute_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_recip_step" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\ + neon_fp_recps_d,neon_fp_recps_d_q,\ + neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ + neon_fp_sqrt_d,neon_fp_sqrt_d_q,\ + neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\ + neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\ + neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut1" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl1,neon_tbl1_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut2" 10 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl2,neon_tbl2_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut3" 15 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl3,neon_tbl3_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut4" 20 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl4,neon_tbl4_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_elt_to_gr" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_to_gp,neon_to_gp_q")) + "thunderx3t110_f0123") + +;; ASIMD load instructions. + +; NOTE: These reservations attempt to model latency and throughput +; correctly, but the cycle timing of unit allocation is not +; necessarily accurate (because insns are split into uops, and those +; may be issued out-of-order). + +; the LDP/LDNP imm-offset S/D/Q suppplies the first arg with latency 4 +; and the 2nd at 5 (Q form) or 8 (S/D form). Can this be modeled? These +;forms, as documented, do not use the I0/I1/I2 units (no I3), but the +; other LDP ones do. +(define_insn_reservation "thunderx3t110_asimd_load1_ldp" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_ldp,neon_ldp_q")) + "thunderx3t110_i012,thunderx3t110_ls01") + +; Need to distinguish latency 6 throughput 2: 4 reg D/Q +; latency 5 throughput 3/2: 3 reg D/Q +; latency 4 throughput 1: 2 reg D/Q +; latency 4 throughput 1/2: 1 reg D/Q +(define_insn_reservation "thunderx3t110_asimd_load1" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ + neon_load1_2reg,neon_load1_2reg_q,\ + neon_load1_3reg,neon_load1_3reg_q,\ + neon_load1_4reg,neon_load1_4reg_q")) + "thunderx3t110_ls01") + +(define_insn_reservation "thunderx3t110_asimd_load1_onelane" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load1_all" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load2" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\ + neon_load2_one_lane,neon_load2_one_lane_q,\ + neon_load2_all_lanes,neon_load2_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load3" 7 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\ + neon_load3_one_lane,neon_load3_one_lane_q,\ + neon_load3_all_lanes,neon_load3_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load4" 8 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\ + neon_load4_one_lane,neon_load4_one_lane_q,\ + neon_load4_all_lanes,neon_load4_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +;; ASIMD store instructions. + +; Same note applies as for ASIMD load instructions. + +; Vector Store pair Need to distinguish: +; 5 throughput: imm-offset S/D; imm-postindex S/D; imm-preindex S/D +; 2 throughput: imm-offset Q; imm-postindex Q; imm-preindex Q +; all index modes use I0/I1/I2 +(define_insn_reservation "thunderx3t110_asimd_store_stp" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_stp,neon_stp_q")) + "thunderx3t110_ls01,thunderx3t110_sd") + +; There are multiple forms of ST1 +; The following two groups, as documented, do not use the FP pipelines. +; multiple, 1 reg, D-form ST1 +; tx2_ltp: x 1/2 LS0/LS1 +; tx3_ltp: x 1/2 LS0/LS1 +; multiple, 1 reg, Q-form ST1 +; tx2_ltp: x 1/2 LS0/LS1 +; tx3_ltp: x 1/2 LS0/LS1 +; +; one lane, B/H/S ST1 +; tx2_ltp: x 1/2 LS0/LS1,F0/F1 +; tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3 +; one lane, D ST1 +; tx2_ltp: x 1/2 LS0/LS1,F0/F1 +; tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3 +;; Model for st1 insn needs refinement for different register forms +; multiple, 2 reg, D-form ST1 x 1 LS0/LS1 +; multiple, 2 reg, Q-form ST1 x 1 LS0/LS1 +; multiple, 3 reg, D-form ST1 x 3/2 LS0/LS1 +; multiple, 3 reg, Q-form ST1 x 3/2 LS0/LS1 +; multiple,4 reg, D-form ST1 x 2 LS0/LS1 +; multiple,4 reg, Q-form ST1 x 2 LS0/LS1 +(define_insn_reservation "thunderx3t110_asimd_store1" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\ + neon_store1_2reg,neon_store1_2reg_q,\ + neon_store1_3reg,neon_store1_4reg")) + "thunderx3t110_ls01") + +(define_insn_reservation "thunderx3t110_asimd_store1_onelane" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +; distinguish between throughput 1: D/Q-form B/H/S, Q-form D and +; throughput 1/2: one lane B/H/S/D +(define_insn_reservation "thunderx3t110_asimd_store2" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\ + neon_store2_one_lane,neon_store2_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +; distinguish between throughput 3: D/Q-form B/H/S, Q-form D and +; throughput 1: one lane B/H/S/D +(define_insn_reservation "thunderx3t110_asimd_store3" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\ + neon_store3_one_lane,neon_store3_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +; distinguish between throughput 4: D/Q-form B/H/S, Q-form D and +; throughput 1: one lane B/H/S/D? (not in doc) +(define_insn_reservation "thunderx3t110_asimd_store4" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\ + neon_store4_one_lane,neon_store4_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +;; Crypto extensions. + +(define_insn_reservation "thunderx3t110_aes" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crypto_aese,crypto_aesmc")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_sha" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\ + crypto_sha256_fast,crypto_sha256_slow")) + "thunderx3t110_f0123") + +;; CRC extension. + +(define_insn_reservation "thunderx3t110_crc" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crc")) + "thunderx3t110_i1") + +;; PMULL extension. + +(define_insn_reservation "thunderx3t110_pmull" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crypto_pmull")) + "thunderx3t110_f0123") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a37a2ee..a101928 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -16950,7 +16950,7 @@ performance of the code. Permissible values for this option are: @samp{octeontx2f95mm} @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110}, -@samp{thunderxt83}, @samp{thunderx2t99}, +@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53}, @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55} |