aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/rs6000/power9.md
diff options
context:
space:
mode:
author: Pat Haugen <pthaugen@us.ibm.com> 2016-06-28 13:33:03 +0000
committer: Pat Haugen <pthaugen@gcc.gnu.org> 2016-06-28 13:33:03 +0000
commit: eda328bf1dd994826252fa0435d2e58103c2c2d8 (patch)
tree: f5a8164759ac645efca00b2601b1cdcbc37f566f /gcc/config/rs6000/power9.md
parent: 7d4cdbd485607cc1d575a42d815a5dec7708aded (diff)
download: gcc-eda328bf1dd994826252fa0435d2e58103c2c2d8.zip
gcc-eda328bf1dd994826252fa0435d2e58103c2c2d8.tar.gz
gcc-eda328bf1dd994826252fa0435d2e58103c2c2d8.tar.bz2
rs6000.md ('type' attribute): Add htmsimple/dfp types.
* config/rs6000/rs6000.md ('type' attribute): Add htmsimple/dfp types. ('size' attribute): Add '128'. Include power9.md. (*mov<mode>_hardfloat32, *mov<mode>_hardfloat64, *movdi_internal32, *movdi_internal64, *movdf_update1): Set size attribute to '64'. (add<mode>3, sub<mode>3, mul<mode>3, div<mode>3, sqrt<mode>2, copysign<mode>3, neg<mode>2_hw, abs<mode>2_hw, *nabs<mode>2_hw, *fma<mode>4_hw, *fms<mode>4_hw, *nfma<mode>4_hw, *nfms<mode>4_hw, extend<SFDF:mode><IEEE128:mode>2_hw, trunc<mode>df2_hw, *xscvqp<su>wz_<mode>, *xscvqp<su>dz_<mode>, *xscv<su>dqp_<mode>, *trunc<mode>df2_odd): Set size attribute to '128'. (*cmp<mode>_hw): Change type to veccmp and set size attribute to '128'. * config/rs6000/power6.md (power6-fp): Include dfp type. * config/rs6000/power7.md (power7-fp): Likewise. * config/rs6000/power8.md (power8-fp): Likewise. * config/rs6000/power9.md: New file. * config/rs6000/t-rs6000 (MD_INCLUDES): Add power9.md. * config/rs6000/htm.md (*tabort, *tabort<wd>c, *tabort<wd>ci, *trechkpt, *treclaim, *tsr, *ttest): Change type attribute to htmsimple. * config/rs6000/dfp.md (extendsddd2, truncddsd2, extendddtd2, trunctddd2, adddd3, addtd3, subdd3, subtd3, muldd3, multd3, divdd3, divtd3, *cmpdd_internal1, *cmptd_internal1, floatdidd2, floatditd2, ftruncdd2, fixdddi2, ftrunctd2, fixtddi2, dfp_ddedpd_<mode>, dfp_denbcd_<mode>, dfp_dxex_<mode>, dfp_diex_<mode>, dfp_dscli_<mode>, dfp_dscri_<mode>): Change type attribute to dfp. * config/rs6000/crypto.md (crypto_vshasigma<CR_char>): Change type attribute to vecsimple. * config/rs6000/rs6000.c (power9_cost): Update costs, cache size and prefetch streams. (rs6000_option_override_internal): Remove temporary code setting tuning to power8. Don't set rs6000_sched_groups for power9. (last_scheduled_insn): Change to rtx_insn *. (divide_cnt, vec_load_pendulum): New variables. (rs6000_adjust_cost): Add Power9 to test for store->load separation. (rs6000_issue_rate): Set issue rate for Power9. (is_power9_pairable_vec_type): New. 
(power9_sched_reorder2): New. (rs6000_sched_reorder2): Call new function for Power9 specific reordering. (insn_must_be_first_in_group): Remove Power9. (insn_must_be_last_in_group): Likewise. (force_new_group): Likewise. (rs6000_sched_init): Fix initialization of last_scheduled_insn. Initialize divide_cnt/vec_load_pendulum. (_rs6000_sched_context, rs6000_init_sched_context, rs6000_set_sched_context): Handle context save/restore of new variables. From-SVN: r237820
Diffstat (limited to 'gcc/config/rs6000/power9.md')
-rw-r--r-- gcc/config/rs6000/power9.md 477
1 files changed, 477 insertions, 0 deletions
diff --git a/gcc/config/rs6000/power9.md b/gcc/config/rs6000/power9.md
new file mode 100644
index 0000000..015b5ba
--- /dev/null
+++ b/gcc/config/rs6000/power9.md
@@ -0,0 +1,477 @@
+;; Scheduling description for IBM POWER9 processor.
+;; Copyright (C) 2016 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power9dsp,power9lsu,power9vsu,power9misc")
+
+(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")
+(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")
+; Two vector permute units (prm0/prm1), part of the power9vsu automaton
+(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
+; Two fixed-point divide units (not pipelined), in the power9misc automaton
+(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
+(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")
+
+(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,
+ x2_power9,x3_power9,xb0_power9,xb1_power9,
+ br0_power9,br1_power9" "power9dsp")
+
+
+; Dispatch port reservations
+;
+; Power9 can dispatch a maximum of 6 iops per cycle with the following
+; general restrictions (other restrictions also apply):
+; 1) At most 2 iops per execution slice
+; 2) At most 2 iops to the branch unit
+; Note that insn position in a dispatch group of 6 insns does not imply which
+; execution slice the insn is routed to. The units are used to infer the
+; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
+; with 2 insns with 'superslice' requirement).
+
+; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
+; are listed as separate units to allow those insns that preclude its use to
+; still be scheduled two to a superslice while reserving the 3rd slot. The
+; same applies for xb0/xb1.
+(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")
+(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")
+
+; Dispatch to any execution slice: one of x0-x3, or a whole xa/xb pair
+(define_reservation "DU_any_power9"
+ "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9|
+ DU_xb_power9")
+
+; Even slice: actually reserves both the even and the odd slot (x0+x1 or x2+x3)
+(define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9")
+
+; One slice plus the matching half of the 3rd slot (x0+xa0, x1+xa1, ...)
+(define_reservation "DU_slice_3_power9"
+ "x0_power9+xa0_power9|x1_power9+xa1_power9|
+ x2_power9+xb0_power9|x3_power9+xb1_power9")
+
+; Superslice: both slices of a pair (same unit sets as DU_even_power9)
+(define_reservation "DU_super_power9"
+ "x0_power9+x1_power9|x2_power9+x3_power9")
+
+; 2-way cracked: two slots (two slices, or one slice plus an xa/xb pair)
+(define_reservation "DU_C2_power9" "x0_power9+x1_power9|
+ x1_power9+DU_xa_power9|
+ x1_power9+x2_power9|
+ DU_xa_power9+x2_power9|
+ x2_power9+x3_power9|
+ x3_power9+DU_xb_power9")
+
+; 2-way cracked plus half of a 3rd slot (two slices plus xa0 or xb0)
+(define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
+ x1_power9+x2_power9+xa0_power9|
+ x1_power9+x2_power9+xb0_power9|
+ x2_power9+x3_power9+xb0_power9")
+
+; 3-way cracked: reserves every dispatch unit (whole decode/dispatch cycle)
+(define_reservation "DU_C3_power9"
+ "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+
+ x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")
+
+; Branch ports: either br0 or br1
+(define_reservation "DU_branch_power9" "br0_power9|br1_power9")
+
+
+; Execution unit reservations (single unit, unit pair, or permute unit)
+(define_reservation "LSU_power9"
+ "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")
+
+(define_reservation "LSU_pair_power9"
+ "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9|
+ lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9")
+
+(define_reservation "VSU_power9"
+ "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")
+
+(define_reservation "VSU_super_power9"
+ "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")
+
+(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")
+
+
+; LS Unit -- loads. Update forms also reserve a VSU (LSU_power9+VSU_power9).
+(define_insn_reservation "power9-load" 4
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "no")
+ (eq_attr "update" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_power9")
+
+(define_insn_reservation "power9-load-update" 4
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "no")
+ (eq_attr "update" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-load-ext" 6
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "yes")
+ (eq_attr "update" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,LSU_power9")
+
+(define_insn_reservation "power9-load-ext-update" 6
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "yes")
+ (eq_attr "update" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-fpload-double" 4
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "no")
+ (eq_attr "size" "64")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+(define_insn_reservation "power9-fpload-update-double" 4
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "yes")
+ (eq_attr "size" "64")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+; SFmode loads are cracked and take 2 more cycles than DFmode loads (6 vs. 4)
+(define_insn_reservation "power9-fpload-single" 6
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "no")
+ (eq_attr "size" "32")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9")
+
+(define_insn_reservation "power9-fpload-update-single" 6
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "yes")
+ (eq_attr "size" "32")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-vecload" 5
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_pair_power9")
+
+; Stores: data can issue 2 cycles after AGEN issue (3 cycles for vector stores)
+(define_insn_reservation "power9-store" 0
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "no")
+ (eq_attr "indexed" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+(define_insn_reservation "power9-store-indexed" 0
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "no")
+ (eq_attr "indexed" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+; Update forms have a 2-cycle latency for the updated address register
+(define_insn_reservation "power9-store-update" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "yes")
+ (eq_attr "indexed" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+; Update forms have a 2-cycle latency for the updated address register
+(define_insn_reservation "power9-store-update-indexed" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "yes")
+ (eq_attr "indexed" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-fpstore" 0
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "update" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+; Update forms have a 2-cycle latency for the updated address register
+(define_insn_reservation "power9-fpstore-update" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "update" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-vecstore" 0
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,LSU_pair_power9")
+
+(define_insn_reservation "power9-larx" 4
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_power9")
+
+(define_insn_reservation "power9-stcx" 2
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-sync" 4
+ (and (eq_attr "type" "sync,isync")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_power9")
+
+
+; VSU Execution Unit
+
+; Fixed-point ops
+
+; Most ALU insns are simple 2-cycle ops, including record forms
+(define_insn_reservation "power9-alu" 2
+ (and (ior (eq_attr "type" "add,cmp,exts,integer,logical,isel")
+ (and (eq_attr "type" "insert,shift")
+ (eq_attr "dot" "no")))
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Record-form rotates/shifts (insert,shift with dot) are 2-way cracked
+(define_insn_reservation "power9-cracked-alu" 2
+ (and (eq_attr "type" "insert,shift")
+ (eq_attr "dot" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,VSU_power9")
+; CR result has a 4-cycle latency to CR-logical, mfcr/mfcrf and branch insns
+(define_bypass 4 "power9-cracked-alu"
+ "power9-crlogical,power9-mfcr,power9-mfcrf,power9-branch")
+
+(define_insn_reservation "power9-alu2" 3
+ (and (eq_attr "type" "cntlz,popcnt,trap")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Treat the 'two' and 'three' types as 2-way and 3-way cracked respectively
+(define_insn_reservation "power9-two" 4
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,VSU_power9")
+
+(define_insn_reservation "power9-three" 6
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,VSU_power9")
+
+(define_insn_reservation "power9-mul" 4
+ (and (eq_attr "type" "mul")
+ (eq_attr "dot" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-mul-compare" 4
+ (and (eq_attr "type" "mul")
+ (eq_attr "dot" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,VSU_power9")
+; CR result has a 6-cycle latency to CR-logical, mfcr/mfcrf and branch insns
+(define_bypass 6 "power9-mul-compare"
+ "power9-crlogical,power9-mfcr,power9-mfcrf,power9-branch")
+
+; Fixed-point divides reserve one divide unit for a minimum of 8 cycles
+(define_insn_reservation "power9-idiv" 16
+ (and (eq_attr "type" "div")
+ (eq_attr "size" "32")
+ (eq_attr "cpu" "power9"))
+ "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
+
+(define_insn_reservation "power9-ldiv" 24
+ (and (eq_attr "type" "div")
+ (eq_attr "size" "64")
+ (eq_attr "cpu" "power9"))
+ "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
+
+(define_insn_reservation "power9-crlogical" 2
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-mfcrf" 2
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-mfcr" 6
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,VSU_power9")
+
+; TODO: differentiate between a target of 1 CR field and > 1, since an mtcr
+; targeting > 1 CR field is cracked (not modeled here)
+(define_insn_reservation "power9-mtcr" 2
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Moves to LR/CTR are executed in the VSU
+(define_insn_reservation "power9-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Floating-point/vector ops
+(define_insn_reservation "power9-fpsimple" 2
+ (and (eq_attr "type" "fpsimple")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-fp" 7
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-fpcompare" 3
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+; FP div/sqrt execute in the VSU slices. They are not pipelined with respect
+; to other divide insns, but for the most part do not block pipelined ops.
+(define_insn_reservation "power9-sdiv" 22
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-sqrt" 26
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-dsqrt" 36
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-vec-2cyc" 2
+ (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-veccmp" 3
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecsimple" 3
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecnormal" 7
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "size" "!128")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+; Quad-precision FP ops (size 128) execute in the DFU instead of the VSU
+(define_insn_reservation "power9-qp" 12
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "size" "128")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,dfu_power9")
+
+(define_insn_reservation "power9-vecperm" 3
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_PRM_power9")
+
+(define_insn_reservation "power9-veccomplex" 7
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecfdiv" 28
+ (and (eq_attr "type" "vecfdiv")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecdiv" 32
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "size" "!128")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-qpdiv" 56
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "size" "128")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,dfu_power9")
+
+(define_insn_reservation "power9-mffgpr" 2
+ (and (eq_attr "type" "mffgpr")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-mftgpr" 2
+ (and (eq_attr "type" "mftgpr")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+
+; Branch Unit
+; Moves from LR/CTR are executed in the BRU but consume a writeback port from
+; an execution slice (hence bru_power9+VSU_power9 below).
+(define_insn_reservation "power9-mfjmpr" 6
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "power9"))
+ "DU_branch_power9,bru_power9+VSU_power9")
+
+; Branches (jmpreg,branch) have a 2-cycle latency
+(define_insn_reservation "power9-branch" 2
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power9"))
+ "DU_branch_power9,bru_power9")
+
+
+; Crypto Unit: superslice dispatch, then cryptu_power9; 6-cycle latency
+(define_insn_reservation "power9-crypto" 6
+ (and (eq_attr "type" "crypto")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,cryptu_power9")
+
+
+; HTM Unit: 'htm' insns reserve an LSU, 'htmsimple' insns reserve a VSU
+(define_insn_reservation "power9-htm" 4
+ (and (eq_attr "type" "htm")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,LSU_power9")
+
+(define_insn_reservation "power9-htm-simple" 2
+ (and (eq_attr "type" "htmsimple")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+
+; DFP Unit: 'dfp' insns reserve dfu_power9 (also used by power9-qp/qpdiv)
+(define_insn_reservation "power9-dfp" 12
+ (and (eq_attr "type" "dfp")
+ (eq_attr "cpu" "power9"))
+ "DU_even_power9,dfu_power9")
+