aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Alexander Monakov <amonakov@ispras.ru> 2022-10-31 17:35:57 +0300
committer: Alexander Monakov <amonakov@ispras.ru> 2022-11-01 15:20:57 +0300
commit: 5cee5f94000ee5eabce9b223c44c7923c1c69f61 (patch)
tree: 628e3f4e76dc49e92652b88160826ebdc6e01bef /gcc
parent: 0122faae30fe1ad1dfa8c69f3d3f0428b996b600 (diff)
download: gcc-5cee5f94000ee5eabce9b223c44c7923c1c69f61.zip
          gcc-5cee5f94000ee5eabce9b223c44c7923c1c69f61.tar.gz
          gcc-5cee5f94000ee5eabce9b223c44c7923c1c69f61.tar.bz2
i386: correct integer division modeling in znver.md
In znver.md, division instructions have descriptions like

(define_insn_reservation "znver1_idiv_DI" 41
                         (and (eq_attr "cpu" "znver1,znver2")
                              (and (eq_attr "type" "idiv")
                                   (and (eq_attr "mode" "DI")
                                        (eq_attr "memory" "none"))))
                         "znver1-double,znver1-ieu2*41")

which says that DImode idiv has latency 41 (which is correct) and that
it occupies 2nd integer execution unit for 41 consecutive cycles, but
that is not correct:

1) the division instruction is partially pipelined, and has throughput
   1/14, not 1/41;

2) for the most part it occupies a separate division unit, not the
   general arithmetic unit.

Evidently, interaction of such 41-cycle paths with the rest of
reservations causes a combinatorial explosion in the automaton.

Fix this by modeling the integer division unit properly, and correcting
reservations to use the measured reciprocal throughput of those
instructions (available from uops.info). A similar correction for
floating-point divisions is left for a followup patch.

Top 5 znver table sizes, before:

68692 r znver1_ieu_check
68692 r znver1_ieu_transitions
99792 r znver1_ieu_min_issue_delay
428108 r znver1_fp_min_issue_delay
856216 r znver1_fp_transitions

After:

1454 r znver1_ieu_translate
1454 r znver1_translate
2304 r znver1_ieu_transitions
428108 r znver1_fp_min_issue_delay
856216 r znver1_fp_transitions

gcc/ChangeLog:

	PR target/87832
	* config/i386/znver.md (znver1_idiv): New automaton.
	(znver1-idiv): New unit.
	(znver1_idiv_DI): Correct unit and cycles in the reservation.
	(znver1_idiv_SI): Ditto.
	(znver1_idiv_HI): Ditto.
	(znver1_idiv_QI): Ditto.
	(znver1_idiv_mem_DI): Ditto.
	(znver1_idiv_mem_SI): Ditto.
	(znver1_idiv_mem_HI): Ditto.
	(znver1_idiv_mem_QI): Ditto.
	(znver3_idiv_DI): Ditto.
	(znver3_idiv_SI): Ditto.
	(znver3_idiv_HI): Ditto.
	(znver3_idiv_QI): Ditto.
	(znver3_idiv_mem_DI): Ditto.
	(znver3_idiv_mem_SI): Ditto.
	(znver3_idiv_mem_HI): Ditto.
	(znver3_idiv_mem_QI): Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/znver.md 39
1 files changed, 21 insertions, 18 deletions
diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md
index 9c25b4e..4aa098f 100644
--- a/gcc/config/i386/znver.md
+++ b/gcc/config/i386/znver.md
@@ -23,8 +23,8 @@
;; AMD znver1, znver2 and znver3 Scheduling
;; Modeling automatons for zen decoders, integer execution pipes,
-;; AGU pipes and floating point execution units.
-(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
+;; SIMD/FP domain, AGU pipes, and dividers.
+(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver1_idiv")
;; Decoders unit has 4 decoders and all of them can decode fast path
;; and vector type instructions.
@@ -93,6 +93,9 @@
+znver1-fp2+znver1-fp3
+znver1-agu0+znver1-agu1+znver2-agu2")
+;; Dividers
+(define_cpu_unit "znver1-idiv" "znver1_idiv")
+
;; Call instruction
(define_insn_reservation "znver1_call" 1
(and (eq_attr "cpu" "znver1")
@@ -176,28 +179,28 @@
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-ieu2*41")
+ "znver1-double,znver1-idiv*14")
(define_insn_reservation "znver1_idiv_SI" 25
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "SI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-ieu2*25")
+ "znver1-double,znver1-idiv*14")
(define_insn_reservation "znver1_idiv_HI" 17
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "HI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-ieu2*17")
+ "znver1-double,znver1-idiv*14")
(define_insn_reservation "znver1_idiv_QI" 12
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "none"))))
- "znver1-direct,znver1-ieu2*12")
+ "znver1-direct,znver1-idiv*13")
;; Mem operands
(define_insn_reservation "znver1_idiv_mem_DI" 45
@@ -205,84 +208,84 @@
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-load,znver1-ieu2*41")
+ "znver1-double,znver1-load,znver1-idiv*14")
(define_insn_reservation "znver1_idiv_mem_SI" 29
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "SI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-load,znver1-ieu2*25")
+ "znver1-double,znver1-load,znver1-idiv*14")
(define_insn_reservation "znver1_idiv_mem_HI" 21
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "HI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-load,znver1-ieu2*17")
+ "znver1-double,znver1-load,znver1-idiv*14")
(define_insn_reservation "znver1_idiv_mem_QI" 16
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "none"))))
- "znver1-direct,znver1-load,znver1-ieu2*12")
+ "znver1-direct,znver1-load,znver1-idiv*13")
(define_insn_reservation "znver3_idiv_DI" 18
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-ieu2*18")
+ "znver1-double,znver1-idiv*7")
(define_insn_reservation "znver3_idiv_SI" 12
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "SI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-ieu2*12")
+ "znver1-double,znver1-idiv*6")
(define_insn_reservation "znver3_idiv_HI" 10
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "HI")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-ieu2*10")
+ "znver1-double,znver1-idiv*4")
(define_insn_reservation "znver3_idiv_QI" 9
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "none"))))
- "znver1-direct,znver1-ieu2*9")
+ "znver1-direct,znver1-idiv*4")
(define_insn_reservation "znver3_idiv_mem_DI" 22
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-ieu2*22")
+ "znver1-double,znver1-load,znver1-idiv*7")
(define_insn_reservation "znver3_idiv_mem_SI" 16
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "SI")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-ieu2*16")
+ "znver1-double,znver1-load,znver1-idiv*6")
(define_insn_reservation "znver3_idiv_mem_HI" 14
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "HI")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-ieu2*10")
+ "znver1-double,znver1-load,znver1-idiv*4")
(define_insn_reservation "znver3_idiv_mem_QI" 13
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "load"))))
- "znver1-direct,znver1-load,znver1-ieu2*9")
+ "znver1-direct,znver1-load,znver1-idiv*4")
;; STR ISHIFT which are micro coded.
;; Fix me: Latency need to be rechecked.