aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Alexander Monakov <amonakov@ispras.ru> 2022-11-01 17:04:25 +0300
committer: Alexander Monakov <amonakov@ispras.ru> 2022-11-16 16:41:39 +0300
commit: dd744f06c9952f92738b0860630085f0f0b99574 (patch)
tree: 3e4e2f196315dbaf04732f001ce101d9f64c1203 /gcc
parent: 3c54805d03ac1bcc3d8547ffb5e6c4e1f301a7a2 (diff)
download: gcc-dd744f06c9952f92738b0860630085f0f0b99574.zip
gcc-dd744f06c9952f92738b0860630085f0f0b99574.tar.gz
gcc-dd744f06c9952f92738b0860630085f0f0b99574.tar.bz2
i386: correct x87&SSE division modeling in znver.md
Correct modeling of division instructions in the SIMD/FP domain for AMD Zen architectures and avoid combinatorial explosion of automaton tables by modeling the separate floating-point division unit and correcting reservations to reflect reciprocal throughput of the corresponding instructions, similar to earlier commit 5cee5f94000 ("i386: correct integer division modeling in znver.md").

Division is partially pipelined and some instructions have fractional throughput (e.g. Zen 3 can issue divss and divsd each 3.5 and 4.5 cycles on average, respectively). Considering these CPUs implement out-of-order execution, the model doesn't need to be exact to the last cycle, so simplify it by using 4/5 cycles for SF/DF modes, and not modeling the fact that FP3 pipe is occupied for one cycle.

Top znver table sizes in insn-automata.o:

Before:

428108 r znver1_fp_min_issue_delay
856216 r znver1_fp_transitions

After:

30056 r znver1_fp_min_issue_delay
120224 r znver1_fp_transitions

gcc/ChangeLog:

PR target/87832
* config/i386/znver.md (znver1_fdiv): New automaton.
(znver1-fdiv): New unit.
(znver1_fp_op_div): Correct unit and cycles in the reservation.
(znver1_fp_op_div_load): Ditto.
(znver1_fp_op_idiv_load): Ditto.
(znver2_fp_op_idiv_load): Ditto.
(znver1_ssediv_ss_ps): Ditto.
(znver1_ssediv_ss_ps_load): Ditto.
(znver1_ssediv_sd_pd): Ditto.
(znver1_ssediv_sd_pd_load): Ditto.
(znver1_ssediv_avx256_ps): Ditto.
(znver1_ssediv_avx256_ps_load): Ditto.
(znver1_ssediv_avx256_pd): Ditto.
(znver1_ssediv_avx256_pd_load): Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/znver.md 27
1 files changed, 14 insertions, 13 deletions
diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md
index 4aa098f..c52f8b5 100644
--- a/gcc/config/i386/znver.md
+++ b/gcc/config/i386/znver.md
@@ -24,7 +24,7 @@
;; AMD znver1, znver2 and znver3 Scheduling
;; Modeling automatons for zen decoders, integer execution pipes,
;; SIMD/FP domain, AGU pipes, and dividers.
-(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver1_idiv")
+(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver1_idiv, znver1_fdiv")
;; Decoders unit has 4 decoders and all of them can decode fast path
;; and vector type instructions.
@@ -95,6 +95,7 @@
;; Dividers
(define_cpu_unit "znver1-idiv" "znver1_idiv")
+(define_cpu_unit "znver1-fdiv" "znver1_fdiv")
;; Call instruction
(define_insn_reservation "znver1_call" 1
@@ -591,27 +592,27 @@
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp3*15")
+ "znver1-direct,znver1-fdiv*6")
(define_insn_reservation "znver1_fp_op_div_load" 22
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp3*15")
+ "znver1-direct,znver1-load,znver1-fdiv*6")
(define_insn_reservation "znver1_fp_op_idiv_load" 27
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp3*19")
+ "znver1-double,znver1-load,znver1-fdiv*6")
(define_insn_reservation "znver2_fp_op_idiv_load" 26
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp3*19")
+ "znver1-double,znver1-load,znver1-fdiv*6")
;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
@@ -1088,7 +1089,7 @@
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp3*10")
+ "znver1-direct,znver1-fdiv*4")
(define_insn_reservation "znver1_ssediv_ss_ps_load" 17
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1099,7 +1100,7 @@
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp3*10")
+ "znver1-direct,znver1-load,znver1-fdiv*4")
(define_insn_reservation "znver1_ssediv_sd_pd" 13
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1110,7 +1111,7 @@
(eq_attr "mode" "V4DF,V2DF,DF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp3*13")
+ "znver1-direct,znver1-fdiv*5")
(define_insn_reservation "znver1_ssediv_sd_pd_load" 20
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1121,35 +1122,35 @@
(eq_attr "mode" "V4DF,V2DF,DF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp3*13")
+ "znver1-direct,znver1-load,znver1-fdiv*5")
(define_insn_reservation "znver1_ssediv_avx256_ps" 12
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "memory" "none")
(eq_attr "type" "ssediv"))))
- "znver1-double,znver1-fp3*12")
+ "znver1-double,znver1-fdiv*8")
(define_insn_reservation "znver1_ssediv_avx256_ps_load" 19
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp3*12")
+ "znver1-double,znver1-load,znver1-fdiv*8")
(define_insn_reservation "znver1_ssediv_avx256_pd" 15
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-fp3*15")
+ "znver1-double,znver1-fdiv*10")
(define_insn_reservation "znver1_ssediv_avx256_pd_load" 22
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp3*15")
+ "znver1-double,znver1-load,znver1-fdiv*10")
;; SSE MUL
(define_insn_reservation "znver1_ssemul_ss_ps" 3
(and (ior (and (eq_attr "cpu" "znver1")