about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexander Monakov <amonakov@ispras.ru> 2022-11-01 17:53:13 +0300
committer: Alexander Monakov <amonakov@ispras.ru> 2022-11-16 16:41:39 +0300
commit: d4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f (patch)
tree: 98e6fc048db7a67a48e9ebcebb01a4abf2cb5169
parent: dd744f06c9952f92738b0860630085f0f0b99574 (diff)
download: gcc-d4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f.zip
download: gcc-d4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f.tar.gz
download: gcc-d4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f.tar.bz2
i386: correct x87&SSE multiplication modeling in znver.md
All multiplication instructions are fully pipelined, except AVX256
instructions on Zen 1, which issue over two cycles on a 128-bit unit.

Correct the model accordingly to reduce combinatorial explosion in
automaton tables.

Top znver table sizes in insn-automata.o:

Before:

30056 r znver1_fp_min_issue_delay
120224 r znver1_fp_transitions

After:

6720 r znver1_fp_min_issue_delay
53760 r znver1_fp_transitions

gcc/ChangeLog:

	PR target/87832
	* config/i386/znver.md: (znver1_fp_op_mul): Correct cycles in
	the reservation.
	(znver1_fp_op_mul_load): Ditto.
	(znver1_mmx_mul): Ditto.
	(znver1_mmx_load): Ditto.
	(znver1_ssemul_ss_ps): Ditto.
	(znver1_ssemul_ss_ps_load): Ditto.
	(znver1_ssemul_avx256_ps): Ditto.
	(znver1_ssemul_avx256_ps_load): Ditto.
	(znver1_ssemul_sd_pd): Ditto.
	(znver1_ssemul_sd_pd_load): Ditto.
	(znver2_ssemul_sd_pd): Ditto.
	(znver2_ssemul_sd_pd_load): Ditto.
	(znver1_ssemul_avx256_pd): Ditto.
	(znver1_ssemul_avx256_pd_load): Ditto.
	(znver1_sseimul): Ditto.
	(znver1_sseimul_avx256): Ditto.
	(znver1_sseimul_load): Ditto.
	(znver1_sseimul_avx256_load): Ditto.
	(znver1_sseimul_di): Ditto.
	(znver1_sseimul_load_di): Ditto.
-rw-r--r-- gcc/config/i386/znver.md 40
1 files changed, 20 insertions, 20 deletions
diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md
index c52f8b5..882f250 100644
--- a/gcc/config/i386/znver.md
+++ b/gcc/config/i386/znver.md
@@ -573,13 +573,13 @@
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp0*5")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_fp_op_mul_load" 12
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp0*5")
+ "znver1-direct,znver1-load,znver1-fp0")
(define_insn_reservation "znver1_fp_op_imul_load" 16
(and (eq_attr "cpu" "znver1,znver2,znver3")
@@ -684,13 +684,13 @@
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp0*3")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_mmx_load" 10
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp0*3")
+ "znver1-direct,znver1-load,znver1-fp0")
;; TODO
(define_insn_reservation "znver1_avx256_log" 1
@@ -1161,7 +1161,7 @@
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
- "znver1-direct,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_ss_ps_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1172,47 +1172,47 @@
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_avx256_ps" 3
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none"))))
- "znver1-double,(znver1-fp0|znver1-fp1)*3")
+ "znver1-double,znver1-fp0*2|znver1-fp1*2")
(define_insn_reservation "znver1_ssemul_avx256_ps_load" 10
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*3")
+ "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2")
(define_insn_reservation "znver1_ssemul_sd_pd" 4
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V2DF,DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none"))))
- "znver1-direct,(znver1-fp0|znver1-fp1)*4")
+ "znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_sd_pd_load" 11
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V2DF,DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load"))))
- "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4")
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver2_ssemul_sd_pd" 3
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
- "znver1-direct,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver2_ssemul_sd_pd_load" 10
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_avx256_pd" 5
@@ -1220,14 +1220,14 @@
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none"))))
- "znver1-double,(znver1-fp0|znver1-fp1)*4")
+ "znver1-double,znver1-fp0*2|znver1-fp1*2")
(define_insn_reservation "znver1_ssemul_avx256_pd_load" 12
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*4")
+ "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2")
;;SSE imul
(define_insn_reservation "znver1_sseimul" 3
@@ -1239,14 +1239,14 @@
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp0*3")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_sseimul_avx256" 4
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-fp0*4")
+ "znver1-double,znver1-fp0*2")
(define_insn_reservation "znver1_sseimul_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1257,28 +1257,28 @@
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp0*3")
+ "znver1-direct,znver1-load,znver1-fp0")
(define_insn_reservation "znver1_sseimul_avx256_load" 11
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp0*4")
+ "znver1-double,znver1-load,znver1-fp0*2")
(define_insn_reservation "znver1_sseimul_di" 3
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "memory" "none")
(eq_attr "type" "sseimul"))))
- "znver1-direct,znver1-fp0*3")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_sseimul_load_di" 10
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
- "znver1-direct,znver1-load,znver1-fp0*3")
+ "znver1-direct,znver1-load,znver1-fp0")
;; SSE compares
(define_insn_reservation "znver1_sse_cmp" 1