aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
author Tejas Joshi <TejasSanjay.Joshi@amd.com> 2022-06-28 16:33:53 +0530
committer Venkataramanan Kumar <Venkataramanan.Kumar@amd.com> 2022-10-21 15:25:57 +0530
commit bf3b532b524ecacb3202ab2c8af419ffaaab7cff (patch)
tree 0fbde96ab28f9269588ac179f880ca78e95ef4fb /gcc/config/i386
parent 88b34661f73e9da8032d20a37299223b39db4ef9 (diff)
download gcc-bf3b532b524ecacb3202ab2c8af419ffaaab7cff.zip
gcc-bf3b532b524ecacb3202ab2c8af419ffaaab7cff.tar.gz
gcc-bf3b532b524ecacb3202ab2c8af419ffaaab7cff.tar.bz2
Enable AMD znver4 support and add instruction reservations
2022-09-28 Tejas Joshi <TejasSanjay.Joshi@amd.com> gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver4. * common/config/i386/i386-common.cc (processor_names): Add znver4. (processor_alias_table): Add znver4 and modularize old znvers. * common/config/i386/i386-cpuinfo.h (processor_subtypes): AMDFAM19H_ZNVER4. * config.gcc (x86_64-*-* |...): Likewise. * config/i386/driver-i386.cc (host_detect_local_cpu): Let -march=native recognize znver4 cpus. * config/i386/i386-c.cc (ix86_target_macros_internal): Add znver4. * config/i386/i386-options.cc (m_ZNVER4): New definition. (m_ZNVER): Include m_ZNVER4. (processor_cost_table): Add znver4. * config/i386/i386.cc (ix86_reassociation_width): Likewise. * config/i386/i386.h (processor_type): Add PROCESSOR_ZNVER4. (PTA_ZNVER1): New definition. (PTA_ZNVER2): Likewise. (PTA_ZNVER3): Likewise. (PTA_ZNVER4): Likewise. * config/i386/i386.md (define_attr "cpu"): Add znver4 and rename md file. * config/i386/x86-tune-costs.h (znver4_cost): New definition. * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add znver4. (ix86_adjust_cost): Likewise. * config/i386/znver1.md: Rename to znver.md. * config/i386/znver.md: Add new reservations for znver4. * doc/extend.texi: Add details about znver4. * doc/invoke.texi: Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/funcspec-56.inc: Handle new march. * g++.target/i386/mv29.C: Likewise.
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- gcc/config/i386/driver-i386.cc 5
-rw-r--r-- gcc/config/i386/i386-c.cc 7
-rw-r--r-- gcc/config/i386/i386-options.cc 6
-rw-r--r-- gcc/config/i386/i386.cc 2
-rw-r--r-- gcc/config/i386/i386.h 16
-rw-r--r-- gcc/config/i386/i386.md 4
-rw-r--r-- gcc/config/i386/x86-tune-costs.h 133
-rw-r--r-- gcc/config/i386/x86-tune-sched.cc 2
-rw-r--r-- gcc/config/i386/znver.md (renamed from gcc/config/i386/znver1.md) 849
9 files changed, 983 insertions, 41 deletions
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index ef56704..aa16895 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -465,6 +465,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
processor = PROCESSOR_GEODE;
else if (has_feature (FEATURE_MOVBE) && family == 22)
processor = PROCESSOR_BTVER2;
+ else if (has_feature (FEATURE_AVX512F))
+ processor = PROCESSOR_ZNVER4;
else if (has_feature (FEATURE_VAES))
processor = PROCESSOR_ZNVER3;
else if (has_feature (FEATURE_CLWB))
@@ -779,6 +781,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case PROCESSOR_ZNVER3:
cpu = "znver3";
break;
+ case PROCESSOR_ZNVER4:
+ cpu = "znver4";
+ break;
case PROCESSOR_BTVER1:
cpu = "btver1";
break;
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index a9a35c0..f70f891 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -132,6 +132,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__znver3");
def_or_undef (parse_in, "__znver3__");
break;
+ case PROCESSOR_ZNVER4:
+ def_or_undef (parse_in, "__znver4");
+ def_or_undef (parse_in, "__znver4__");
+ break;
case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__btver1");
def_or_undef (parse_in, "__btver1__");
@@ -330,6 +334,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_ZNVER3:
def_or_undef (parse_in, "__tune_znver3__");
break;
+ case PROCESSOR_ZNVER4:
+ def_or_undef (parse_in, "__tune_znver4__");
+ break;
case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__tune_btver1__");
break;
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 3e6d044..1f14d69 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -154,11 +154,12 @@ along with GCC; see the file COPYING3. If not see
#define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
#define m_ZNVER2 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER2)
#define m_ZNVER3 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER3)
+#define m_ZNVER4 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER4)
#define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
-#define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
+#define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
| m_ZNVER)
@@ -773,7 +774,8 @@ static const struct processor_costs *processor_cost_table[] =
&btver2_cost,
&znver1_cost,
&znver2_cost,
- &znver3_cost
+ &znver3_cost,
+ &znver4_cost
};
/* Guarantee that the array is aligned with enum processor_type. */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 480db35..aeea26e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23079,7 +23079,7 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
/* Integer vector instructions execute in FP unit
and can execute 3 additions and one multiplication per cycle. */
if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
- || ix86_tune == PROCESSOR_ZNVER3)
+ || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
&& INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
return 1;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 372a2cf..fd7c9df 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2255,6 +2255,7 @@ enum processor_type
PROCESSOR_ZNVER1,
PROCESSOR_ZNVER2,
PROCESSOR_ZNVER3,
+ PROCESSOR_ZNVER4,
PROCESSOR_max
};
@@ -2347,6 +2348,21 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2
+ | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
+ | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
+ | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT
+ | PTA_POPCNT;
+constexpr wide_int_bitmask PTA_ZNVER2 = PTA_ZNVER1 | PTA_CLWB | PTA_RDPID
+ | PTA_WBNOINVD;
+constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
+ | PTA_PKU;
+constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
+ | PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL
+ | PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
+ | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;
#ifndef GENERATOR_FILE
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 93538c5..baf1f1f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -474,7 +474,7 @@
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
- bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3"
+ bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
(const (symbol_ref "ix86_schedule")))
;; A basic instruction type. Refinements due to arguments to be
@@ -1309,7 +1309,7 @@
(include "bdver1.md")
(include "bdver3.md")
(include "btver2.md")
-(include "znver1.md")
+(include "znver.md")
(include "geode.md")
(include "atom.md")
(include "slm.md")
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 6c9066c..aeaa7eb0 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1820,6 +1820,139 @@ struct processor_costs znver3_cost = {
"16", /* Func alignment. */
};
+/* This table currently replicates znver3_cost table. */
+struct processor_costs znver4_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+
+ /* reg-reg moves are done by renaming and thus they are even cheaper than
+ 1 cycle. Because reg-reg move cost is 2 and following tables correspond
+ to doubles of latencies, we do not model this correctly. It does not
+ seem to make practical difference to bump prices up even more. */
+ 6, /* cost for loading QImode using
+ movzbl. */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {6, 6, 16}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {8, 8, 16}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ 6, 6, /* SSE->integer and integer->SSE
+ moves. */
+ 8, 8, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {8, 8, 8}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (3), /* SI. */
+ COSTS_N_INSNS (3), /* DI. */
+ COSTS_N_INSNS (3)}, /* other. */
+ 0, /* cost of multiply per each bit
+ set. */
+ {COSTS_N_INSNS (9), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (10), /* HI. */
+ COSTS_N_INSNS (12), /* SI. */
+ COSTS_N_INSNS (17), /* DI. */
+ COSTS_N_INSNS (17)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 9, /* MOVE_RATIO. */
+ 6, /* CLEAR_RATIO */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ 6, /* cost of moving SSE register to integer. */
+ /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
+ throughput 9. Approx 7 uops do not depend on vector size and every load
+ is 4 uops. */
+ 14, 8, /* Gather load static, per_elt. */
+ 14, 10, /* Gather store static, per_elt. */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block. */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set number of simultaneous prefetches
+ to a large constant to reflect this (it probably is not a good idea not
+ to limit number of prefetches at all, as their execution also takes some
+ time). */
+ 100, /* number of parallel prefetches. */
+ 3, /* Branch cost. */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ /* Latency of fdiv is 8-15. */
+ COSTS_N_INSNS (15), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ /* Latency of fsqrt is 4-10. */
+ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
+
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of MULSS instruction. */
+ COSTS_N_INSNS (3), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
+ /* 9-13. */
+ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
+ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ /* Zen can execute 4 integer operations per cycle. FP operations
+ take 3 cycles and it can execute 2 integer additions and 2
+ multiplications thus reassociation may make sense up to width of 6.
+ SPEC2k6 benchmarks suggest
+ that 4 works better than 6 probably due to register pressure.
+
+ Integer vector operations are taken by FP unit and execute 3 vector
+ plus/minus operations per cycle but only one multiply. This is adjusted
+ in ix86_reassociation_width. */
+ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ znver2_memcpy,
+ znver2_memset,
+ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
+ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
+};
+
/* skylake_cost should produce code tuned for Skylake familly of CPUs. */
static stringop_algs skylake_memcpy[2] = {
{libcall,
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index e2765f8..96eb06a 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -68,6 +68,7 @@ ix86_issue_rate (void)
case PROCESSOR_ZNVER1:
case PROCESSOR_ZNVER2:
case PROCESSOR_ZNVER3:
+ case PROCESSOR_ZNVER4:
case PROCESSOR_CORE2:
case PROCESSOR_NEHALEM:
case PROCESSOR_SANDYBRIDGE:
@@ -415,6 +416,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case PROCESSOR_ZNVER1:
case PROCESSOR_ZNVER2:
case PROCESSOR_ZNVER3:
+ case PROCESSOR_ZNVER4:
/* Stack engine allows to execute push&pop instructions in parall. */
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
&& (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
diff --git a/gcc/config/i386/znver1.md b/gcc/config/i386/znver.md
index 9c25b4e..376a145 100644
--- a/gcc/config/i386/znver1.md
+++ b/gcc/config/i386/znver.md
@@ -23,8 +23,8 @@
;; AMD znver1, znver2 and znver3 Scheduling
;; Modeling automatons for zen decoders, integer execution pipes,
-;; AGU pipes and floating point execution units.
-(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
+;; AGU pipes, floating point execution, branch and store units.
+(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver4_bru, znver4_fp_store")
;; Decoders unit has 4 decoders and all of them can decode fast path
;; and vector type instructions.
@@ -63,6 +63,8 @@
;; Load is 4 cycles. We do not model reservation of load unit.
;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing")
(define_reservation "znver1-load" "znver1-agu-reserve")
+;; According to the manual, all AGUs are used for loads and stores in znver4.
+(define_reservation "znver4-load" "znver2-store-agu-reserve")
;; Store operations differs between znver1, znver2 and znver3 because extra AGU
;; was added.
(define_reservation "znver1-store" "znver1-agu-reserve")
@@ -93,6 +95,11 @@
+znver1-fp2+znver1-fp3
+znver1-agu0+znver1-agu1+znver2-agu2")
+;; znver4 has one branch unit in znver1-ieu0 and a separate branch unit.
+(define_cpu_unit "znver4-bru0" "znver4_bru")
+;; znver4 also has a dedicated fp-store unit.
+(define_cpu_unit "znver4-fp-store0" "znver4_fp_store")
+
;; Call instruction
(define_insn_reservation "znver1_call" 1
(and (eq_attr "cpu" "znver1")
@@ -104,6 +111,11 @@
(eq_attr "type" "call,callv"))
"znver1-double,znver2-store,znver1-ieu0|znver1-ieu3")
+(define_insn_reservation "znver4_call" 1
+ (and (eq_attr "cpu" "znver4")
+ (eq_attr "type" "call,callv"))
+ "znver1-double,znver1-ieu0|znver4-bru0,znver2-store")
+
;; General instructions
(define_insn_reservation "znver1_push" 1
(and (eq_attr "cpu" "znver1")
@@ -111,7 +123,7 @@
(eq_attr "memory" "store")))
"znver1-direct,znver1-store")
(define_insn_reservation "znver2_push" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "type" "push")
(eq_attr "memory" "store")))
"znver1-direct,znver2-store")
@@ -126,12 +138,22 @@
(and (eq_attr "type" "push")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver2-store")
+(define_insn_reservation "znver4_push_load" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "push")
+ (eq_attr "memory" "both")))
+ "znver1-direct,znver4-load,znver2-store")
(define_insn_reservation "znver1_pop" 4
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "pop")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load")
+(define_insn_reservation "znver4_pop" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "pop")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load")
(define_insn_reservation "znver1_pop_mem" 4
(and (eq_attr "cpu" "znver1")
@@ -143,6 +165,11 @@
(and (eq_attr "type" "pop")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver2-store")
+(define_insn_reservation "znver4_pop_mem" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "pop")
+ (eq_attr "memory" "both")))
+ "znver1-direct,znver4-load,znver2-store")
;; Leave
(define_insn_reservation "znver1_leave" 1
@@ -150,7 +177,7 @@
(eq_attr "type" "leave"))
"znver1-double,znver1-ieu, znver1-store")
(define_insn_reservation "znver2_leave" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(eq_attr "type" "leave"))
"znver1-double,znver1-ieu, znver2-store")
@@ -162,12 +189,29 @@
(and (eq_attr "type" "imul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-ieu1")
+(define_insn_reservation "znver4_imul" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "SI,HI,QI")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-ieu1")
+(define_insn_reservation "znver4_imul_DI" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-ieu1")
(define_insn_reservation "znver1_imul_mem" 7
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imul")
(eq_attr "memory" "!none")))
"znver1-direct,znver1-load, znver1-ieu1")
+(define_insn_reservation "znver4_imul_mem" 7
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "!none")))
+ "znver1-direct,znver4-load, znver1-ieu1")
;; Divisions
;; Reg operands
@@ -261,14 +305,14 @@
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-ieu2*22")
+ "znver1-double,znver1-load,znver1-ieu2*18")
(define_insn_reservation "znver3_idiv_mem_SI" 16
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "SI")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-ieu2*16")
+ "znver1-double,znver1-load,znver1-ieu2*12")
(define_insn_reservation "znver3_idiv_mem_HI" 14
(and (eq_attr "cpu" "znver3")
@@ -284,6 +328,62 @@
(eq_attr "memory" "load"))))
"znver1-direct,znver1-load,znver1-ieu2*9")
+(define_insn_reservation "znver4_idiv_DI" 18
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu0*18")
+
+(define_insn_reservation "znver4_idiv_SI" 12
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu0*12")
+
+(define_insn_reservation "znver4_idiv_HI" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu0*10")
+
+(define_insn_reservation "znver4_idiv_QI" 9
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-ieu0*9")
+
+(define_insn_reservation "znver4_idiv_mem_DI" 22
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-ieu0*18")
+
+(define_insn_reservation "znver4_idiv_mem_SI" 16
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-ieu0*12")
+
+(define_insn_reservation "znver4_idiv_mem_HI" 14
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-ieu0*10")
+
+(define_insn_reservation "znver4_idiv_mem_QI" 13
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-ieu0*9")
+
;; STR ISHIFT which are micro coded.
;; Fix me: Latency need to be rechecked.
(define_insn_reservation "znver1_str_ishift" 6
@@ -293,15 +393,15 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_str_ishift" 3
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "type" "ishift")
(eq_attr "memory" "both,store")))
- "znver1-vector,znver1-ivector")
+ "znver1-vector,znver2-ivector")
(define_insn_reservation "znver2_str_istr" 19
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "type" "str")
(eq_attr "memory" "both,store")))
- "znver1-vector,znver1-ivector")
+ "znver1-vector,znver2-ivector")
;; MOV - integer moves
(define_insn_reservation "znver1_load_imov_double" 2
@@ -318,8 +418,15 @@
(eq_attr "memory" "none"))))
"znver1-double,znver1-ieu|znver1-ieu")
+(define_insn_reservation "znver4_load_imov_double" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "znver1_decode" "double")
+ (and (eq_attr "type" "imovx")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu0|znver1-ieu3")
+
(define_insn_reservation "znver1_load_imov_direct" 1
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "none")))
"znver1-direct,znver1-ieu")
@@ -332,7 +439,7 @@
"znver1-double,znver1-ieu|znver1-ieu,znver1-store")
(define_insn_reservation "znver2_load_imov_double_store" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "store"))))
@@ -345,7 +452,7 @@
"znver1-direct,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_load_imov_direct_store" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "store")))
"znver1-direct,znver1-ieu,znver2-store")
@@ -364,6 +471,13 @@
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-ieu|znver1-ieu")
+(define_insn_reservation "znver4_load_imov_double_load" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "znver1_decode" "double")
+ (and (eq_attr "type" "imovx")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-ieu")
+
(define_insn_reservation "znver1_load_imov_direct_load" 4
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imov,imovx")
@@ -378,12 +492,48 @@
(eq_attr "memory" "none,unknown")))
"znver1-direct,znver1-ieu")
+(define_insn_reservation "znver4_insn_1" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+ (eq_attr "memory" "none,unknown")))
+ "znver1-direct,znver1-ieu")
+
+(define_insn_reservation "znver4_insn_2" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+ (eq_attr "memory" "none,unknown")))
+ "znver1-direct,znver1-ieu1|znver1-ieu2")
+
+(define_insn_reservation "znver4_insn_3" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "setcc,icmov")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-ieu0|znver1-ieu3")
+
(define_insn_reservation "znver1_insn_load" 5
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-ieu")
+(define_insn_reservation "znver4_insn_1_load" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-ieu")
+
+(define_insn_reservation "znver4_insn_2_load" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-ieu1|znver1-ieu2")
+
+(define_insn_reservation "znver4_insn_3_load" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "setcc,icmov")
+ (eq_attr "memory" "load")))
+ "znver1-double,znver4-load,znver1-ieu0|znver1-ieu3")
+
(define_insn_reservation "znver1_insn_store" 1
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
@@ -396,6 +546,24 @@
(eq_attr "memory" "store")))
"znver1-direct,znver1-ieu,znver2-store")
+(define_insn_reservation "znver4_insn_1_store" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+ (eq_attr "memory" "store")))
+ "znver1-direct,znver1-ieu,znver2-store")
+
+(define_insn_reservation "znver4_insn_2_store" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+ (eq_attr "memory" "store")))
+ "znver1-direct,znver1-ieu1|znver1-ieu2,znver2-store")
+
+(define_insn_reservation "znver4_insn_3_store" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "setcc,icmov")
+ (eq_attr "memory" "store")))
+ "znver1-double,znver1-ieu0|znver1-ieu3,znver2-store")
+
(define_insn_reservation "znver1_insn_both" 5
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
@@ -408,6 +576,24 @@
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-ieu,znver2-store")
+(define_insn_reservation "znver4_insn_1_both" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+ (eq_attr "memory" "both")))
+ "znver1-direct,znver4-load,znver1-ieu,znver2-store")
+
+(define_insn_reservation "znver4_insn_2_both" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+ (eq_attr "memory" "both")))
+ "znver1-direct,znver4-load,znver1-ieu1|znver1-ieu2,znver2-store")
+
+(define_insn_reservation "znver4_insn_3_both" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "setcc,icmov")
+ (eq_attr "memory" "both")))
+ "znver1-double,znver4-load,znver1-ieu0|znver1-ieu3,znver2-store")
+
;; Fix me: Other vector type insns keeping latency 6 as of now.
(define_insn_reservation "znver1_ieu_vector" 6
(and (eq_attr "cpu" "znver1")
@@ -415,7 +601,7 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_ieu_vector" 5
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(eq_attr "type" "other,str,multi"))
"znver1-vector,znver2-ivector")
@@ -428,21 +614,21 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_alu1_vector" 3
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "znver1_decode" "vector")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
"znver1-vector,znver2-ivector")
(define_insn_reservation "znver1_alu1_double" 2
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
"znver1-double,znver1-ieu")
(define_insn_reservation "znver1_alu1_direct" 1
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
@@ -454,6 +640,11 @@
(and (eq_attr "type" "ibr")
(eq_attr "memory" "none")))
"znver1-direct")
+(define_insn_reservation "znver4_branch" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-ieu0|znver4-bru0")
;; Indirect branches check latencies.
(define_insn_reservation "znver1_indirect_branch_mem" 6
@@ -468,25 +659,36 @@
(eq_attr "memory" "load")))
"znver1-vector,znver2-ivector")
+(define_insn_reservation "znver4_indirect_branch_mem" 6
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "load")))
+ "znver1-vector,znver2-ivector+znver4-bru0")
+
;; LEA executes in ALU units with 1 cycle latency.
(define_insn_reservation "znver1_lea" 1
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(eq_attr "type" "lea"))
"znver1-direct,znver1-ieu")
-;; Other integer instrucions
+;; Other integer instructions
(define_insn_reservation "znver1_idirect" 1
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown")))
"znver1-direct,znver1-ieu")
;; Floating point
(define_insn_reservation "znver1_fp_cmov" 6
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1")
(eq_attr "type" "fcmov"))
"znver1-vector,znver1-fvector")
+(define_insn_reservation "znver2_fp_cmov" 6
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
+ (eq_attr "type" "fcmov"))
+ "znver1-vector,znver2-fvector")
+
(define_insn_reservation "znver1_fp_mov_direct_load" 8
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
@@ -494,6 +696,13 @@
(eq_attr "memory" "load"))))
"znver1-direct,znver1-load,znver1-fp3|znver1-fp1")
+(define_insn_reservation "znver4_fp_mov_direct_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "znver1_decode" "direct")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp2|znver1-fp3")
+
(define_insn_reservation "znver1_fp_mov_direct_store" 5
(and (eq_attr "cpu" "znver1")
(and (eq_attr "znver1_decode" "direct")
@@ -501,7 +710,7 @@
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fp2|znver1-fp3,znver1-store")
(define_insn_reservation "znver2_fp_mov_direct_store" 5
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store"))))
@@ -514,6 +723,13 @@
(eq_attr "memory" "none"))))
"znver1-double,znver1-fp3")
+(define_insn_reservation "znver4_fp_mov_double" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "znver1_decode" "double")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-fp1")
+
(define_insn_reservation "znver1_fp_mov_double_load" 12
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
@@ -521,11 +737,23 @@
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3")
+(define_insn_reservation "znver4_fp_mov_double_load" 11
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "znver1_decode" "double")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-fp1")
+
(define_insn_reservation "znver1_fp_mov_direct" 1
(and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fmov"))
"znver1-direct,znver1-fp3")
+(define_insn_reservation "znver4_fp_mov_direct" 1
+ (and (eq_attr "cpu" "znver4")
+ (eq_attr "type" "fmov"))
+ "znver1-direct,znver1-fp1")
+
;; TODO: AGU?
(define_insn_reservation "znver1_fp_spc_direct" 5
(and (eq_attr "cpu" "znver1,znver2,znver3")
@@ -533,13 +761,25 @@
(eq_attr "memory" "store")))
"znver1-direct,znver1-fp3,znver1-fp2")
+(define_insn_reservation "znver4_fp_spc_direct" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "store")))
+ "znver1-direct,znver1-fp1,znver4-fp-store0")
+
+(define_insn_reservation "znver4_fp_sqrt_direct" 22
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp1")
+
(define_insn_reservation "znver1_fp_insn_vector" 6
(and (eq_attr "cpu" "znver1")
(and (eq_attr "znver1_decode" "vector")
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
"znver1-vector,znver1-fvector")
(define_insn_reservation "znver2_fp_insn_vector" 6
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "znver1_decode" "vector")
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
"znver1-vector,znver2-fvector")
@@ -550,6 +790,11 @@
(eq_attr "type" "fsgn"))
"znver1-direct,znver1-fp3")
+(define_insn_reservation "znver4_fp_fsgn" 1
+ (and (eq_attr "cpu" "znver4")
+ (eq_attr "type" "fsgn"))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
(define_insn_reservation "znver1_fp_fcmp" 2
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "memory" "none")
@@ -557,13 +802,39 @@
(eq_attr "type" "fcmp"))))
"znver1-double,znver1-fp0,znver1-fp2")
+(define_insn_reservation "znver4_fp_fcmp_double" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "znver1_decode" "double")
+ (eq_attr "type" "fcmp"))))
+ "znver1-double,znver1-fp0,znver4-fp-store0")
+
+(define_insn_reservation "znver4_fp_fcmp" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp0")
+
(define_insn_reservation "znver1_fp_fcmp_load" 9
(and (eq_attr "cpu" "znver1,znver2,znver3")
- (and (eq_attr "memory" "none")
+ (and (eq_attr "memory" "load")
(and (eq_attr "znver1_decode" "double")
(eq_attr "type" "fcmp"))))
"znver1-double,znver1-load, znver1-fp0,znver1-fp2")
+(define_insn_reservation "znver4_fp_fcmp_double_load" 11
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "znver1_decode" "double")
+ (eq_attr "type" "fcmp"))))
+ "znver1-double,znver4-load,znver1-fp0,znver4-fp-store0")
+
+(define_insn_reservation "znver4_fp_fcmp_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp0")
+
;;FADD FSUB FMUL
(define_insn_reservation "znver1_fp_op_mul" 5
(and (eq_attr "cpu" "znver1,znver2,znver3")
@@ -571,12 +842,31 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*5")
+(define_insn_reservation "znver4_fp_op_mul" 6
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fop,fmul")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp0*6")
+
(define_insn_reservation "znver1_fp_op_mul_load" 12
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*5")
+(define_insn_reservation "znver4_fp_op_mul_load" 13
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fop,fmul")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp0*6")
+
+(define_insn_reservation "znver4_fp_op_imul" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fop,fmul")
+ (and (eq_attr "fp_int_src" "true")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-fp1,znver1-fp0")
+
(define_insn_reservation "znver1_fp_op_imul_load" 16
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
@@ -584,8 +874,15 @@
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3,znver1-fp0")
+(define_insn_reservation "znver4_fp_op_imul_load" 17
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fop,fmul")
+ (and (eq_attr "fp_int_src" "true")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-fp1,znver1-fp0")
+
(define_insn_reservation "znver1_fp_op_div" 15
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*15")
@@ -596,6 +893,12 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*15")
+(define_insn_reservation "znver4_fp_op_div_load" 22
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp3*15")
+
(define_insn_reservation "znver1_fp_op_idiv_load" 27
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "fdiv")
@@ -610,6 +913,19 @@
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3*19")
+(define_insn_reservation "znver4_fp_op_idiv" 19
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fdiv")
+ (and (eq_attr "fp_int_src" "true")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-fp1,znver1-fp1")
+
+(define_insn_reservation "znver4_fp_op_idiv_load" 26 ; fdiv with fp_int_src and a memory load on znver4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "fdiv")
+ (and (eq_attr "fp_int_src" "true")
+ (eq_attr "memory" "load")))) ; load form: "none" is already taken by znver4_fp_op_idiv
+ "znver1-double,znver4-load,znver1-fp1,znver1-fp1")
;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
(define_insn_reservation "znver1_fp_insn" 1
@@ -623,26 +939,49 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
+(define_insn_reservation "znver4_fp_insn" 1 ; mmx and mmxadd insns on znver4, reserving znver1-fpu
+ (and (eq_attr "cpu" "znver4")
+ (eq_attr "type" "mmx,mmxadd")) ; NOTE(review): no "memory" test here, unlike the neighbouring mmx reservations, so insns with memory operands match too - confirm intended
+ "znver1-direct,znver1-fpu")
+
(define_insn_reservation "znver1_mmx_add_load" 8
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
+(define_insn_reservation "znver4_mmx_add_load" 8 ; mmxadd with a memory load on znver4
+ (and (eq_attr "cpu" "znver4") ; znver1/2/3 are already covered by znver1_mmx_add_load
+ (and (eq_attr "type" "mmxadd")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fpu")
+
(define_insn_reservation "znver1_mmx_cmp" 1
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp3")
+(define_insn_reservation "znver4_mmx_cmp" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxcmp")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fpu")
+
(define_insn_reservation "znver1_mmx_cmp_load" 8
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
+(define_insn_reservation "znver4_mmx_cmp_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxcmp")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fpu")
+
(define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1
- (and (eq_attr "cpu" "znver1,znver2,znver3")
+ (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp1|znver1-fp2")
@@ -653,18 +992,48 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
+(define_insn_reservation "znver4_mmx_cvt_pck_shuf_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp1|znver1-fp2")
+
(define_insn_reservation "znver1_mmx_shift_move" 1
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp2")
+(define_insn_reservation "znver4_mmx_shift" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxshft")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp1|znver1-fp2")
+
+(define_insn_reservation "znver4_mmx_move" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver4-fp-store0")
+
(define_insn_reservation "znver1_mmx_shift_move_load" 8
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp2")
+(define_insn_reservation "znver4_mmx_shift_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxshft")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp1|znver1-fp2")
+
+(define_insn_reservation "znver4_mmx_move_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver4-fp-store0")
+
(define_insn_reservation "znver1_mmx_move_store" 1
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "mmxshft,mmxmov")
@@ -676,18 +1045,42 @@
(eq_attr "memory" "store,both")))
"znver1-direct,znver1-fp2,znver2-store")
+(define_insn_reservation "znver4_mmx_shift_store" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxshft")
+ (eq_attr "memory" "store,both")))
+ "znver1-direct,znver1-fp1|znver1-fp2,znver2-store")
+
+(define_insn_reservation "znver4_mmx_move_store" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "store,both")))
+ "znver1-direct,znver4-fp-store0")
+
(define_insn_reservation "znver1_mmx_mul" 3
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*3")
+(define_insn_reservation "znver4_mmx_mul" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "none")))
+ "znver1-direct,(znver1-fp0|znver1-fp3)*3")
+
(define_insn_reservation "znver1_mmx_load" 10
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*3")
+(define_insn_reservation "znver4_mmx_mul_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,(znver1-fp0|znver1-fp3)*3")
+
;; TODO
(define_insn_reservation "znver1_avx256_log" 1
(and (eq_attr "cpu" "znver1")
@@ -709,6 +1102,62 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fpu")
+(define_insn_reservation "znver4_sse_log" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fpu")
+
+(define_insn_reservation "znver4_sse_log_evex" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "V16SF,V8DF")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_log_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fpu")
+
+(define_insn_reservation "znver4_sse_log_evex_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "V16SF,V8DF")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "OI")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog_evex" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fp0+znver1-fp1+znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "OI")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog_evex_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp0+znver1-fp1+znver1-fp2+znver1-fp3")
+
(define_insn_reservation "znver1_sse_log_load" 8
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog")
@@ -771,6 +1220,18 @@
(eq_attr "memory" "none")))))
"znver1-double,znver1-fp0|znver1-fp1")
+(define_insn_reservation "znver4_sse_comi" 1
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "none")))
+ "znver1-double,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
+(define_insn_reservation "znver4_sse_comi_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "znver1-double,znver4-load,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
(define_insn_reservation "znver1_sse_comi_double_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,V2DF,TI"))
@@ -786,7 +1247,7 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(ior (eq_attr "cpu" "znver2")
- (eq_attr "cpu" "znver3")))
+ (eq_attr "cpu" "znver3,znver4")))
(and (eq_attr "prefix_extra" "1")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "none"))))
@@ -802,6 +1263,13 @@
(eq_attr "memory" "load"))))
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
+(define_insn_reservation "znver4_sse_test_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "prefix_extra" "1")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp1|znver1-fp2")
+
;; SSE moves
;; Fix me: Need to revist this again some of the moves may be restricted
;; to some fpu pipes.
@@ -814,7 +1282,7 @@
"znver1-direct,znver1-ieu0")
(define_insn_reservation "znver2_sse_mov" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "mode" "SI")
(and (eq_attr "isa" "avx")
(and (eq_attr "type" "ssemov")
@@ -831,7 +1299,7 @@
"znver1-direct,znver1-ieu2")
(define_insn_reservation "znver2_avx_mov" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "mode" "TI")
(and (eq_attr "isa" "avx")
(and (eq_attr "type" "ssemov")
@@ -843,7 +1311,8 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(ior (eq_attr "cpu" "znver2")
- (eq_attr "cpu" "znver3")))
+ (ior (eq_attr "cpu" "znver3")
+ (eq_attr "cpu" "znver4"))))
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fpu")
@@ -855,7 +1324,7 @@
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fpu,znver1-store")
(define_insn_reservation "znver2_sseavx_mov_store" 1
- (and (eq_attr "cpu" "znver2,znver3")
+ (and (eq_attr "cpu" "znver2,znver3,znver4")
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "store")))
"znver1-direct,znver1-fpu,znver2-store")
@@ -869,6 +1338,12 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fpu")
+(define_insn_reservation "znver4_sseavx_mov_load" 8
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "memory" "load")))
+ "znver1-double,znver4-load,znver1-fpu")
+
(define_insn_reservation "znver1_avx256_mov" 1
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF,V4DF,OI")
@@ -895,7 +1370,8 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(ior (eq_attr "cpu" "znver2")
- (eq_attr "cpu" "znver3")))
+ (ior (eq_attr "cpu" "znver3")
+ (eq_attr "cpu" "znver4"))))
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp2|znver1-fp3")
@@ -909,6 +1385,12 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp2|znver1-fp3")
+(define_insn_reservation "znver4_sseavx_add_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "znver1-double,znver4-load,znver1-fp2|znver1-fp3")
+
(define_insn_reservation "znver1_avx256_add" 3
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF,V4DF,OI")
@@ -960,6 +1442,20 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1")
+(define_insn_reservation "znver4_sseavx_fma" 4
+ (and (and (eq_attr "cpu" "znver4")
+ (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseavx_fma_evex" 4
+ (and (and (eq_attr "cpu" "znver4")
+ (eq_attr "mode" "V16SF,V8DF"))
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp0+znver1-fp1")
+
(define_insn_reservation "znver3_sseavx_fma_load" 11
(and (and (eq_attr "cpu" "znver3")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
@@ -967,6 +1463,20 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
+(define_insn_reservation "znver4_sseavx_fma_load" 11
+ (and (and (eq_attr "cpu" "znver4")
+ (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseavx_fma_evex_load" 11
+ (and (and (eq_attr "cpu" "znver4")
+ (eq_attr "mode" "V16SF,V8DF"))
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp0+znver1-fp1")
+
(define_insn_reservation "znver3_avx256_fma" 4
(and (eq_attr "cpu" "znver3")
(and (eq_attr "mode" "V8SF,V4DF")
@@ -990,6 +1500,20 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
+(define_insn_reservation "znver4_sseavx_iadd" 1
+ (and (and (eq_attr "cpu" "znver4")
+ (eq_attr "mode" "QI,HI,SI,DI,TI,OI,XI"))
+ (and (eq_attr "type" "sseiadd")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fpu")
+
+(define_insn_reservation "znver4_sseavx_iadd_load" 8
+ (and (and (eq_attr "cpu" "znver4")
+ (eq_attr "mode" "QI,HI,SI,DI,TI,OI,XI"))
+ (and (eq_attr "type" "sseiadd")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fpu")
+
(define_insn_reservation "znver1_sseavx_iadd_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "DI,TI"))
@@ -1053,6 +1577,33 @@
(eq_attr "memory" "load")))))
"znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
+(define_insn_reservation "znver4_ssecvtsfdf_si" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "SI")
+ (and (eq_attr "type" "sseicvt")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
+(define_insn_reservation "znver4_ssecvtsfdf_si_load" 11
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "SI")
+ (and (eq_attr "type" "sseicvt")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver4-load,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
+(define_insn_reservation "znver4_ssecvtsfdf_di" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "DI")
+ (and (eq_attr "type" "sseicvt")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fp2|znver1-fp3")
+
+(define_insn_reservation "znver4_ssecvtsfdf_di_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "DI")
+ (and (eq_attr "type" "sseicvt")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp2|znver1-fp3")
;; All other used ssecvt fp3 pipes
;; Check: Need to revisit this again.
@@ -1069,12 +1620,24 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3")
+(define_insn_reservation "znver4_ssecvt" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssecvt")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp2|znver1-fp3")
+
(define_insn_reservation "znver1_ssecvt_load" 11
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3")
+(define_insn_reservation "znver4_ssecvt_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssecvt")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver4-load,znver1-fp2|znver1-fp3")
+
;; SSE div
(define_insn_reservation "znver1_ssediv_ss_ps" 10
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1087,6 +1650,21 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*10")
+(define_insn_reservation "znver4_ssediv_ss_ps" 10 ; single-precision SSE/AVX divide on znver4, no memory operand
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type; ssecvt is handled by znver4_ssecvt
+ (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fp3*10")
+
+(define_insn_reservation "znver4_ssediv_ss_ps_evex" 10 ; EVEX-prefixed single-precision divide on znver4, no memory operand
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type, matching the reservation name
+ (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+ (and (eq_attr "prefix" "evex") ; NOTE(review): znver4_ssediv_ss_ps tests the same attributes without a prefix check - confirm which should win
+ (eq_attr "memory" "none")))))
+ "znver1-direct,znver1-fp1*10")
+
(define_insn_reservation "znver1_ssediv_ss_ps_load" 17
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
@@ -1098,6 +1676,21 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*10")
+(define_insn_reservation "znver4_ssediv_ss_ps_load" 17 ; single-precision SSE/AVX divide on znver4 with a memory load
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type; ssecvt loads are handled by znver4_ssecvt_load
+ (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp3*10")
+
+(define_insn_reservation "znver4_ssediv_ss_ps_evex_load" 17 ; EVEX-prefixed single-precision divide on znver4 with a memory load
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type, matching the reservation name
+ (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+ (and (eq_attr "prefix" "evex") ; NOTE(review): znver4_ssediv_ss_ps_load tests the same attributes without a prefix check - confirm which should win
+ (eq_attr "memory" "load")))))
+ "znver1-direct,znver4-load,znver1-fp1*10")
+
(define_insn_reservation "znver1_ssediv_sd_pd" 13
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V2DF,DF"))
@@ -1109,6 +1702,21 @@
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*13")
+(define_insn_reservation "znver4_ssediv_sd_pd" 13 ; double-precision SSE/AVX divide on znver4, no memory operand
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type; ssecvt is handled by znver4_ssecvt
+ (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-fp3*13")
+
+(define_insn_reservation "znver4_ssediv_sd_pd_evex" 13 ; EVEX-prefixed double-precision divide on znver4, no memory operand
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type, matching the reservation name
+ (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+ (and (eq_attr "prefix" "evex") ; NOTE(review): znver4_ssediv_sd_pd tests the same attributes without a prefix check - confirm which should win
+ (eq_attr "memory" "none")))))
+ "znver1-direct,znver1-fp1*13")
+
(define_insn_reservation "znver1_ssediv_sd_pd_load" 20
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V2DF,DF"))
@@ -1120,6 +1728,21 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*13")
+(define_insn_reservation "znver4_ssediv_sd_pd_load" 20 ; double-precision SSE/AVX divide on znver4 with a memory load
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type; ssecvt loads are handled by znver4_ssecvt_load
+ (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver4-load,znver1-fp3*13")
+
+(define_insn_reservation "znver4_ssediv_sd_pd_evex_load" 20 ; EVEX-prefixed double-precision divide on znver4 with a memory load
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssediv") ; divide type, matching the reservation name
+ (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+ (and (eq_attr "prefix" "evex") ; NOTE(review): znver4_ssediv_sd_pd_load tests the same attributes without a prefix check - confirm which should win
+ (eq_attr "memory" "load")))))
+ "znver1-direct,znver4-load,znver1-fp1*13")
+
(define_insn_reservation "znver1_ssediv_avx256_ps" 12
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
@@ -1153,12 +1776,19 @@
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))
- (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "cpu" "znver3,znver4")
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
"znver1-direct,(znver1-fp0|znver1-fp1)*3")
+(define_insn_reservation "znver4_ssemul_ss_ps_evex" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "V8DF,V16SF")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,(znver1-fp0+znver1-fp1)*3")
+
(define_insn_reservation "znver1_ssemul_ss_ps_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
@@ -1170,6 +1800,13 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+(define_insn_reservation "znver4_ssemul_ss_ps_evex_load" 10 ; V8DF/V16SF ssemul with a memory load on znver4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "V8DF,V16SF")
+ (eq_attr "memory" "load")))) ; load form: "none" is already taken by znver4_ssemul_ss_ps_evex
+ "znver1-direct,znver4-load,(znver1-fp0+znver1-fp1)*3")
+
(define_insn_reservation "znver1_ssemul_avx256_ps" 3
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
@@ -1231,12 +1868,44 @@
(eq_attr "mode" "TI"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "TI,OI"))
- (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "cpu" "znver3,znver4")
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*3")
+(define_insn_reservation "znver4_sseimul" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "TI,OI")
+ (and (eq_attr "type" "sseimul")
+ (and (eq_attr "prefix" "evex")
+ (eq_attr "memory" "none")))))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseimul_evex" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "XI")
+ (and (eq_attr "type" "sseimul")
+ (and (eq_attr "prefix" "evex")
+ (eq_attr "memory" "none")))))
+ "znver1-direct,znver1-fp0+znver1-fp1")
+
+(define_insn_reservation "znver4_sseimul_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "TI,OI")
+ (and (eq_attr "type" "sseimul")
+ (and (eq_attr "prefix" "evex")
+ (eq_attr "memory" "load")))))
+ "znver1-direct,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseimul_evex_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "XI")
+ (and (eq_attr "type" "sseimul")
+ (and (eq_attr "prefix" "evex")
+ (eq_attr "memory" "load")))))
+ "znver1-direct,znver4-load,znver1-fp0+znver1-fp1")
+
(define_insn_reservation "znver1_sseimul_avx256" 4
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
@@ -1282,12 +1951,66 @@
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
- (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "cpu" "znver3,znver4")
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1")
+(define_insn_reservation "znver4_sse_cmp" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "none"))))))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "load"))))))
+ "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_vex" 4 ; V8SF/V4DF ssecmp with a one-byte immediate on znver4, no memory operand
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex") ; NOTE(review): the reservation name says vex but this tests evex - confirm intended
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "none"))))))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_vex_load" 11
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "load"))))))
+ "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_evex" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "V16SF,V8DF")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "none"))))))
+ "znver1-direct,znver1-fp0+znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_evex_load" 12
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "V16SF,V8DF")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "load"))))))
+ "znver1-double,znver4-load,znver1-fp0+znver1-fp1")
+
(define_insn_reservation "znver1_sse_cmp_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
@@ -1318,7 +2041,7 @@
(eq_attr "mode" "QI,HI,SI,DI,TI"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "QI,HI,SI,DI,TI,OI"))
- (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "cpu" "znver3,znver4")
(eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "none")))
@@ -1335,6 +2058,60 @@
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
+(define_insn_reservation "znver4_sse_icmp" 3
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "QI,HI,SI,DI,TI")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "none"))))))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_load" 10
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "QI,HI,SI,DI,TI")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "load"))))))
+ "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_vex" 4
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "OI")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "none"))))))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_ivex_load" 11 ; OI-mode ssecmp with a memory load on znver4; NOTE(review): siblings are named znver4_sse_icmp_* - check for users of this name before renaming
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "OI")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex") ; NOTE(review): the name says vex but this tests evex - confirm intended
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "load"))))))
+ "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_evex" 5
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "XI")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "none"))))))
+ "znver1-direct,znver1-fp0+znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_evex_load" 12
+ (and (eq_attr "cpu" "znver4")
+ (and (eq_attr "mode" "XI")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "length_immediate" "1")
+ (eq_attr "memory" "load"))))))
+ "znver1-double,znver4-load,znver1-fp0+znver1-fp1")
+
(define_insn_reservation "znver1_sse_icmp_avx256" 1
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "OI")