aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorVenkataramanan Kumar <Venkataramanan.Kumar@amd.com>2020-12-05 11:12:15 +0530
committerVenkataramanan Kumar <Venkataramanan.Kumar@amd.com>2020-12-05 11:19:35 +0530
commit3e2ae3ee285a57455d5a23bd352a68c289130186 (patch)
tree4204651ca4dfc0fc778b852ce18853b9c20df143 /gcc
parent625e002396f7d0108f845bfba6a6f4f4fcadad05 (diff)
downloadgcc-3e2ae3ee285a57455d5a23bd352a68c289130186.zip
gcc-3e2ae3ee285a57455d5a23bd352a68c289130186.tar.gz
gcc-3e2ae3ee285a57455d5a23bd352a68c289130186.tar.bz2
X86_64: Enable support for next generation AMD Zen3 CPU.
2020-12-03 Venkataramanan Kumar <Venkataramanan.Kumar@amd.com> Sharavan Kumar <Shravan.Kumar@amd.com> gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver3. * common/config/i386/i386-common.c (processor_names): Add znver3. (processor_alias_table): Add znver3 and AMDFAM19H entry. * common/config/i386/i386-cpuinfo.h (processor_types): Add AMDFAM19H. (processor_subtypes): Add AMDFAM19H_ZNVER3. * config.gcc (i[34567]86-*-linux* | ...): Add znver3. * config/i386/driver-i386.c (host_detect_local_cpu): Let -march=native recognize znver3 processors. * config/i386/i386-c.c (ix86_target_macros_internal): Add znver3. * config/i386/i386-options.c (m_ZNVER3): New definition. (m_ZNVER): Include m_ZNVER3. (processor_cost_table): Add znver3. * config/i386/i386.c (ix86_reassociation_width): Likewise. * config/i386/i386.h (TARGET_ZNVER3): New definition. (enum processor_type): Add PROCESSOR_ZNVER3. * config/i386/i386.md (define_attr "cpu"): Add znver3. * config/i386/x86-tune-sched.c (ix86_issue_rate): Likewise. (ix86_adjust_cost): Likewise. * config/i386/x86-tune.def (X86_TUNE_AVOID_256FMA_CHAINS): Likewise. * config/i386/znver1.md: Add new reservations for znver3. * doc/extend.texi: Add details about znver3. * doc/invoke.texi: Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/funcspec-56.inc: Handle new march. * g++.target/i386/mv29.C: New file.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/common/config/i386/cpuinfo.h17
-rw-r--r--gcc/common/config/i386/i386-common.c16
-rw-r--r--gcc/common/config/i386/i386-cpuinfo.h2
-rw-r--r--gcc/config.gcc10
-rw-r--r--gcc/config/i386/driver-i386.c5
-rw-r--r--gcc/config/i386/i386-c.c7
-rw-r--r--gcc/config/i386/i386-options.c4
-rw-r--r--gcc/config/i386/i386.c5
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/config/i386/i386.md2
-rw-r--r--gcc/config/i386/x86-tune-sched.c2
-rw-r--r--gcc/config/i386/x86-tune.def2
-rw-r--r--gcc/config/i386/znver1.md353
-rw-r--r--gcc/doc/extend.texi6
-rw-r--r--gcc/doc/invoke.texi7
-rw-r--r--gcc/testsuite/g++.target/i386/mv29.C79
-rw-r--r--gcc/testsuite/gcc.target/i386/funcspec-56.inc6
17 files changed, 397 insertions, 128 deletions
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 41728a2..4f1ab63 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -241,6 +241,23 @@ get_amd_cpu (struct __processor_model *cpu_model,
cpu_model->__cpu_subtype = AMDFAM17H_ZNVER1;
}
break;
+ case 0x19:
+ cpu_model->__cpu_type = AMDFAM19H;
+ /* AMD family 19h version 1. */
+ if (model <= 0x0f)
+ {
+ cpu = "znver3";
+ CHECK___builtin_cpu_is ("znver3");
+ cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3;
+ }
+ else if (has_cpu_feature (cpu_model, cpu_features2,
+ FEATURE_VAES))
+ {
+ cpu = "znver3";
+ CHECK___builtin_cpu_is ("znver3");
+ cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3;
+ }
+ break;
default:
break;
}
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 8f809c1..2a1d31f 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -1762,7 +1762,8 @@ const char *const processor_names[] =
"btver1",
"btver2",
"znver1",
- "znver2"
+ "znver2",
+ "znver3"
};
/* Guarantee that the array is aligned with enum processor_type. */
@@ -2004,6 +2005,17 @@ const pta processor_alias_table[] =
| PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID
| PTA_WBNOINVD,
M_CPU_SUBTYPE (AMDFAM17H_ZNVER2), P_PROC_AVX2},
+ {"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
+ | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
+ | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
+ | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
+ | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
+ | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID
+ | PTA_WBNOINVD | PTA_VAES | PTA_VPCLMULQDQ | PTA_PKU,
+ M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
@@ -2030,6 +2042,8 @@ const pta processor_alias_table[] =
M_CPU_TYPE (AMDFAM15H), P_NONE},
{"amdfam17h", PROCESSOR_GENERIC, CPU_GENERIC, 0,
M_CPU_TYPE (AMDFAM17H), P_NONE},
+ {"amdfam19h", PROCESSOR_GENERIC, CPU_GENERIC, 0,
+ M_CPU_TYPE (AMDFAM19H), P_NONE},
{"shanghai", PROCESSOR_GENERIC, CPU_GENERIC, 0,
M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE},
{"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0,
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index af02be5..849e95a 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -55,6 +55,7 @@ enum processor_types
INTEL_GOLDMONT,
INTEL_GOLDMONT_PLUS,
INTEL_TREMONT,
+ AMDFAM19H,
CPU_TYPE_MAX,
BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX
};
@@ -86,6 +87,7 @@ enum processor_subtypes
INTEL_COREI7_COOPERLAKE,
INTEL_COREI7_SAPPHIRERAPIDS,
INTEL_COREI7_ALDERLAKE,
+ AMDFAM19H_ZNVER3,
CPU_SUBTYPE_MAX
};
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 7b138d1..9c76044 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -668,7 +668,7 @@ c7 esther"
# 64-bit x86 processors supported by --with-arch=. Each processor
# MUST be separated by exactly one space.
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
-bdver3 bdver4 znver1 znver2 btver1 btver2 k8 k8-sse3 opteron \
+bdver3 bdver4 znver1 znver2 znver3 btver1 btver2 k8 k8-sse3 opteron \
opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \
slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
@@ -3678,6 +3678,10 @@ case ${target} in
arch=znver2
cpu=znver2
;;
+ znver3-*)
+ arch=znver3
+ cpu=znver3
+ ;;
bdver4-*)
arch=bdver4
cpu=bdver4
@@ -3799,6 +3803,10 @@ case ${target} in
arch=znver2
cpu=znver2
;;
+ znver3-*)
+ arch=znver3
+ cpu=znver3
+ ;;
bdver4-*)
arch=bdver4
cpu=bdver4
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index ecdad57..2bfa037 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -455,6 +455,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
processor = PROCESSOR_GEODE;
else if (has_feature (FEATURE_MOVBE) && family == 22)
processor = PROCESSOR_BTVER2;
+ else if (has_feature (FEATURE_VAES))
+ processor = PROCESSOR_ZNVER3;
else if (has_feature (FEATURE_CLWB))
processor = PROCESSOR_ZNVER2;
else if (has_feature (FEATURE_CLZERO))
@@ -753,6 +755,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case PROCESSOR_ZNVER2:
cpu = "znver2";
break;
+ case PROCESSOR_ZNVER3:
+ cpu = "znver3";
+ break;
case PROCESSOR_BTVER1:
cpu = "btver1";
break;
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 87b3a2b..6d690e0 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -128,6 +128,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__znver2");
def_or_undef (parse_in, "__znver2__");
break;
+ case PROCESSOR_ZNVER3:
+ def_or_undef (parse_in, "__znver3");
+ def_or_undef (parse_in, "__znver3__");
+ break;
case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__btver1");
def_or_undef (parse_in, "__btver1__");
@@ -315,6 +319,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_ZNVER2:
def_or_undef (parse_in, "__tune_znver2__");
break;
+ case PROCESSOR_ZNVER3:
+ def_or_undef (parse_in, "__tune_znver3__");
+ break;
case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__tune_btver1__");
break;
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index dc07697..40714c8 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -147,11 +147,12 @@ along with GCC; see the file COPYING3. If not see
#define m_BDVER4 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER4)
#define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
#define m_ZNVER2 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER2)
+#define m_ZNVER3 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER3)
#define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
-#define m_ZNVER (m_ZNVER1 | m_ZNVER2)
+#define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
| m_ZNVER)
@@ -745,6 +746,7 @@ static const struct processor_costs *processor_cost_table[] =
&btver1_cost,
&btver2_cost,
&znver1_cost,
+ &znver2_cost,
&znver2_cost
};
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6321678..3a57710 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21976,8 +21976,9 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
/* Integer vector instructions execute in FP unit
and can execute 3 additions and one multiplication per cycle. */
- if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
- && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
+ if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
+ || ix86_tune == PROCESSOR_ZNVER3)
+ && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
return 1;
/* Account for targets that splits wide vectors into multiple parts. */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index b8ae16e..5680fdc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -484,6 +484,7 @@ extern const struct processor_costs ix86_size_cost;
#define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2)
#define TARGET_ZNVER1 (ix86_tune == PROCESSOR_ZNVER1)
#define TARGET_ZNVER2 (ix86_tune == PROCESSOR_ZNVER2)
+#define TARGET_ZNVER3 (ix86_tune == PROCESSOR_ZNVER3)
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
@@ -2397,6 +2398,7 @@ enum processor_type
PROCESSOR_BTVER2,
PROCESSOR_ZNVER1,
PROCESSOR_ZNVER2,
+ PROCESSOR_ZNVER3,
PROCESSOR_max
};
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0b28895..21f0044 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -458,7 +458,7 @@
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
- bdver4,btver2,znver1,znver2"
+ bdver4,btver2,znver1,znver2,znver3"
(const (symbol_ref "ix86_schedule")))
;; A basic instruction type. Refinements due to arguments to be
diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c
index d4d8a12..404b5b1 100644
--- a/gcc/config/i386/x86-tune-sched.c
+++ b/gcc/config/i386/x86-tune-sched.c
@@ -66,6 +66,7 @@ ix86_issue_rate (void)
case PROCESSOR_BDVER4:
case PROCESSOR_ZNVER1:
case PROCESSOR_ZNVER2:
+ case PROCESSOR_ZNVER3:
case PROCESSOR_CORE2:
case PROCESSOR_NEHALEM:
case PROCESSOR_SANDYBRIDGE:
@@ -396,6 +397,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case PROCESSOR_ZNVER1:
case PROCESSOR_ZNVER2:
+ case PROCESSOR_ZNVER3:
/* Stack engine allows to execute push&pop instructions in parall. */
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
&& (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 6eff825..ed4d74c 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -444,7 +444,7 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER)
/* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
smaller FMA chain. */
-DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2)
+DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3)
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
diff --git a/gcc/config/i386/znver1.md b/gcc/config/i386/znver1.md
index 6812a3d..b0edfab 100644
--- a/gcc/config/i386/znver1.md
+++ b/gcc/config/i386/znver1.md
@@ -21,7 +21,7 @@
(define_attr "znver1_decode" "direct,vector,double"
(const_string "direct"))
-;; AMD znver1 and znver2 Scheduling
+;; AMD znver1, znver2 and znver3 Scheduling
;; Modeling automatons for zen decoders, integer execution pipes,
;; AGU pipes and floating point execution units.
(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
@@ -52,7 +52,7 @@
(define_cpu_unit "znver1-ieu3" "znver1_ieu")
(define_reservation "znver1-ieu" "znver1-ieu0|znver1-ieu1|znver1-ieu2|znver1-ieu3")
-;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2
+;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2 and znver3
;; According to CPU diagram last AGU unit is used only for stores.
(define_cpu_unit "znver1-agu0" "znver1_agu")
(define_cpu_unit "znver1-agu1" "znver1_agu")
@@ -63,7 +63,7 @@
;; Load is 4 cycles. We do not model reservation of load unit.
;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing")
(define_reservation "znver1-load" "znver1-agu-reserve")
-;; Store operations differs between znver1 and znver2 because extra AGU
+;; Store operations differ between znver1 and znver2/znver3 because extra AGU
;; was added.
(define_reservation "znver1-store" "znver1-agu-reserve")
(define_reservation "znver2-store" "znver2-store-agu-reserve")
@@ -77,6 +77,7 @@
(define_reservation "znver2-ivector" "znver1-ieu0+znver1-ieu1
+znver1-ieu2+znver1-ieu3
+znver1-agu0+znver1-agu1+znver2-agu2")
+
;; Floating point unit 4 FP pipes.
(define_cpu_unit "znver1-fp0" "znver1_fp")
(define_cpu_unit "znver1-fp1" "znver1_fp")
@@ -99,7 +100,7 @@
"znver1-double,znver1-store,znver1-ieu0|znver1-ieu3")
(define_insn_reservation "znver2_call" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(eq_attr "type" "call,callv"))
"znver1-double,znver2-store,znver1-ieu0|znver1-ieu3")
@@ -110,10 +111,10 @@
(eq_attr "memory" "store")))
"znver1-direct,znver1-store")
(define_insn_reservation "znver2_push" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "push")
(eq_attr "memory" "store")))
- "znver1-direct,znver1-store")
+ "znver1-direct,znver2-store")
(define_insn_reservation "znver1_push_load" 4
(and (eq_attr "cpu" "znver1")
@@ -121,13 +122,13 @@
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-store")
(define_insn_reservation "znver2_push_load" 4
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "push")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver2-store")
(define_insn_reservation "znver1_pop" 4
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "pop")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load")
@@ -138,7 +139,7 @@
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-store")
(define_insn_reservation "znver2_pop_mem" 4
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "pop")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver2-store")
@@ -149,7 +150,7 @@
(eq_attr "type" "leave"))
"znver1-double,znver1-ieu, znver1-store")
(define_insn_reservation "znver2_leave" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(eq_attr "type" "leave"))
"znver1-double,znver1-ieu, znver2-store")
@@ -157,13 +158,13 @@
;; Multiplications
;; Reg operands
(define_insn_reservation "znver1_imul" 3
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-ieu1")
(define_insn_reservation "znver1_imul_mem" 7
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imul")
(eq_attr "memory" "!none")))
"znver1-direct,znver1-load, znver1-ieu1")
@@ -227,6 +228,62 @@
(eq_attr "memory" "none"))))
"znver1-direct,znver1-load,znver1-ieu2*12")
+(define_insn_reservation "znver3_idiv_DI" 18
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu2*18")
+
+(define_insn_reservation "znver3_idiv_SI" 12
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu2*12")
+
+(define_insn_reservation "znver3_idiv_HI" 10
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-ieu2*10")
+
+(define_insn_reservation "znver3_idiv_QI" 9
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "memory" "none"))))
+ "znver1-direct,znver1-ieu2*9")
+
+(define_insn_reservation "znver3_idiv_mem_DI" 22
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver1-load,znver1-ieu2*22")
+
+(define_insn_reservation "znver3_idiv_mem_SI" 16
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver1-load,znver1-ieu2*16")
+
+(define_insn_reservation "znver3_idiv_mem_HI" 14
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver1-load,znver1-ieu2*10")
+
+(define_insn_reservation "znver3_idiv_mem_QI" 13
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "type" "idiv")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "memory" "load"))))
+ "znver1-direct,znver1-load,znver1-ieu2*9")
+
;; STR ISHIFT which are micro coded.
;; Fix me: Latency need to be rechecked.
(define_insn_reservation "znver1_str_ishift" 6
@@ -236,15 +293,16 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_str_ishift" 3
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ishift")
(eq_attr "memory" "both,store")))
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_str_istr" 19
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "str")
(eq_attr "memory" "both,store")))
"znver1-vector,znver1-ivector")
+
;; MOV - integer moves
(define_insn_reservation "znver1_load_imov_double" 2
(and (eq_attr "cpu" "znver1")
@@ -254,14 +312,14 @@
"znver1-double,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver2_load_imov_double" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "none"))))
"znver1-double,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver1_load_imov_direct" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "none")))
"znver1-direct,znver1-ieu")
@@ -274,7 +332,7 @@
"znver1-double,znver1-ieu|znver1-ieu,znver1-store")
(define_insn_reservation "znver2_load_imov_double_store" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "store"))))
@@ -287,7 +345,7 @@
"znver1-direct,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_load_imov_direct_store" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "store")))
"znver1-direct,znver1-ieu,znver2-store")
@@ -300,14 +358,14 @@
"znver1-double,znver1-load,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver2_load_imov_double_load" 4
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver1_load_imov_direct_load" 4
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load")
@@ -315,13 +373,13 @@
;; INTEGER/GENERAL instructions
;; register/imm operands only: ALU, ICMP, NEG, NOT, ROTATE, ISHIFT, TEST
(define_insn_reservation "znver1_insn" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov")
(eq_attr "memory" "none,unknown")))
"znver1-direct,znver1-ieu")
(define_insn_reservation "znver1_insn_load" 5
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-ieu")
@@ -333,7 +391,7 @@
"znver1-direct,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_insn_store" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
(eq_attr "memory" "store")))
"znver1-direct,znver1-ieu,znver2-store")
@@ -345,7 +403,7 @@
"znver1-direct,znver1-load,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_insn_both" 5
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-ieu,znver2-store")
@@ -357,7 +415,7 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_ieu_vector" 5
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(eq_attr "type" "other,str,multi"))
"znver1-vector,znver2-ivector")
@@ -370,21 +428,21 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_alu1_vector" 3
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "vector")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
"znver1-vector,znver2-ivector")
(define_insn_reservation "znver1_alu1_double" 2
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
"znver1-double,znver1-ieu")
(define_insn_reservation "znver1_alu1_direct" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
@@ -392,45 +450,45 @@
;; Branches : Fix me need to model conditional branches.
(define_insn_reservation "znver1_branch" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "ibr")
- (eq_attr "memory" "none")))
+ (eq_attr "memory" "none")))
"znver1-direct")
;; Indirect branches check latencies.
(define_insn_reservation "znver1_indirect_branch_mem" 6
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "ibr")
- (eq_attr "memory" "load")))
+ (eq_attr "memory" "load")))
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_indirect_branch_mem" 6
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ibr")
- (eq_attr "memory" "load")))
+ (eq_attr "memory" "load")))
"znver1-vector,znver2-ivector")
;; LEA executes in ALU units with 1 cycle latency.
(define_insn_reservation "znver1_lea" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "lea"))
"znver1-direct,znver1-ieu")
;; Other integer instrucions
(define_insn_reservation "znver1_idirect" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown")))
"znver1-direct,znver1-ieu")
;; Floating point
(define_insn_reservation "znver1_fp_cmov" 6
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fcmov"))
"znver1-vector,znver1-fvector")
(define_insn_reservation "znver1_fp_mov_direct_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load"))))
@@ -443,41 +501,34 @@
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fp2|znver1-fp3,znver1-store")
(define_insn_reservation "znver2_fp_mov_direct_store" 5
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fp2|znver1-fp3,znver2-store")
(define_insn_reservation "znver1_fp_mov_double" 4
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "none"))))
"znver1-double,znver1-fp3")
(define_insn_reservation "znver1_fp_mov_double_load" 12
- (and (eq_attr "cpu" "znver1")
- (and (eq_attr "znver1_decode" "double")
- (and (eq_attr "type" "fmov")
- (eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp3")
-
-(define_insn_reservation "znver2_fp_mov_double_load" 12
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3")
(define_insn_reservation "znver1_fp_mov_direct" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fmov"))
"znver1-direct,znver1-fp3")
;; TODO: AGU?
(define_insn_reservation "znver1_fp_spc_direct" 5
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fpspc")
(eq_attr "memory" "store")))
"znver1-direct,znver1-fp3,znver1-fp2")
@@ -488,26 +539,26 @@
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
"znver1-vector,znver1-fvector")
(define_insn_reservation "znver2_fp_insn_vector" 6
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "vector")
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
"znver1-vector,znver2-fvector")
;; FABS
(define_insn_reservation "znver1_fp_fsgn" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fsgn"))
"znver1-direct,znver1-fp3")
(define_insn_reservation "znver1_fp_fcmp" 2
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "memory" "none")
(and (eq_attr "znver1_decode" "double")
(eq_attr "type" "fcmp"))))
"znver1-double,znver1-fp0,znver1-fp2")
(define_insn_reservation "znver1_fp_fcmp_load" 9
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "memory" "none")
(and (eq_attr "znver1_decode" "double")
(eq_attr "type" "fcmp"))))
@@ -515,32 +566,32 @@
;;FADD FSUB FMUL
(define_insn_reservation "znver1_fp_op_mul" 5
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*5")
(define_insn_reservation "znver1_fp_op_mul_load" 12
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*5")
(define_insn_reservation "znver1_fp_op_imul_load" 16
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3,znver1-fp0")
(define_insn_reservation "znver1_fp_op_div" 15
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*15")
(define_insn_reservation "znver1_fp_op_div_load" 22
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*15")
@@ -553,62 +604,63 @@
"znver1-double,znver1-load,znver1-fp3*19")
(define_insn_reservation "znver2_fp_op_idiv_load" 26
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3*19")
+
;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
(define_insn_reservation "znver1_fp_insn" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "mmx"))
"znver1-direct,znver1-fpu")
(define_insn_reservation "znver1_mmx_add" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
(define_insn_reservation "znver1_mmx_add_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
(define_insn_reservation "znver1_mmx_cmp" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp3")
(define_insn_reservation "znver1_mmx_cmp_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
(define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_mmx_cvt_pck_shuf_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_mmx_shift_move" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp2")
+ "znver1-direct,znver1-fp2")
(define_insn_reservation "znver1_mmx_shift_move_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp2")
@@ -619,19 +671,19 @@
(eq_attr "memory" "store,both")))
"znver1-direct,znver1-fp2,znver1-store")
(define_insn_reservation "znver2_mmx_move_store" 1
- (and (eq_attr "cpu" "znver1")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "store,both")))
"znver1-direct,znver1-fp2,znver2-store")
(define_insn_reservation "znver1_mmx_mul" 3
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*3")
(define_insn_reservation "znver1_mmx_load" 10
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*3")
@@ -652,13 +704,13 @@
"znver1-double,znver1-load,znver1-fpu")
(define_insn_reservation "znver1_sse_log" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fpu")
(define_insn_reservation "znver1_sse_log_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fpu")
@@ -678,13 +730,13 @@
"znver1-double,znver1-load,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_sse_log1" 1
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog1")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_sse_log1_load" 8
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog1")
(eq_attr "memory" "!none")))
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
@@ -701,7 +753,8 @@
(define_insn_reservation "znver1_sse_comi_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "prefix_extra" "0")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load"))))
@@ -710,7 +763,8 @@
(define_insn_reservation "znver1_sse_comi_double" 2
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "prefix" "vex")
(and (eq_attr "prefix_extra" "0")
(and (eq_attr "type" "ssecomi")
@@ -720,7 +774,8 @@
(define_insn_reservation "znver1_sse_comi_double_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "prefix" "vex")
(and (eq_attr "prefix_extra" "0")
(and (eq_attr "type" "ssecomi")
@@ -730,7 +785,8 @@
(define_insn_reservation "znver1_sse_test" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "prefix_extra" "1")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "none"))))
@@ -739,7 +795,8 @@
(define_insn_reservation "znver1_sse_test_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "prefix_extra" "1")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load"))))
@@ -757,7 +814,7 @@
"znver1-direct,znver1-ieu0")
(define_insn_reservation "znver2_sse_mov" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "SI")
(and (eq_attr "isa" "avx")
(and (eq_attr "type" "ssemov")
@@ -774,7 +831,7 @@
"znver1-direct,znver1-ieu2")
(define_insn_reservation "znver2_avx_mov" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "TI")
(and (eq_attr "isa" "avx")
(and (eq_attr "type" "ssemov")
@@ -785,7 +842,8 @@
(define_insn_reservation "znver1_sseavx_mov" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fpu")
@@ -797,7 +855,7 @@
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fpu,znver1-store")
(define_insn_reservation "znver2_sseavx_mov_store" 1
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "store")))
"znver1-direct,znver1-fpu,znver2-store")
@@ -805,7 +863,8 @@
(define_insn_reservation "znver1_sseavx_mov_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fpu")
@@ -835,7 +894,8 @@
(define_insn_reservation "znver1_sseavx_add" 3
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp2|znver1-fp3")
@@ -843,7 +903,8 @@
(define_insn_reservation "znver1_sseavx_add_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp2|znver1-fp3")
@@ -892,10 +953,39 @@
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp0|znver1-fp1")
+(define_insn_reservation "znver3_sseavx_fma" 4
+ (and (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "SF,DF,V4SF,V2DF"))
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "none")))
+ "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver3_sseavx_fma_load" 11
+ (and (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "SF,DF,V4SF,V2DF"))
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "load")))
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver3_avx256_fma" 4
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "none"))))
+ "znver1-double,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver3_avx256_fma_load" 11
+ (and (eq_attr "cpu" "znver3")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (and (eq_attr "type" "ssemuladd")
+ (eq_attr "memory" "load"))))
+ "znver1-double,znver1-load,znver1-fp0|znver1-fp1")
+
(define_insn_reservation "znver1_sseavx_iadd" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "DI,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseiadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
@@ -903,7 +993,8 @@
(define_insn_reservation "znver1_sseavx_iadd_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "DI,TI"))
- (eq_attr "cpu" "znver2"))
+ (ior (eq_attr "cpu" "znver2")
+ (eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseiadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
@@ -924,7 +1015,7 @@
;; SSE conversions.
(define_insn_reservation "znver1_ssecvtsf_si_load" 12
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "SI")
(and (eq_attr "type" "sseicvt")
(and (match_operand:SF 1 "memory_operand")
@@ -939,7 +1030,7 @@
(eq_attr "memory" "none")))))
"znver1-double,znver1-fp3,znver1-ieu0")
(define_insn_reservation "znver2_ssecvtdf_si" 4
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "SI")
(and (match_operand:DF 1 "register_operand")
(and (eq_attr "type" "sseicvt")
@@ -955,13 +1046,14 @@
"znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
(define_insn_reservation "znver2_ssecvtdf_si_load" 11
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "SI")
(and (eq_attr "type" "sseicvt")
(and (match_operand:DF 1 "memory_operand")
(eq_attr "memory" "load")))))
"znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
+
;; All other ssecvt instructions use the fp3 pipe.
;; Check: this needs to be revisited.
;; Some SSE conversions may use different pipe combinations.
@@ -972,19 +1064,13 @@
"znver1-direct,znver1-fp3")
(define_insn_reservation "znver2_ssecvt" 3
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3")
(define_insn_reservation "znver1_ssecvt_load" 11
- (and (eq_attr "cpu" "znver1")
- (and (eq_attr "type" "ssecvt")
- (eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp3")
-
-(define_insn_reservation "znver2_ssecvt_load" 11
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3")
@@ -994,7 +1080,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "V8SF,V4SF,SF")))
+ (eq_attr "mode" "V8SF,V4SF,SF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*10")
@@ -1003,7 +1091,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "V8SF,V4SF,SF")))
+ (eq_attr "mode" "V8SF,V4SF,SF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*10")
@@ -1012,16 +1102,20 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V2DF,DF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "V4DF,V2DF,DF")))
+ (eq_attr "mode" "V4DF,V2DF,DF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "V4DF,V2DF,DF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*13")
(define_insn_reservation "znver1_ssediv_sd_pd_load" 20
(and (ior (and (eq_attr "cpu" "znver1")
- (eq_attr "mode" "V2DF,DF"))
+ (eq_attr "mode" "V2DF,DF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "V4DF,V2DF,DF")))
+ (eq_attr "mode" "V4DF,V2DF,DF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "V4DF,V2DF,DF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*13")
@@ -1058,7 +1152,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
+ (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
"znver1-direct,(znver1-fp0|znver1-fp1)*3")
@@ -1067,7 +1163,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "V8SF,V4SF,SF")))
+ (eq_attr "mode" "V8SF,V4SF,SF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
@@ -1101,17 +1199,18 @@
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4")
(define_insn_reservation "znver2_ssemul_sd_pd" 3
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
"znver1-direct,(znver1-fp0|znver1-fp1)*3")
(define_insn_reservation "znver2_ssemul_sd_pd_load" 10
- (and (eq_attr "cpu" "znver2")
+ (and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+
(define_insn_reservation "znver1_ssemul_avx256_pd" 5
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V4DF")
@@ -1131,13 +1230,15 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "TI"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "TI,OI")))
+ (eq_attr "mode" "TI,OI"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*3")
(define_insn_reservation "znver1_sseimul_avx256" 4
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none"))))
@@ -1147,27 +1248,29 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "TI"))
(and (eq_attr "cpu" "znver2")
+ (eq_attr "mode" "TI,OI"))
+ (and (eq_attr "cpu" "znver3")
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*3")
(define_insn_reservation "znver1_sseimul_avx256_load" 11
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp0*4")
(define_insn_reservation "znver1_sseimul_di" 3
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "memory" "none")
(eq_attr "type" "sseimul"))))
"znver1-direct,znver1-fp0*3")
(define_insn_reservation "znver1_sseimul_load_di" 10
- (and (eq_attr "cpu" "znver1,znver2")
+ (and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
@@ -1178,16 +1281,20 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
+ (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_sse_cmp_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
- (eq_attr "mode" "SF,DF,V4SF,V2DF"))
+ (eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
+ (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
@@ -1208,9 +1315,11 @@
(define_insn_reservation "znver1_sse_icmp" 1
(and (ior (and (eq_attr "cpu" "znver1")
- (eq_attr "mode" "QI,HI,SI,DI,TI"))
+ (eq_attr "mode" "QI,HI,SI,DI,TI"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
+ (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp3")
@@ -1219,7 +1328,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "QI,HI,SI,DI,TI"))
(and (eq_attr "cpu" "znver2")
- (eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
+ (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))
+ (and (eq_attr "cpu" "znver3")
+ (eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 4357615..93529aa 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -22882,6 +22882,12 @@ AMD Family 17h Zen version 1.
@item znver2
AMD Family 17h Zen version 2.
+
+@item amdfam19h
+AMD Family 19h CPU.
+
+@item znver3
+AMD Family 19h Zen version 3.
@end table
Here is an example:
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 671b297..f7e8c8b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -30052,6 +30052,13 @@ MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A,
SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID,
WBNOINVD, and 64-bit instruction set extensions.)
+@item znver3
+AMD Family 19h core based CPUs with x86-64 instruction set support. (This
+supersets BMI, BMI2, CLWB, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED,
+MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A,
+SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID,
+WBNOINVD, PKU, VPCLMULQDQ, VAES, and 64-bit instruction set extensions.)
+
@item btver1
CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This
supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit
diff --git a/gcc/testsuite/g++.target/i386/mv29.C b/gcc/testsuite/g++.target/i386/mv29.C
new file mode 100644
index 0000000..c7723e3
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/mv29.C
@@ -0,0 +1,79 @@
+// Test that function multiversioning dispatch chooses the right version
+// for AMD CPUs with the same internal GCC processor id.
+
+// { dg-do run }
+// { dg-require-ifunc "" }
+// { dg-options "-O2" }
+
+#include <assert.h>
+
+int __attribute__ ((target("default")))
+foo ()
+{
+ return 0;
+}
+
+int __attribute__ ((target("arch=amdfam10"))) foo () {
+ return 1;
+}
+
+int __attribute__ ((target("arch=btver1"))) foo () {
+ return 2;
+}
+
+int __attribute__ ((target("arch=btver2"))) foo () {
+ return 3;
+}
+
+int __attribute__ ((target("arch=bdver1"))) foo () {
+ return 4;
+}
+
+int __attribute__ ((target("arch=bdver2"))) foo () {
+ return 5;
+}
+
+int __attribute__ ((target("arch=bdver3"))) foo () {
+ return 6;
+}
+
+int __attribute__ ((target("arch=znver1"))) foo () {
+ return 7;
+}
+
+int __attribute__ ((target("arch=znver2"))) foo () {
+ return 8;
+}
+
+int __attribute__ ((target("arch=znver3"))) foo () {
+ return 9;
+}
+
+
+int main () // Checks that foo () dispatched to the version matching the host CPU.
+{
+ int val = foo (); // Each foo version returns a distinct id: 0 (default) through 9 (znver3).
+
+ if (__builtin_cpu_is ("amdfam10h")) // CPU name is "amdfam10h"; the matching target is "arch=amdfam10".
+ assert (val == 1);
+ else if (__builtin_cpu_is ("btver1"))
+ assert (val == 2);
+ else if (__builtin_cpu_is ("btver2"))
+ assert (val == 3);
+ else if (__builtin_cpu_is ("bdver1"))
+ assert (val == 4);
+ else if (__builtin_cpu_is ("bdver2"))
+ assert (val == 5);
+ else if (__builtin_cpu_is ("bdver3"))
+ assert (val == 6);
+ else if (__builtin_cpu_is ("znver1"))
+ assert (val == 7);
+ else if (__builtin_cpu_is ("znver2"))
+ assert (val == 8);
+ else if (__builtin_cpu_is ("znver3")) // New in this change: Zen 3 must pick the arch=znver3 version.
+ assert (val == 9);
+ else // None of the listed AMD CPUs matched.
+ assert (val == 0); // Expect the target("default") version.
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 395a21c..5d4800f 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -193,6 +193,9 @@ extern void test_arch_barcelona (void) __attribute__((__target__("arch=barcelon
extern void test_arch_bdver1 (void) __attribute__((__target__("arch=bdver1")));
extern void test_arch_bdver2 (void) __attribute__((__target__("arch=bdver2")));
extern void test_arch_bdver3 (void) __attribute__((__target__("arch=bdver3")));
+extern void test_arch_znver1 (void) __attribute__((__target__("arch=znver1")));
+extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2")));
+extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3")));
extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona")));
extern void test_tune_core2 (void) __attribute__((__target__("tune=core2")));
@@ -212,6 +215,9 @@ extern void test_tune_bdver1 (void) __attribute__((__target__("tune=bdver1")));
extern void test_tune_bdver2 (void) __attribute__((__target__("tune=bdver2")));
extern void test_tune_bdver3 (void) __attribute__((__target__("tune=bdver3")));
extern void test_tune_generic (void) __attribute__((__target__("tune=generic")));
+extern void test_tune_znver1 (void) __attribute__((__target__("tune=znver1")));
+extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2")));
+extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3")));
extern void test_fpmath_sse (void) __attribute__((__target__("sse2,fpmath=sse")));
extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387")));