diff options
author | Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> | 2013-11-18 09:25:21 +0000 |
---|---|---|
committer | Ganesh Gopalasubramanian <gganesh@gcc.gnu.org> | 2013-11-18 09:25:21 +0000 |
commit | ed97ad4709f095da78aa0a4f5653b9509984d579 (patch) | |
tree | 9f7a7cf7f1b537666804815feb73b7f92593f5da /gcc | |
parent | 2621c8604391474434d483dace479cb71452f123 (diff) | |
download | gcc-ed97ad4709f095da78aa0a4f5653b9509984d579.zip gcc-ed97ad4709f095da78aa0a4f5653b9509984d579.tar.gz gcc-ed97ad4709f095da78aa0a4f5653b9509984d579.tar.bz2 |
AMD bdver4 enablement
From-SVN: r204939
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 31 | ||||
-rw-r--r-- | gcc/config.gcc | 27 | ||||
-rw-r--r-- | gcc/config/i386/bdver3.md | 224 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.c | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386-c.c | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 107 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.opt | 2 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 3 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 15 |
11 files changed, 296 insertions, 131 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 725f66d..7f97e31 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2013-11-12 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> + + * config.gcc (i[34567]86-*-linux* | ...): Add bdver4. + (case ${target}): Add bdver4. + * config/i386/bdver3.md: Add bdver4. + * config/i386/driver-i386.c: (host_detect_local_cpu): Let + -march=native recognize bdver4 processors. + * config/i386/i386-c.c (ix86_target_macros_internal): Add + bdver4 def_and_undef + * config/i386/i386.c (struct processor_costs bdver4_cost): New. + (m_BDVER4): New definition. + (m_AMD_MULTIPLE): Includes m_BDVER4. + (processor_target_table): Add bdver4 entry. + (static const char *const cpu_names): Add bdver4 entry. + (software_prefetching_beneficial_p): Add bdver3. + (ix86_option_override_internal): Add bdver4 instruction sets. + (ix86_issue_rate): Add bdver4. + (ix86_adjust_cost): Add bdver4. + (ia32_multipass_dfa_lookahead): Add bdver4. + (enum processor_model): Add M_AMDFAM15H_BDVER4. + (struct _arch_names_table): Add M_AMDFAM15H_BDVER4. + (has_dispatch): Add bdver4. + * config/i386/i386.h (TARGET_BDVER4): New definition. + (enum target_cpu_default): Add TARGET_CPU_DEFAULT_bdver4. + (enum processor_type): Add PROCESSOR_BDVER4. + * config/i386/i386.md (define_attr "cpu"): Add bdver4. + * config/i386/i386.opt (flag_dispatch_scheduler): Add bdver4. + * gcc/doc/extend.texi: Add details about bdver4. + * gcc/doc/invoke.texi: Add details about bdver4. Add + fma4 and fsgsbase for bdver3. Add fma4 for bdver2. 
+ 2013-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com> * config/rs6000/rs6000.c (rs6000_emit_move): Use low word of diff --git a/gcc/config.gcc b/gcc/config.gcc index fafa8b8..2907018 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1398,7 +1398,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` need_64bit_isa=yes case X"${with_cpu}" in - Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1407,7 +1407,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -1519,7 +1519,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) tmake_file="$tmake_file i386/t-sol2-64" need_64bit_isa=yes case X"${with_cpu}" in - Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + 
Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1528,7 +1528,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -1604,7 +1604,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) if test x$enable_targets = xall; then tm_defines="${tm_defines} TARGET_BI_ARCH=1" case X"${with_cpu}" in - Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1613,7 +1613,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom slm core2 corei7 Xcorei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 Xcorei7-avx nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -2911,6 +2911,10 @@ case ${target} in ;; 
i686-*-* | i786-*-*) case ${target_noncanonical} in + bdver4-*) + arch=bdver4 + cpu=bdver4 + ;; bdver3-*) arch=bdver3 cpu=bdver3 @@ -3020,6 +3024,10 @@ case ${target} in ;; x86_64-*-*) case ${target_noncanonical} in + bdver4-*) + arch=bdver4 + cpu=bdver4 + ;; bdver3-*) arch=bdver3 cpu=bdver3 @@ -3658,9 +3666,10 @@ case "${target}" in ;; "" | x86-64 | generic | native \ | k8 | k8-sse3 | athlon64 | athlon64-sse3 | opteron \ - | opteron-sse3 | athlon-fx | bdver3 | bdver2 | bdver1 | btver2 \ - | btver1 | amdfam10 | barcelona | nocona | core2 | corei7 \ - | corei7-avx | core-avx-i | core-avx2 | atom | slm) + | opteron-sse3 | athlon-fx | bdver4 | bdver3 | bdver2 \ + | bdver1 | btver2 | btver1 | amdfam10 | barcelona \ + | nocona | core2 | corei7 | corei7-avx | core-avx-i \ + | core-avx2 | atom | slm) # OK ;; *) diff --git a/gcc/config/i386/bdver3.md b/gcc/config/i386/bdver3.md index 421a3d1..019e929 100644 --- a/gcc/config/i386/bdver3.md +++ b/gcc/config/i386/bdver3.md @@ -16,19 +16,19 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. ;; -;; AMD bdver3 Scheduling +;; AMD bdver3 and bdver4 Scheduling ;; -;; The bdver3 contains three pipelined FP units and two integer units. -;; Fetching and decoding logic is different from previous fam15 processors. -;; Fetching is done every two cycles rather than every cycle and -;; two decode units are available. The decode units therefore decode +;; The bdver3 and bdver4 contains three pipelined FP units and two integer +;; units. ;; Fetching and decoding logic is different from previous fam15 +;; processors. Fetching is done every two cycles rather than every cycle +;; and two decode units are available. The decode units therefore decode ;; four instructions in two cycles. ;; ;; The load/store queue unit is not attached to the schedulers but ;; communicates with all the execution units separately instead. ;; -;; bdver3 belong to fam15 processors. 
We use the same insn attribute -;; that was used for bdver1 decoding scheme. +;; bdver3 and bdver4 belong to fam15 processors. We use the same insn +;; attribute that was used for bdver1 decoding scheme. (define_automaton "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu") @@ -102,90 +102,90 @@ ;; Jump instructions are executed in the branch unit completely transparent to us. (define_insn_reservation "bdver3_call" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "call,callv")) "bdver3-double,(bdver3-agu | bdver3-ieu),nothing") ;; PUSH mem is double path. (define_insn_reservation "bdver3_push" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "push")) "bdver3-direct,bdver3-ieu,bdver3-store") ;; POP r16/mem are double path. (define_insn_reservation "bdver3_pop" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "pop")) "bdver3-direct,bdver3-ivector") ;; LEAVE no latency info so far, assume same with amdfam10. (define_insn_reservation "bdver3_leave" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "leave")) "bdver3-vector,bdver3-ivector") ;; LEA executes in AGU unit with 1 cycle latency on BDVER3. (define_insn_reservation "bdver3_lea" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "lea")) "bdver3-direct,bdver3-ieu") ;; MUL executes in special multiplier unit attached to IEU1. 
(define_insn_reservation "bdver3_imul_DI" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "none,unknown")))) "bdver3-direct,bdver3-ieu1") (define_insn_reservation "bdver3_imul" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (eq_attr "memory" "none,unknown"))) "bdver3-direct,bdver3-ieu1") (define_insn_reservation "bdver3_imul_mem_DI" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "load,both")))) "bdver3-direct,bdver3-load,bdver3-ieu1") (define_insn_reservation "bdver3_imul_mem" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (eq_attr "memory" "load,both"))) "bdver3-direct,bdver3-load,bdver3-ieu1") (define_insn_reservation "bdver3_str" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "str") (eq_attr "memory" "load,both,store"))) "bdver3-vector,bdver3-load,bdver3-ivector") ;; Integer instructions. 
(define_insn_reservation "bdver3_idirect" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "bdver3-direct,(bdver3-ieu|bdver3-agu)") (define_insn_reservation "bdver3_ivector" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "bdver3-vector,bdver3-ivector") (define_insn_reservation "bdver3_idirect_loadmov" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-load") (define_insn_reservation "bdver3_idirect_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-load,bdver3-ieu") (define_insn_reservation "bdver3_idirect_movstore" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imov") (eq_attr "memory" "store"))) "bdver3-direct,bdver3-ieu,bdver3-store") (define_insn_reservation "bdver3_idirect_both" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "both")))) @@ -193,7 +193,7 @@ bdver3-ieu,bdver3-store, bdver3-store") (define_insn_reservation "bdver3_idirect_store" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "store")))) @@ -201,108 +201,108 @@ bdver3-store") ;; BDVER3 floating point units. 
(define_insn_reservation "bdver3_fldxf" 13 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "load") (eq_attr "mode" "XF")))) "bdver3-vector,bdver3-fpload2,bdver3-fvector*9") (define_insn_reservation "bdver3_fld" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fstxf" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "store,both") (eq_attr "mode" "XF")))) "bdver3-vector,(bdver3-fpsched+bdver3-agu),(bdver3-store2+(bdver3-fvector*6))") (define_insn_reservation "bdver3_fst" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))) "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)") (define_insn_reservation "bdver3_fist" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fistp,fisttp")) "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)") (define_insn_reservation "bdver3_fmov_bdver3" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fmov")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fadd_load" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fop") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fadd" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fop")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fmul_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmul") (eq_attr "memory" "load"))) "bdver3-double,bdver3-fpload,bdver3-ffma") 
(define_insn_reservation "bdver3_fmul" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fmul")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fsgn" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fsgn")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fdiv_load" 42 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fdiv") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fdiv" 42 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fdiv")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fpspc_load" 143 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fpspc") (eq_attr "memory" "load"))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_fcmov_load" 17 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmov") (eq_attr "memory" "load"))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_fcmov" 15 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fcmov")) "bdver3-vector,bdver3-fpsched,bdver3-fvector") (define_insn_reservation "bdver3_fcomi_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmp") (and (eq_attr "bdver1_decode" "double") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_fcomi" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "double") (eq_attr "type" "fcmp"))) "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_fcom_load" 6 - (and (eq_attr "cpu" "bdver3") + (and 
(eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmp") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fcom" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fcmp")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fxch" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fxch")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") ;; SSE loads. (define_insn_reservation "bdver3_ssevector_avx128_unaligned_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "movu" "1") @@ -310,162 +310,162 @@ (eq_attr "memory" "load")))))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssevector_avx256_unaligned_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "movu" "1") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "load"))))) "bdver3-double,bdver3-fpload") (define_insn_reservation "bdver3_ssevector_sse128_unaligned_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "movu" "1") (and (eq_attr "mode" "V4SF,V2DF") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_avx128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_avx256_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fmal") 
(define_insn_reservation "bdver3_ssevector_sse128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssescalar_movq_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "DI") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssescalar_vmovss_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "mode" "SF") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssescalar_sse128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload, bdver3-ffma") (define_insn_reservation "bdver3_mmxsse_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload, bdver3-fmal") ;; SSE stores. 
(define_insn_reservation "bdver3_sse_store_avx256" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "store,both")))) "bdver3-double,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)") (define_insn_reservation "bdver3_sse_store" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "store,both")))) "bdver3-direct,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)") (define_insn_reservation "bdver3_mmxsse_store_short" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "store,both"))) "bdver3-direct,bdver3-fpsched,(bdver3-fsto+bdver3-store)") ;; Register moves. (define_insn_reservation "bdver3_ssevector_avx256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,bdver3-fmal") (define_insn_reservation "bdver3_movss_movsd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_mmxssemov" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmal") ;; SSE logs. 
(define_insn_reservation "bdver3_sselog_load_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_sselog_256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (eq_attr "mode" "V8SF"))) "bdver3-double,bdver3-fpsched,bdver3-fmal") (define_insn_reservation "bdver3_sselog_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fxbar") (define_insn_reservation "bdver3_sselog" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "sselog,sselog1")) "bdver3-direct,bdver3-fpsched,bdver3-fxbar") ;; SSE Shuffles (define_insn_reservation "bdver3_sseshuf_load_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf,sseshuf1") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf,sseshuf1") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf_256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf") (eq_attr "mode" "V8SF"))) "bdver3-double,bdver3-fpsched,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "sseshuf,sseshuf1")) "bdver3-direct,bdver3-fpsched,bdver3-fpshuf") ;; PCMP actually executes in FMAL. 
(define_insn_reservation "bdver3_ssecmp_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssecmp" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "ssecmp")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_ssecomi_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load"))) "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_ssecomi" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "ssecomi")) "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)") @@ -474,7 +474,7 @@ ;; 256 bit conversion. (define_insn_reservation "bdver3_vcvtX2Y_avx256_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (ior (ior (match_operand:V4DF 0 "register_operand") @@ -485,7 +485,7 @@ (match_operand:V8SI 1 "nonimmediate_operand"))))))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_vcvtX2Y_avx256" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (ior (ior (match_operand:V4DF 0 "register_operand") @@ -497,40 +497,40 @@ "bdver3-vector,bdver3-fpsched,bdver3-fvector") ;; CVTSS2SD, CVTSD2SS. 
(define_insn_reservation "bdver3_ssecvt_cvtss2sd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtss2sd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,bdver3-fcvt") ;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ. (define_insn_reservation "bdver3_sseicvt_cvtsi2sd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_sseicvt_cvtsi2sd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(nothing | bdver3-fcvt)") ;; CVTPD2PS. (define_insn_reservation "bdver3_ssecvt_cvtpd2ps_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 0 "register_operand") (match_operand:V2DF 1 "nonimmediate_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtpd2ps" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 0 "register_operand") @@ -538,7 +538,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTPI2PS, CVTDQ2PS. 
(define_insn_reservation "bdver3_ssecvt_cvtdq2ps_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 0 "register_operand") @@ -546,7 +546,7 @@ (match_operand:V4SI 1 "nonimmediate_operand")))))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtdq2ps" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 0 "register_operand") @@ -555,14 +555,14 @@ "bdver3-direct,bdver3-fpsched,bdver3-fcvt") ;; CVTDQ2PD. (define_insn_reservation "bdver3_ssecvt_cvtdq2pd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") (match_operand:V4SI 1 "nonimmediate_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtdq2pd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 0 "register_operand") @@ -570,7 +570,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTPS2PD, CVTPI2PD. 
(define_insn_reservation "bdver3_ssecvt_cvtps2pd_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") @@ -578,7 +578,7 @@ (match_operand:V4SF 1 "nonimmediate_operand")))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtps2pd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") @@ -587,27 +587,27 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ. (define_insn_reservation "bdver3_ssecvt_cvtsX2si_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fsto)") (define_insn_reservation "bdver3_ssecvt_cvtsX2si" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fsto)") ;; CVTPD2PI, CVTTPD2PI. 
(define_insn_reservation "bdver3_ssecvt_cvtpd2pi_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 1 "nonimmediate_operand") (match_operand:V2SI 0 "register_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)") (define_insn_reservation "bdver3_ssecvt_cvtpd2pi" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 1 "nonimmediate_operand") @@ -615,14 +615,14 @@ "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)") ;; CVTPD2DQ, CVTTPD2DQ. (define_insn_reservation "bdver3_ssecvt_cvtpd2dq_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 1 "nonimmediate_operand") (match_operand:V4SI 0 "register_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)") (define_insn_reservation "bdver3_ssecvt_cvtpd2dq" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 1 "nonimmediate_operand") @@ -630,7 +630,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)") ;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ. 
(define_insn_reservation "bdver3_ssecvt_cvtps2pi_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 1 "nonimmediate_operand") @@ -638,7 +638,7 @@ (match_operand: V4SI 0 "register_operand")))))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtps2pi" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 1 "nonimmediate_operand") @@ -648,100 +648,100 @@ ;; SSE MUL, ADD, and MULADD. (define_insn_reservation "bdver3_ssemuladd_load_256" 11 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd_load" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_sseimul_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fmma") (define_insn_reservation "bdver3_sseimul" 4 - (and (eq_attr "cpu" 
"bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseimul") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmma") (define_insn_reservation "bdver3_sseiadd_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseiadd") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_sseiadd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseiadd") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmal") ;; SSE DIV: no throughput information (assume same as amdfam10). (define_insn_reservation "bdver3_ssediv_double_load_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V4DF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V4DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_load_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_256" 24 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double_load" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "DF,V2DF") (eq_attr 
"memory" "load")))) "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "DF,V2DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_load" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,V4SF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single" 24 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,V4SF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_sseins" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseins") (eq_attr "mode" "TI"))) "bdver3-direct,bdver3-fpsched,bdver3-fxbar") diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 823f92d..a4a1f40 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -550,6 +550,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) processor = PROCESSOR_GEODE; else if (has_movbe) processor = PROCESSOR_BTVER2; + else if (has_avx2) + processor = PROCESSOR_BDVER4; else if (has_xsaveopt) processor = PROCESSOR_BDVER3; else if (has_bmi) @@ -772,6 +774,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) case PROCESSOR_BDVER3: cpu = "bdver3"; break; + case PROCESSOR_BDVER4: + cpu = "bdver4"; + break; case PROCESSOR_BTVER1: cpu = "btver1"; break; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 1c053b1..18c2929 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c 
@@ -117,6 +117,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__bdver3"); def_or_undef (parse_in, "__bdver3__"); break; + case PROCESSOR_BDVER4: + def_or_undef (parse_in, "__bdver4"); + def_or_undef (parse_in, "__bdver4__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__btver1"); def_or_undef (parse_in, "__btver1__"); @@ -224,6 +228,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_BDVER3: def_or_undef (parse_in, "__tune_bdver3__"); break; + case PROCESSOR_BDVER4: + def_or_undef (parse_in, "__tune_bdver4__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__tune_btver1__"); break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 565d8fa..bb6d15a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1161,6 +1161,92 @@ struct processor_costs bdver3_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* BDVER4 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do non-temporal accesses and beat inline considerably. 
*/ +static stringop_algs bdver4_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver4_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs bdver4_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + bdver4_memcpy, + bdver4_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 2, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* BTVER1 has optimized REP instruction for medium sized blocks, but for very small blocks it is better to use loop. 
For large blocks, libcall can do nontemporary accesses and beat inline considerably. */ @@ -1850,9 +1936,10 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_BDVER1 (1<<PROCESSOR_BDVER1) #define m_BDVER2 (1<<PROCESSOR_BDVER2) #define m_BDVER3 (1<<PROCESSOR_BDVER3) +#define m_BDVER4 (1<<PROCESSOR_BDVER4) #define m_BTVER1 (1<<PROCESSOR_BTVER1) #define m_BTVER2 (1<<PROCESSOR_BTVER2) -#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3) +#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4) #define m_BTVER (m_BTVER1 | m_BTVER2) #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER) @@ -2310,6 +2397,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {&bdver1_cost, 16, 10, 16, 7, 11}, {&bdver2_cost, 16, 10, 16, 7, 11}, {&bdver3_cost, 16, 10, 16, 7, 11}, + {&bdver4_cost, 16, 10, 16, 7, 11}, {&btver1_cost, 16, 10, 16, 7, 11}, {&btver2_cost, 16, 10, 16, 7, 11}, {&atom_cost, 16, 15, 16, 7, 16}, @@ -2347,6 +2435,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = "bdver1", "bdver2", "bdver3", + "bdver4", "btver1", "btver2" }; @@ -3107,6 +3196,13 @@ ix86_option_override_internal (bool main_args_p, | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, + {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW @@ -24841,6 +24937,7 @@ ix86_issue_rate (void) case PROCESSOR_BDVER1: case PROCESSOR_BDVER2: case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: case PROCESSOR_CORE2: case 
PROCESSOR_COREI7: case PROCESSOR_COREI7_AVX: @@ -25101,6 +25198,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) case PROCESSOR_BDVER1: case PROCESSOR_BDVER2: case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: case PROCESSOR_BTVER1: case PROCESSOR_BTVER2: case PROCESSOR_GENERIC: @@ -25220,6 +25318,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_BDVER1: case PROCESSOR_BDVER2: case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: /* We use lookahead value 4 for BD both before and after reload schedules. Plan is to have value 8 included for O3. */ return 4; @@ -30755,7 +30854,8 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM10H_ISTANBUL, M_AMDFAM15H_BDVER1, M_AMDFAM15H_BDVER2, - M_AMDFAM15H_BDVER3 + M_AMDFAM15H_BDVER3, + M_AMDFAM15H_BDVER4 }; static struct _arch_names_table @@ -30782,6 +30882,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, + {"bdver4", M_AMDFAM15H_BDVER4}, }; static struct _isa_names_table @@ -43363,7 +43464,7 @@ do_dispatch (rtx insn, int mode) static bool has_dispatch (rtx insn, int action) { - if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3) + if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4) && flag_dispatch_scheduler) switch (action) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 123e3fa..2fd5fdd 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -311,6 +311,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BDVER1 (ix86_tune == PROCESSOR_BDVER1) #define TARGET_BDVER2 (ix86_tune == PROCESSOR_BDVER2) #define TARGET_BDVER3 (ix86_tune == PROCESSOR_BDVER3) +#define TARGET_BDVER4 (ix86_tune == PROCESSOR_BDVER4) #define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1) #define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2) #define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) @@ -639,6 +640,7 @@ enum target_cpu_default TARGET_CPU_DEFAULT_bdver1, 
TARGET_CPU_DEFAULT_bdver2, TARGET_CPU_DEFAULT_bdver3, + TARGET_CPU_DEFAULT_bdver4, TARGET_CPU_DEFAULT_btver1, TARGET_CPU_DEFAULT_btver2, @@ -2247,6 +2249,7 @@ enum processor_type PROCESSOR_BDVER1, PROCESSOR_BDVER2, PROCESSOR_BDVER3, + PROCESSOR_BDVER4, PROCESSOR_BTVER1, PROCESSOR_BTVER2, PROCESSOR_ATOM, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 263c0b9..045d4ae 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -368,7 +368,8 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, - atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" + atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,bdver4, + btver1,btver2" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. Refinements due to arguments to be diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 1a1b8ab..c4f9c8c 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -444,7 +444,7 @@ the function. mdispatch-scheduler Target RejectNegative Var(flag_dispatch_scheduler) -Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 and Haifa scheduling +Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 or bdver4 and Haifa scheduling is selected. mprefer-avx128 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 599dee3..88eba80 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -10587,6 +10587,9 @@ AMD Family 15h Bulldozer version 2. @item bdver3 AMD Family 15h Bulldozer version 3. +@item bdver4 +AMD Family 15h Bulldozer version 4. + @item btver2 AMD Family 16h CPU. @end table diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ff4c2ee..a3fdbb5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14529,14 +14529,19 @@ supersets FMA4, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.) 
@item bdver2 AMD Family 15h core based CPUs with x86-64 instruction set support. (This -supersets BMI, TBM, F16C, FMA, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, -SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set +supersets BMI, TBM, F16C, FMA, FMA4, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, +SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.) @item bdver3 AMD Family 15h core based CPUs with x86-64 instruction set support. (This -supersets BMI, TBM, F16C, FMA, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, -SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set -extensions. +supersets BMI, TBM, F16C, FMA, FMA4, FSGSBASE, AVX, XOP, LWP, AES, +PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and +64-bit instruction set extensions.) +@item bdver4 +AMD Family 15h core based CPUs with x86-64 instruction set support. (This +supersets BMI, BMI2, TBM, F16C, FMA, FMA4, FSGSBASE, AVX, AVX2, XOP, LWP, +AES, PCL_MUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, +SSE4.2, ABM and 64-bit instruction set extensions.) @item btver1 CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This |