diff options
author | Sebastian Peryt <sebastian.peryt@intel.com> | 2017-09-20 15:47:30 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2017-09-20 15:47:30 +0200 |
commit | cace2309d494617577161853a358eb3a444c1688 (patch) | |
tree | 6c3c5c54ecc5c5ab93aec514857d5e72edff0bfa /gcc | |
parent | 2288ea23816dd1368d92b4386f7f9aab2fcf1aaf (diff) | |
download | gcc-cace2309d494617577161853a358eb3a444c1688.zip gcc-cace2309d494617577161853a358eb3a444c1688.tar.gz gcc-cace2309d494617577161853a358eb3a444c1688.tar.bz2 |
config.gcc: Support "knm".
gcc/
* config.gcc: Support "knm".
* config/i386/driver-i386.c (host_detect_local_cpu): Detect "knm".
* config/i386/i386-c.c (ix86_target_macros_internal): Handle
PROCESSOR_KNM.
* config/i386/i386.c (m_KNM): Define.
(processor_target_table): Add "knm".
(PTA_KNM): Define.
(ix86_option_override_internal): Add "knm".
(ix86_issue_rate): Add PROCESSOR_KNM.
(ix86_adjust_cost): Ditto.
(ia32_multipass_dfa_lookahead): Ditto.
(get_builtin_code_for_version): Handle PROCESSOR_KNM.
(fold_builtin_cpu): Add M_INTEL_KNM.
* config/i386/i386.h (TARGET_KNM): Define.
(processor_type): Add PROCESSOR_KNM.
* config/i386/x86-tune.def: Add m_KNM.
* doc/invoke.texi: Add knm as x86 -march=/-mtune= CPU type.
libgcc/
* config/i386/cpuinfo.h (processor_types): Add INTEL_KNM.
* config/i386/cpuinfo.c (get_intel_cpu): Detect Knights Mill.
gcc/testsuite/
* gcc.target/i386/builtin_target.c: Test knm.
* gcc.target/i386/funcspec-56.inc: Test arch=knm.
From-SVN: r253013
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 26 | ||||
-rw-r--r-- | gcc/config.gcc | 2 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.c | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386-c.c | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 14 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 46 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 6 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/builtin_target.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 |
11 files changed, 93 insertions, 27 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b41cd03..1bbb3bf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2017-09-20 Sebastian Peryt <sebastian.peryt@intel.com> + + * config.gcc: Support "knm". + * config/i386/driver-i386.c (host_detect_local_cpu): Detect "knm". + * config/i386/i386-c.c (ix86_target_macros_internal): Handle + PROCESSOR_KNM. + * config/i386/i386.c (m_KNM): Define. + (processor_target_table): Add "knm". + (PTA_KNM): Define. + (ix86_option_override_internal): Add "knm". + (ix86_issue_rate): Add PROCESSOR_KNM. + (ix86_adjust_cost): Ditto. + (ia32_multipass_dfa_lookahead): Ditto. + (get_builtin_code_for_version): Handle PROCESSOR_KNM. + (fold_builtin_cpu): Add M_INTEL_KNM. + * config/i386/i386.h (processor_costs): Define TARGET_KNM. + (processor_type): Add PROCESSOR_KNM. + * config/i386/x86-tune.def: Add m_KNM. + * doc/invoke.texi: Add knm as x86 -march=/-mtune= CPU type. + 2017-09-20 Richard Biener <rguenther@suse.de> PR tree-optimization/80213 @@ -97,7 +117,7 @@ * rtl.h (get_stack_check_protect): Prototype. * target.def (stack_clash_protection_final_dynamic_probe): New hook. * targhooks.c (default_stack_clash_protection_final_dynamic_probe): New. - * targhooks.h (default_stack_clash_protection_final_dynamic_probe): + * targhooks.h (default_stack_clash_protection_final_dynamic_probe): Prototype. * doc/tm.texi.in (TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE): Add @hook. @@ -312,8 +332,8 @@ 2017-09-17 Daniel Santos <daniel.santos@pobox.com> - config/i386/i386.c: (xlogue_layout::STUB_NAME_MAX_LEN): Increase to 20 - bytes. + * config/i386/i386.c (xlogue_layout::STUB_NAME_MAX_LEN): + Increase to 20 bytes. (xlogue_layout::s_stub_names): Add an additional size-2 diminsion. (xlogue_layout::get_stub_name): Modify to select the appropairate sse or avx version of the stub. 
diff --git a/gcc/config.gcc b/gcc/config.gcc index 630832f..555ed69 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -623,7 +623,7 @@ pentium4 pentium4m pentiumpro prescott lakemont" x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \ core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \ -sandybridge ivybridge haswell broadwell bonnell silvermont knl \ +sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \ skylake-avx512 x86-64 native" # Additional x86 processors supported by --with-cpu=. Each processor diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 570c490..e78cd92 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -790,6 +790,10 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Knights Landing. */ cpu = "knl"; break; + case 0x85: + /* Knights Mill. */ + cpu = "knm"; + break; default: if (arch) { @@ -797,6 +801,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Knights Landing. */ if (has_avx512f) cpu = "knl"; + /* Assume Knights Mill */ + else if (has_avx5124vnniw) + cpu = "knm"; /* Assume Skylake. 
*/ else if (has_clflushopt) cpu = "skylake"; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 9a79a21..44cbe28 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -176,6 +176,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__knl"); def_or_undef (parse_in, "__knl__"); break; + case PROCESSOR_KNM: + def_or_undef (parse_in, "__knm"); + def_or_undef (parse_in, "__knm__"); + break; case PROCESSOR_SKYLAKE_AVX512: def_or_undef (parse_in, "__skylake_avx512"); def_or_undef (parse_in, "__skylake_avx512__"); @@ -292,6 +296,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_KNL: def_or_undef (parse_in, "__tune_knl__"); break; + case PROCESSOR_KNM: + def_or_undef (parse_in, "__tune_knm__"); + break; case PROCESSOR_SKYLAKE_AVX512: def_or_undef (parse_in, "__tune_skylake_avx512__"); break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d19c770..aeafd0d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2192,6 +2192,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_BONNELL (1U<<PROCESSOR_BONNELL) #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT) #define m_KNL (1U<<PROCESSOR_KNL) +#define m_KNM (1U<<PROCESSOR_KNM) #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512) #define m_INTEL (1U<<PROCESSOR_INTEL) @@ -2903,6 +2904,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {"bonnell", &atom_cost, 16, 15, 16, 7, 16}, {"silvermont", &slm_cost, 16, 15, 16, 7, 16}, {"knl", &slm_cost, 16, 15, 16, 7, 16}, + {"knm", &slm_cost, 16, 15, 16, 7, 16}, {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16}, {"intel", &intel_cost, 16, 15, 16, 7, 16}, {"geode", &geode_cost, 0, 0, 0, 0, 0}, @@ -5352,6 +5354,8 @@ ix86_option_override_internal (bool main_args_p, (PTA_CORE2 | PTA_MOVBE) #define PTA_SILVERMONT \ (PTA_WESTMERE | PTA_MOVBE) +#define PTA_KNM \ + (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ) /* 
if this reaches 64, need to widen struct pta flags below */ @@ -5422,6 +5426,7 @@ ix86_option_override_internal (bool main_args_p, {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL}, + {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM}, {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, {"geode", PROCESSOR_GEODE, CPU_GEODE, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, @@ -30282,6 +30287,7 @@ ix86_issue_rate (void) case PROCESSOR_BONNELL: case PROCESSOR_SILVERMONT: case PROCESSOR_KNL: + case PROCESSOR_KNM: case PROCESSOR_INTEL: case PROCESSOR_K6: case PROCESSOR_BTVER2: @@ -30648,6 +30654,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_SILVERMONT: case PROCESSOR_KNL: + case PROCESSOR_KNM: case PROCESSOR_INTEL: if (!reload_completed) return cost; @@ -30719,6 +30726,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_BONNELL: case PROCESSOR_SILVERMONT: case PROCESSOR_KNL: + case PROCESSOR_KNM: case PROCESSOR_INTEL: /* Generally, we want haifa-sched:max_issue() to look ahead as far as many instructions can be executed on a cycle, i.e., @@ -33844,6 +33852,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "knl"; priority = P_PROC_AVX512F; break; + case PROCESSOR_KNM: + arg_str = "knm"; + priority = P_PROC_AVX512F; + break; case PROCESSOR_SILVERMONT: arg_str = "silvermont"; priority = P_PROC_SSE4_2; @@ -34527,6 +34539,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMD_BTVER1, M_AMD_BTVER2, M_AMDFAM17H, + M_INTEL_KNM, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -34570,6 +34583,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"bonnell", M_INTEL_BONNELL}, {"silvermont", M_INTEL_SILVERMONT}, {"knl", M_INTEL_KNL}, + {"knm", M_INTEL_KNM}, {"amdfam10h", M_AMDFAM10H}, {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, diff --git 
a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 6c8ae97..012b85d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -351,6 +351,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL) #define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT) #define TARGET_KNL (ix86_tune == PROCESSOR_KNL) +#define TARGET_KNM (ix86_tune == PROCESSOR_KNM) #define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512) #define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL) #define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC) @@ -2250,6 +2251,7 @@ enum processor_type PROCESSOR_BONNELL, PROCESSOR_SILVERMONT, PROCESSOR_KNL, + PROCESSOR_KNM, PROCESSOR_SKYLAKE_AVX512, PROCESSOR_INTEL, PROCESSOR_GEODE, diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c642f45..82c853b 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* X86_TUNE_SCHEDULE: Enable scheduling. */ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT - | m_INTEL | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) + | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming on modern chips. Preffer stores affecting whole integer register @@ -49,7 +49,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", value over movb. 
*/ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL - | m_KNL | m_AMD_MULTIPLE | m_GENERIC) + | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC) /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store destinations to be 128bit to allow register renaming on 128bit SSE units, @@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", partial dependencies. */ DEF_TUNE (X86_TUNE_MOVX, "movx", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT - | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC) + | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC) /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by full sized loads. */ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL - | m_KNL | m_AMD_MULTIPLE | m_GENERIC) + | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC) /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent conditional jump instruction for 32 bit TARGET. @@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel", /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations during reassociation of fp computation. */ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel", - m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1 + m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1 | m_BDVER2 | m_ZNVER1 | m_GENERIC) /*****************************************************************************/ @@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel", regression on mgrid due to IRA limitation leading to unecessary use of the frame pointer in 32bit mode. 
*/ DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", - m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_ATHLON_K8) /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are @@ -207,8 +207,8 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more than 4 branch instructions in the 16 byte window. */ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", - m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL | - m_ATHLON_K8 | m_AMDFAM10) + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM + |m_INTEL | m_ATHLON_K8 | m_AMDFAM10) /*****************************************************************************/ /* Integer instruction selection tuning */ @@ -231,22 +231,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO)) /* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */ DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL - | m_KNL | m_GENERIC)) + | m_KNL | m_KNM | m_GENERIC)) /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred for DFmode copies */ DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT - | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) + | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)) /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL - | m_INTEL) + | m_KNM | m_INTEL) /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. 
*/ DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", - m_BONNELL | m_SILVERMONT | m_KNL) + m_BONNELL | m_SILVERMONT | m_KNL | m_KNM) /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is vector path on AMD machines. @@ -263,7 +263,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8", /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for a conditional move. */ DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", - m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL) + m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL) /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */ @@ -281,17 +281,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT - | m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER + | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC) /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", - ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL + ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_K6)) /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */ DEF_TUNE (X86_TUNE_USE_BT, "use_bt", - m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL + m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC) /*****************************************************************************/ @@ -308,7 +308,7 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop", integer operand. 
*/ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL - | m_SILVERMONT | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC)) + | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC)) /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) @@ -316,7 +316,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */ DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT - | m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) + | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) /*****************************************************************************/ /* SSE instruction selection tuning */ @@ -330,13 +330,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL + m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC) /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL + m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC) /* Use packed single precision instructions where posisble. I.e. 
movups instead @@ -375,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for fp converts to destination register. */ DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", - m_SILVERMONT | m_KNL | m_INTEL) + m_SILVERMONT | m_KNL | m_KNM | m_INTEL) /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. This form of instructions avoids partial write to the @@ -389,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) /* X86_TUNE_SLOW_SHUFB: Indicates tunings with slow pshufb instruction. */ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", - m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL) + m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL) /* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to execute 2 or more vector instructions in parallel. */ @@ -550,4 +550,4 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) /* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of cmov insns in if-converted sequence to one. */ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", - m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC) + m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f0f9559..db843ed 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -25089,6 +25089,12 @@ SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA, BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER and AVX512CD instruction set support. 
+@item knm +Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA, +BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER, AVX512CD, +AVX5124VNNIW, AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. + @item skylake-avx512 Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 65c489e..8d44116 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-09-20 Sebastian Peryt <sebastian.peryt@intel.com> + + * gcc.target/i386/builtin_target.c: Test knm. + * gcc.target/i386/funcspec-56.inc: Test arch=knm. + 2017-09-20 Richard Biener <rguenther@suse.de> PR tree-optimization/77362 diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c index 9c190eb..8fa9797 100644 --- a/gcc/testsuite/gcc.target/i386/builtin_target.c +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c @@ -42,6 +42,10 @@ check_intel_cpu_model (unsigned int family, unsigned int model, /* Knights Landing. 
*/ assert (__builtin_cpu_is ("knl")); break; + case 0x85: + /* Knights Mill */ + assert (__builtin_cpu_is ("knm")); + break; case 0x1a: case 0x1e: case 0x1f: diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 746c9cf..9ae74cb 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -142,6 +142,7 @@ extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7"))); extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx"))); extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2"))); extern void test_arch_knl (void) __attribute__((__target__("arch=knl"))); +extern void test_arch_knm (void) __attribute__((__target__("arch=knm"))); extern void test_arch_skylake_avx512 (void) __attribute__((__target__("arch=skylake-avx512"))); extern void test_arch_k8 (void) __attribute__((__target__("arch=k8"))); extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3"))); |