diff options
Diffstat (limited to 'gcc/config/i386/i386.h')
-rw-r--r-- | gcc/config/i386/i386.h | 59 |
1 file changed, 41 insertions, 18 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 791f3b9..ac0ce68 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -102,6 +102,15 @@ struct stringop_algs #define COSTS_N_BYTES(N) ((N) * 2) #endif + +enum ix86_reduc_unroll_factor{ + X86_REDUC_FMA, + X86_REDUC_DOT_PROD, + X86_REDUC_SAD, + + X86_REDUC_LAST +}; + /* Define the specific costs for a given cpu. NB: hard_register is used by TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute hard register move costs by register allocator. Relative costs of @@ -225,6 +234,13 @@ struct processor_costs { to number of instructions executed in parallel. See also ix86_reassociation_width. */ + const unsigned reduc_lat_mult_thr[X86_REDUC_LAST]; + /* Latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + const unsigned vect_unroll_limit; /* Limit how much the autovectorizer + may unroll a loop. */ struct stringop_algs *memcpy, *memset; const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer cost model. 
*/ @@ -644,7 +660,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); {"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \ {"arch", "%{!march=*:-march=%(VALUE)}"}, \ {"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \ - {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, + {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, \ + {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"}, /* Specs for the compiler proper */ @@ -2477,9 +2494,9 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 - | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 - | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; + | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT + | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL + | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; constexpr wide_int_bitmask PTA_BDVER2 = PTA_BDVER1 | PTA_BMI | PTA_TBM | PTA_F16C | PTA_FMA; constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT @@ -2487,13 +2504,13 @@ constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT constexpr wide_int_bitmask PTA_BDVER4 = PTA_BDVER3 | PTA_AVX2 | PTA_BMI2 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX; -constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 - | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 - | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT - | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED - | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT - | PTA_POPCNT; +constexpr 
wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE + | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT + | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL + | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE + | PTA_MWAITX | PTA_ADX | PTA_RDSEED | PTA_CLZERO | PTA_CLFLUSHOPT + | PTA_XSAVEC | PTA_XSAVES | PTA_SHA; constexpr wide_int_bitmask PTA_ZNVER2 = PTA_ZNVER1 | PTA_CLWB | PTA_RDPID | PTA_WBNOINVD; constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ @@ -2506,19 +2523,19 @@ constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI; constexpr wide_int_bitmask PTA_BTVER1 = PTA_64BIT | PTA_MMX | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 - | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; + | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_LZCNT | PTA_POPCNT + | PTA_ABM | PTA_CX16 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; constexpr wide_int_bitmask PTA_BTVER2 = PTA_BTVER1 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_XSAVEOPT; constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW - | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE - | PTA_ADX | PTA_RDSEED | PTA_POPCNT; + | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_LZCNT | PTA_POPCNT | PTA_ABM + | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI + | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE + | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED; constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2 - | PTA_F16C | PTA_FMA | PTA_SHA | PTA_LZCNT; + | 
PTA_F16C | PTA_FMA | PTA_SHA; #ifndef GENERATOR_FILE @@ -2865,6 +2882,9 @@ struct GTY(()) machine_function { approximation. */ BOOL_BITFIELD tls_descriptor_call_expanded_p : 1; + /* True if TLS descriptor is called more than once. */ + BOOL_BITFIELD tls_descriptor_call_multiple_p : 1; + /* If true, the current function has a STATIC_CHAIN is placed on the stack below the return address. */ BOOL_BITFIELD static_chain_on_stack : 1; @@ -2934,6 +2954,9 @@ struct GTY(()) machine_function { /* True if this is a recursive function. */ BOOL_BITFIELD recursive_function : 1; + /* True if by_pieces op is currently in use. */ + BOOL_BITFIELD by_pieces_in_use : 1; + /* The largest alignment, in bytes, of stack slot actually used. */ unsigned int max_used_stack_alignment; |