aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/i386-options.cc4
-rw-r--r--gcc/config/i386/i386.cc18
-rw-r--r--gcc/config/i386/i386.h10
-rw-r--r--gcc/config/i386/x86-tune-costs.h2
-rw-r--r--gcc/config/i386/x86-tune-sched.cc15
5 files changed, 32 insertions, 17 deletions
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index a9fac01..964449f 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2828,8 +2828,8 @@ ix86_option_override_internal (bool main_args_p,
if (flag_nop_mcount)
error ("%<-mnop-mcount%> is not compatible with this target");
#endif
- if (flag_nop_mcount && flag_pic)
- error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>");
+ if (flag_nop_mcount && flag_pic && !flag_plt)
+ error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>");
/* Accept -msseregparm only if at least SSE support is enabled. */
if (TARGET_SSEREGPARM_P (opts->x_target_flags)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4f8380c4..38df84f 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -458,6 +458,9 @@ int ix86_arch_specified;
indirect thunk pushes the return address onto stack, destroying
red-zone.
+ NB: Don't use red-zone for functions with no_caller_saved_registers
+ and 32 GPRs since 128-byte red-zone is too small for 31 GPRs.
+
TODO: If we can reserve the first 2 WORDs, for PUSH and, another
for CALL, in red-zone, we can allow local indirect jumps with
indirect thunk. */
@@ -467,6 +470,9 @@ ix86_using_red_zone (void)
{
return (TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
+ && (!TARGET_APX_EGPR
+ || (cfun->machine->call_saved_registers
+ != TYPE_NO_CALLER_SAVED_REGISTERS))
&& (!cfun->machine->has_local_indirect_jump
|| cfun->machine->indirect_branch_type == indirect_branch_keep));
}
@@ -23158,6 +23164,12 @@ x86_print_call_or_nop (FILE *file, const char *target)
if (flag_nop_mcount || !strcmp (target, "nop"))
/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ else if (!TARGET_PECOFF && flag_pic)
+ {
+ gcc_assert (flag_plt);
+
+ fprintf (file, "1:\tcall\t%s@PLT\n", target);
+ }
else
fprintf (file, "1:\tcall\t%s\n", target);
}
@@ -23321,7 +23333,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
break;
case CM_SMALL_PIC:
case CM_MEDIUM_PIC:
- if (!ix86_direct_extern_access)
+ if (!flag_plt)
{
if (ASSEMBLER_DIALECT == ASM_INTEL)
fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
@@ -23352,7 +23364,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
"\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- if (ASSEMBLER_DIALECT == ASM_INTEL)
+ if (flag_plt)
+ x86_print_call_or_nop (file, mcount_name);
+ else if (ASSEMBLER_DIALECT == ASM_INTEL)
fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
else
fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 13da3d8..8507243 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2449,11 +2449,11 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA
| PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD
| PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK
| PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI
- | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1
- | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8
- | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
- | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE
- | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
+ | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256
+ | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16
+ | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4
+ | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32
+ | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb73..9477345 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2120,7 +2120,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
/* ADDSS has throughput 2 and latency 2
(in some cases when source is another addition). */
- COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
/* MULSS has throughput 2 and latency 3. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index 685a83c..15d3d91 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -81,6 +81,14 @@ ix86_issue_rate (void)
case PROCESSOR_YONGFENG:
case PROCESSOR_SHIJIDADAO:
case PROCESSOR_GENERIC:
+ /* For znver5 decoder can handle 4 or 8 instructions per cycle,
+ op cache 12 instruction/cycle, dispatch 8 instructions
+ integer rename 8 instructions and Fp 6 instructions.
+
+ The scheduler, without understanding out of order nature of the CPU
+ is not going to be able to use more than 4 instructions since that
+ is limits of the decoders. */
+ case PROCESSOR_ZNVER5:
return 4;
case PROCESSOR_ICELAKE_CLIENT:
@@ -91,13 +99,6 @@ ix86_issue_rate (void)
return 5;
case PROCESSOR_SAPPHIRERAPIDS:
- /* For znver5 decoder can handle 4 or 8 instructions per cycle,
- op cache 12 instruction/cycle, dispatch 8 instructions
- integer rename 8 instructions and Fp 6 instructions.
-
- The scheduler, without understanding out of order nature of the CPU
- is unlikely going to be able to fill all of these. */
- case PROCESSOR_ZNVER5:
return 6;
default: