diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2023-12-13 12:00:52 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-12-13 15:30:42 +0000 |
commit | 4c12bcbeb0c0fd6da4c56e7622814201daadd585 (patch) | |
tree | 29c8e995959fc98396035e1aaef957634def21bc /gcc | |
parent | 392f70cc11089f6da2611177de5f3e8baad6d327 (diff) | |
download | gcc-4c12bcbeb0c0fd6da4c56e7622814201daadd585.zip gcc-4c12bcbeb0c0fd6da4c56e7622814201daadd585.tar.gz gcc-4c12bcbeb0c0fd6da4c56e7622814201daadd585.tar.bz2 |
amdgcn: Work around XNACK register allocation problem
The extra register pressure is causing infinite loops in some cases, especially
at -O0. I have not yet observed any issue on devices that have AVGPRs for
spilling, and XNACK is only really useful on those devices anyway, so change
the defaults.
gcc/ChangeLog:
* config/gcn/gcn-hsa.h (NO_XNACK): Change the defaults.
* config/gcn/gcn-opts.h (enum hsaco_attr_type): Add HSACO_ATTR_DEFAULT.
* config/gcn/gcn.cc (gcn_option_override): Set the default flag_xnack.
* config/gcn/gcn.opt: Add -mxnack=default.
* doc/invoke.texi: Document the -mxnack default.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/gcn/gcn-hsa.h | 4 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-opts.h | 3 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 23 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.opt | 7 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 3 |
5 files changed, 35 insertions, 5 deletions
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index bfb1045..b44d42b 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -75,7 +75,9 @@ extern unsigned int gcn_local_sym_hash (const char *name);
    supported for gcn. */
 #define GOMP_SELF_SPECS ""
 
-#define NO_XNACK "march=fiji:;march=gfx1030:;"
+#define NO_XNACK "march=fiji:;march=gfx1030:;" \
+    /* These match the defaults set in gcn.cc. */ \
+    "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
 #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
 
 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index b4f494d..634cec6 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -65,7 +65,8 @@ enum hsaco_attr_type
 {
   HSACO_ATTR_OFF,
   HSACO_ATTR_ON,
-  HSACO_ATTR_ANY
+  HSACO_ATTR_ANY,
+  HSACO_ATTR_DEFAULT
 };
 
 #endif
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index d92cd01..b67551a 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -172,6 +172,29 @@ gcn_option_override (void)
       /* Allow HSACO_ATTR_ANY silently because that's the default. */
       flag_xnack = HSACO_ATTR_OFF;
     }
+
+  /* There's no need for XNACK on devices without USM, and there are register
+     allocation problems caused by the early-clobber when AVGPR spills are not
+     available.
+     FIXME: can the regalloc mean the default can be really "any"? */
+  if (flag_xnack == HSACO_ATTR_DEFAULT)
+    switch (gcn_arch)
+      {
+      case PROCESSOR_FIJI:
+      case PROCESSOR_VEGA10:
+      case PROCESSOR_VEGA20:
+      case PROCESSOR_GFX908:
+        flag_xnack = HSACO_ATTR_OFF;
+        break;
+      case PROCESSOR_GFX90a:
+        flag_xnack = HSACO_ATTR_ANY;
+        break;
+      default:
+        gcc_unreachable ();
+      }
+
+  if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
+    flag_sram_ecc = HSACO_ATTR_ANY;
 }
 
 /* }}} */
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index c356a0c..32486d9 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -97,9 +97,12 @@ Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON)
 EnumValue
 Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
 
+EnumValue
+Enum(hsaco_attr_type) String(default) Value(HSACO_ATTR_DEFAULT)
+
 mxnack=
-Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
-Compile for devices requiring XNACK enabled. Default \"any\".
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_DEFAULT)
+Compile for devices requiring XNACK enabled. Default \"any\" if USM is supported.
 
 msram-ecc=
 Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index db039c4..8f885b8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21628,7 +21628,8 @@ run-time performance. The default is 32KB when using OpenACC or OpenMP, and
 Compile binaries suitable for devices with the XNACK feature enabled, disabled, or
 either mode. Some devices always require XNACK and some allow the user to
 configure XNACK. The compiled code must match the device mode.
-The default is @samp{-mxnack=any}.
+The default is @samp{-mxnack=any} on devices that support Unified Shared
+Memory, and @samp{-mxnack=no} otherwise.
 
 @end table
 