aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Andrew Stubbs <ams@codesourcery.com> 2023-12-13 12:00:52 +0000
committer Andrew Stubbs <ams@codesourcery.com> 2023-12-13 15:30:42 +0000
commit 4c12bcbeb0c0fd6da4c56e7622814201daadd585 (patch)
tree 29c8e995959fc98396035e1aaef957634def21bc /gcc
parent 392f70cc11089f6da2611177de5f3e8baad6d327 (diff)
download gcc-4c12bcbeb0c0fd6da4c56e7622814201daadd585.zip
gcc-4c12bcbeb0c0fd6da4c56e7622814201daadd585.tar.gz
gcc-4c12bcbeb0c0fd6da4c56e7622814201daadd585.tar.bz2
amdgcn: Work around XNACK register allocation problem
The extra register pressure is causing infinite loops in some cases,
especially at -O0.  I have not yet observed any issue on devices that have
AVGPRs for spilling, and XNACK is only really useful on those devices anyway,
so change the defaults.

gcc/ChangeLog:

	* config/gcn/gcn-hsa.h (NO_XNACK): Change the defaults.
	* config/gcn/gcn-opts.h (enum hsaco_attr_type): Add HSACO_ATTR_DEFAULT.
	* config/gcn/gcn.cc (gcn_option_override): Set the default flag_xnack.
	* config/gcn/gcn.opt: Add -mxnack=default.
	* doc/invoke.texi: Document the -mxnack default.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/gcn/gcn-hsa.h 4
-rw-r--r-- gcc/config/gcn/gcn-opts.h 3
-rw-r--r-- gcc/config/gcn/gcn.cc 23
-rw-r--r-- gcc/config/gcn/gcn.opt 7
-rw-r--r-- gcc/doc/invoke.texi 3
5 files changed, 35 insertions, 5 deletions
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index bfb1045..b44d42b 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -75,7 +75,9 @@ extern unsigned int gcn_local_sym_hash (const char *name);
supported for gcn. */
#define GOMP_SELF_SPECS ""
-#define NO_XNACK "march=fiji:;march=gfx1030:;"
+#define NO_XNACK "march=fiji:;march=gfx1030:;" \
+ /* These match the defaults set in gcn.cc. */ \
+ "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index b4f494d..634cec6 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -65,7 +65,8 @@ enum hsaco_attr_type
{
HSACO_ATTR_OFF,
HSACO_ATTR_ON,
- HSACO_ATTR_ANY
+ HSACO_ATTR_ANY,
+ HSACO_ATTR_DEFAULT
};
#endif
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index d92cd01..b67551a 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -172,6 +172,29 @@ gcn_option_override (void)
/* Allow HSACO_ATTR_ANY silently because that's the default. */
flag_xnack = HSACO_ATTR_OFF;
}
+
+ /* There's no need for XNACK on devices without USM, and there are register
+ allocation problems caused by the early-clobber when AVGPR spills are not
+ available.
+ FIXME: can the regalloc mean the default can be really "any"? */
+ if (flag_xnack == HSACO_ATTR_DEFAULT)
+ switch (gcn_arch)
+ {
+ case PROCESSOR_FIJI:
+ case PROCESSOR_VEGA10:
+ case PROCESSOR_VEGA20:
+ case PROCESSOR_GFX908:
+ flag_xnack = HSACO_ATTR_OFF;
+ break;
+ case PROCESSOR_GFX90a:
+ flag_xnack = HSACO_ATTR_ANY;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
+ flag_sram_ecc = HSACO_ATTR_ANY;
}
/* }}} */
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index c356a0c..32486d9 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -97,9 +97,12 @@ Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON)
EnumValue
Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
+EnumValue
+Enum(hsaco_attr_type) String(default) Value(HSACO_ATTR_DEFAULT)
+
mxnack=
-Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
-Compile for devices requiring XNACK enabled. Default \"any\".
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_DEFAULT)
+Compile for devices requiring XNACK enabled. Default \"any\" if USM is supported.
msram-ecc=
Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index db039c4..8f885b8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21628,7 +21628,8 @@ run-time performance. The default is 32KB when using OpenACC or OpenMP, and
Compile binaries suitable for devices with the XNACK feature enabled, disabled,
or either mode. Some devices always require XNACK and some allow the user to
configure XNACK. The compiled code must match the device mode.
-The default is @samp{-mxnack=any}.
+The default is @samp{-mxnack=any} on devices that support Unified Shared
+Memory, and @samp{-mxnack=off} otherwise.
@end table