aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/gcn/gcn.c16
2 files changed, 20 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9fbe728..443d64d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2019-11-22 Andrew Stubbs <ams@codesourcery.com>
+
+ * config/gcn/gcn.c (OMP_LDS_SIZE): Define.
+ (ACC_LDS_SIZE): Define.
+ (OTHER_LDS_SIZE): Define.
+ (LDS_SIZE): Redefine using above.
+ (gcn_expand_prologue): Initialize m0 with LDS_SIZE-1.
+
2019-11-22 Martin Sebor <msebor@redhat.com>
PR tree-optimization/92501
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 3a8c10e..f85d84b 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -70,10 +70,15 @@ int gcn_isa = 3; /* Default to GCN3. */
worker-single mode to worker-partitioned mode), per workgroup. Global
analysis could calculate an exact bound, but we don't do that yet.
- We reserve the whole LDS, which also prevents any other workgroup
- sharing the Compute Unit. */
+ We want to permit full occupancy, so size accordingly. */
-#define LDS_SIZE 65536
+#define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */
+#define ACC_LDS_SIZE 32768 /* Half of the total should be fine. */
+#define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */
+
+#define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
+ : flag_openmp ? OMP_LDS_SIZE \
+ : OTHER_LDS_SIZE)
/* The number of registers usable by normal non-kernel functions.
The SGPR count includes any special extra registers such as VCC. */
@@ -2876,8 +2881,11 @@ gcn_expand_prologue ()
/* Ensure that the scheduler doesn't do anything unexpected. */
emit_insn (gen_blockage ());
+ /* m0 is initialized for the usual LDS DS and FLAT memory case.
+ The low-part is the address of the topmost addressable byte, which is
+ size-1. The high-part is an offset and should be zero. */
emit_move_insn (gen_rtx_REG (SImode, M0_REG),
- gen_int_mode (LDS_SIZE, SImode));
+ gen_int_mode (LDS_SIZE-1, SImode));
emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));