aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Julian Brown <julian@codesourcery.com> 2020-02-10 12:26:57 -0800
committer Julian Brown <julian@codesourcery.com> 2020-09-08 13:26:42 -0700
commit e929d65b48ad5583e995d872ca5de95b23de4d72 (patch)
tree 653ba525d9caf52a579a85f2e5e929cf36307744 /gcc
parent 3aee3aaf48be2d3d81e381690ae9dd305d8b505f (diff)
downloadgcc-e929d65b48ad5583e995d872ca5de95b23de4d72.zip
gcc-e929d65b48ad5583e995d872ca5de95b23de4d72.tar.gz
gcc-e929d65b48ad5583e995d872ca5de95b23de4d72.tar.bz2
amdgcn: Add waitcnt after LDS write instructions
Data-share write (ds_write) instructions do not necessarily complete the
write to LDS immediately. When a write completes, LGKM_CNT is decremented.
For now, we wait until LGKM_CNT reaches zero after each ds_write instruction.

This fixes a race condition in the case where LDS is read immediately after
being written. This can happen with broadcast operations.

2020-09-08  Julian Brown  <julian@codesourcery.com>

gcc/
	* config/gcn/gcn-valu.md (scatter<mode>_insn_1offset_ds<exec_scatter>):
	Add waitcnt.
	* config/gcn/gcn.md (*mov<mode>_insn, *movti_insn): Add waitcnt to
	ds_write alternatives.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/gcn/gcn-valu.md 2
-rw-r--r-- gcc/config/gcn/gcn.md 8
2 files changed, 5 insertions, 5 deletions
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 26559ff..e4d7f2a 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -923,7 +923,7 @@
{
addr_space_t as = INTVAL (operands[3]);
static char buf[200];
- sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
+ sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
(AS_GDS_P (as) ? " gds" : ""));
return buf;
}
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index ed98d2d..aeb25fb 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -554,7 +554,7 @@
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dword\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
- ds_write_b32\t%A0, %1%O0
+ ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
s_mov_b32\t%0, %1
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
@@ -582,7 +582,7 @@
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store%s0\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
- ds_write%b0\t%A0, %1%O0
+ ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store%s0\t%A0, %1%O0%g0"
@@ -611,7 +611,7 @@
#
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dwordx2\t%A0, %1%O0%g0
- ds_write_b64\t%A0, %1%O0
+ ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dwordx2\t%A0, %1%O0%g0"
@@ -667,7 +667,7 @@
#
global_store_dwordx4\t%A0, %1%O0%g0
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- ds_write_b128\t%A0, %1%O0
+ ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
"reload_completed
&& REG_P (operands[0])