aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorTom de Vries <tdevries@suse.de>2018-12-19 10:17:21 +0000
committerTom de Vries <vries@gcc.gnu.org>2018-12-19 10:17:21 +0000
commita0b3b5c4af07ba34991c4f253496725a760104c2 (patch)
tree5b11b5a684d3c6f49f873effa3e01ac51347ad67 /gcc/config
parent1ed57fb8b6988d649c787cbbc2c87acf823498c1 (diff)
downloadgcc-a0b3b5c4af07ba34991c4f253496725a760104c2.zip
gcc-a0b3b5c4af07ba34991c4f253496725a760104c2.tar.gz
gcc-a0b3b5c4af07ba34991c4f253496725a760104c2.tar.bz2
[nvptx] Make nvptx state propagation function names more generic
Rename state propagation functions to avoid worker/vector terminology. Build and reg-tested on x86_64 with nvptx accelerator. 2018-12-19 Tom de Vries <tdevries@suse.de> * config/nvptx/nvptx.c (nvptx_gen_vcast): Rename as nvptx_gen_warp_bcast. (nvptx_gen_wcast): Rename to nvptx_gen_shared_bcast, add bool vector argument, and update call to nvptx_gen_shared_bcast. (propagator_fn): Add bool argument. (nvptx_propagate): New bool argument, pass bool argument to fn. (vprop_gen): Rename to warp_prop_gen, update call to nvptx_gen_warp_bcast. (nvptx_vpropagate): Rename to nvptx_warp_propagate, update call to nvptx_propagate. (wprop_gen): Rename to shared_prop_gen, update call to nvptx_gen_shared_bcast. (nvptx_wpropagate): Rename to nvptx_shared_propagate, update call to nvptx_propagate. (nvptx_wsync): Rename to nvptx_cta_sync. (nvptx_single): Update calls to nvptx_gen_warp_bcast, nvptx_gen_shared_bcast and nvptx_cta_sync. (nvptx_process_pars): Likewise. (write_worker_buffer): Rename as write_shared_buffer. (nvptx_file_end): Update calls to write_shared_buffer. (nvptx_expand_worker_addr): Rename as nvptx_expand_shared_addr. (nvptx_expand_builtin): Update call to nvptx_expand_shared_addr. (nvptx_get_worker_red_addr): Rename as nvptx_get_shared_red_addr. (nvptx_goacc_reduction_setup): Update call to nvptx_get_shared_red_addr. (nvptx_goacc_reduction_fini): Likewise. (nvptx_goacc_reduction_teardown): Likewise. From-SVN: r267260
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/nvptx/nvptx.c96
1 files changed, 54 insertions, 42 deletions
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 9625ac8..163f226 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1748,7 +1748,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
across the vectors of a single warp. */
static rtx
-nvptx_gen_vcast (rtx reg)
+nvptx_gen_warp_bcast (rtx reg)
{
return nvptx_gen_shuffle (reg, reg, const0_rtx, SHUFFLE_IDX);
}
@@ -1779,7 +1779,8 @@ enum propagate_mask
how many loop iterations will be executed (0 for not a loop). */
static rtx
-nvptx_gen_wcast (rtx reg, propagate_mask pm, unsigned rep, broadcast_data_t *data)
+nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
+ broadcast_data_t *data, bool vector)
{
rtx res;
machine_mode mode = GET_MODE (reg);
@@ -1793,7 +1794,7 @@ nvptx_gen_wcast (rtx reg, propagate_mask pm, unsigned rep, broadcast_data_t *dat
start_sequence ();
if (pm & PM_read)
emit_insn (gen_sel_truesi (tmp, reg, GEN_INT (1), const0_rtx));
- emit_insn (nvptx_gen_wcast (tmp, pm, rep, data));
+ emit_insn (nvptx_gen_shared_bcast (tmp, pm, rep, data, vector));
if (pm & PM_write)
emit_insn (gen_rtx_SET (reg, gen_rtx_NE (BImode, tmp, const0_rtx)));
res = get_insns ();
@@ -1813,6 +1814,7 @@ nvptx_gen_wcast (rtx reg, propagate_mask pm, unsigned rep, broadcast_data_t *dat
oacc_bcast_align = align;
data->offset = (data->offset + align - 1) & ~(align - 1);
addr = data->base;
+ gcc_assert (data->base != NULL);
if (data->offset)
addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (data->offset));
}
@@ -3803,11 +3805,11 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
regions and (b) only propagating stack entries that are used. The
latter might be quite hard to determine. */
-typedef rtx (*propagator_fn) (rtx, propagate_mask, unsigned, void *);
+typedef rtx (*propagator_fn) (rtx, propagate_mask, unsigned, void *, bool);
static bool
nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
- propagate_mask rw, propagator_fn fn, void *data)
+ propagate_mask rw, propagator_fn fn, void *data, bool vector)
{
bitmap live = DF_LIVE_IN (block);
bitmap_iterator iterator;
@@ -3842,7 +3844,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
emit_insn (gen_rtx_SET (idx, GEN_INT (fs)));
/* Allow worker function to initialize anything needed. */
- rtx init = fn (tmp, PM_loop_begin, fs, data);
+ rtx init = fn (tmp, PM_loop_begin, fs, data, vector);
if (init)
emit_insn (init);
emit_label (label);
@@ -3851,7 +3853,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
}
if (rw & PM_read)
emit_insn (gen_rtx_SET (tmp, gen_rtx_MEM (DImode, ptr)));
- emit_insn (fn (tmp, rw, fs, data));
+ emit_insn (fn (tmp, rw, fs, data, vector));
if (rw & PM_write)
emit_insn (gen_rtx_SET (gen_rtx_MEM (DImode, ptr), tmp));
if (fs)
@@ -3859,7 +3861,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
emit_insn (gen_rtx_SET (pred, gen_rtx_NE (BImode, idx, const0_rtx)));
emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (GET_MODE_SIZE (DImode))));
emit_insn (gen_br_true_uni (pred, label));
- rtx fini = fn (tmp, PM_loop_end, fs, data);
+ rtx fini = fn (tmp, PM_loop_end, fs, data, vector);
if (fini)
emit_insn (fini);
emit_insn (gen_rtx_CLOBBER (GET_MODE (idx), idx));
@@ -3879,7 +3881,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
if (REGNO (reg) >= FIRST_PSEUDO_REGISTER)
{
- rtx bcast = fn (reg, rw, 0, data);
+ rtx bcast = fn (reg, rw, 0, data, vector);
insn = emit_insn_after (bcast, insn);
empty = false;
@@ -3888,16 +3890,17 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
return empty;
}
-/* Worker for nvptx_vpropagate. */
+/* Worker for nvptx_warp_propagate. */
static rtx
-vprop_gen (rtx reg, propagate_mask pm,
- unsigned ARG_UNUSED (count), void *ARG_UNUSED (data))
+warp_prop_gen (rtx reg, propagate_mask pm,
+ unsigned ARG_UNUSED (count), void *ARG_UNUSED (data),
+ bool ARG_UNUSED (vector))
{
if (!(pm & PM_read_write))
return 0;
- return nvptx_gen_vcast (reg);
+ return nvptx_gen_warp_bcast (reg);
}
/* Propagate state that is live at start of BLOCK across the vectors
@@ -3905,15 +3908,17 @@ vprop_gen (rtx reg, propagate_mask pm,
IS_CALL and return as for nvptx_propagate. */
static bool
-nvptx_vpropagate (bool is_call, basic_block block, rtx_insn *insn)
+nvptx_warp_propagate (bool is_call, basic_block block, rtx_insn *insn)
{
- return nvptx_propagate (is_call, block, insn, PM_read_write, vprop_gen, 0);
+ return nvptx_propagate (is_call, block, insn, PM_read_write,
+ warp_prop_gen, 0, false);
}
-/* Worker for nvptx_wpropagate. */
+/* Worker for nvptx_shared_propagate. */
static rtx
-wprop_gen (rtx reg, propagate_mask pm, unsigned rep, void *data_)
+shared_prop_gen (rtx reg, propagate_mask pm, unsigned rep, void *data_,
+ bool vector)
{
broadcast_data_t *data = (broadcast_data_t *)data_;
@@ -3937,7 +3942,7 @@ wprop_gen (rtx reg, propagate_mask pm, unsigned rep, void *data_)
return clobber;
}
else
- return nvptx_gen_wcast (reg, pm, rep, data);
+ return nvptx_gen_shared_bcast (reg, pm, rep, data, vector);
}
/* Spill or fill live state that is live at start of BLOCK. PRE_P
@@ -3946,7 +3951,8 @@ wprop_gen (rtx reg, propagate_mask pm, unsigned rep, void *data_)
INSN. IS_CALL and return as for nvptx_propagate. */
static bool
-nvptx_wpropagate (bool pre_p, bool is_call, basic_block block, rtx_insn *insn)
+nvptx_shared_propagate (bool pre_p, bool is_call, basic_block block,
+ rtx_insn *insn, bool vector)
{
broadcast_data_t data;
@@ -3955,7 +3961,8 @@ nvptx_wpropagate (bool pre_p, bool is_call, basic_block block, rtx_insn *insn)
data.ptr = NULL_RTX;
bool empty = nvptx_propagate (is_call, block, insn,
- pre_p ? PM_read : PM_write, wprop_gen, &data);
+ pre_p ? PM_read : PM_write, shared_prop_gen,
+ &data, vector);
gcc_assert (empty == !data.offset);
if (data.offset)
{
@@ -3973,7 +3980,7 @@ nvptx_wpropagate (bool pre_p, bool is_call, basic_block block, rtx_insn *insn)
markers for before and after synchronizations. */
static rtx
-nvptx_wsync (bool after)
+nvptx_cta_sync (bool after)
{
return gen_nvptx_barsync (GEN_INT (after), GEN_INT (0));
}
@@ -4328,7 +4335,7 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
emit_insn_before (gen_rtx_SET (tmp, pvar), label);
emit_insn_before (gen_rtx_SET (pvar, tmp), tail);
#endif
- emit_insn_before (nvptx_gen_vcast (pvar), tail);
+ emit_insn_before (nvptx_gen_warp_bcast (pvar), tail);
}
else
{
@@ -4343,16 +4350,18 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
oacc_bcast_size = GET_MODE_SIZE (SImode);
data.offset = 0;
- emit_insn_before (nvptx_gen_wcast (pvar, PM_read, 0, &data),
+ emit_insn_before (nvptx_gen_shared_bcast (pvar, PM_read, 0, &data,
+ false),
before);
/* Barrier so other workers can see the write. */
- emit_insn_before (nvptx_wsync (false), tail);
+ emit_insn_before (nvptx_cta_sync (false), tail);
data.offset = 0;
- emit_insn_before (nvptx_gen_wcast (pvar, PM_write, 0, &data), tail);
+ emit_insn_before (nvptx_gen_shared_bcast (pvar, PM_write, 0, &data,
+ false), tail);
/* This barrier is needed to avoid worker zero clobbering
the broadcast buffer before all the other workers have
had a chance to read this instance of it. */
- emit_insn_before (nvptx_wsync (false), tail);
+ emit_insn_before (nvptx_cta_sync (false), tail);
}
extract_insn (tail);
@@ -4469,19 +4478,21 @@ nvptx_process_pars (parallel *par)
if (par->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
{
- nvptx_wpropagate (false, is_call, par->forked_block, par->forked_insn);
- bool empty = nvptx_wpropagate (true, is_call,
- par->forked_block, par->fork_insn);
+ nvptx_shared_propagate (false, is_call, par->forked_block,
+ par->forked_insn, false);
+ bool empty = nvptx_shared_propagate (true, is_call,
+ par->forked_block, par->fork_insn,
+ false);
if (!empty || !is_call)
{
/* Insert begin and end synchronizations. */
- emit_insn_before (nvptx_wsync (false), par->forked_insn);
- emit_insn_before (nvptx_wsync (false), par->join_insn);
+ emit_insn_before (nvptx_cta_sync (false), par->forked_insn);
+ emit_insn_before (nvptx_cta_sync (false), par->join_insn);
}
}
else if (par->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
- nvptx_vpropagate (is_call, par->forked_block, par->forked_insn);
+ nvptx_warp_propagate (is_call, par->forked_block, par->forked_insn);
/* Now do siblings. */
if (par->next)
@@ -4945,10 +4956,11 @@ nvptx_file_start (void)
fputs ("// END PREAMBLE\n", asm_out_file);
}
-/* Emit a declaration for a worker-level buffer in .shared memory. */
+/* Emit a declaration for a worker and vector-level buffer in .shared
+ memory. */
static void
-write_worker_buffer (FILE *file, rtx sym, unsigned align, unsigned size)
+write_shared_buffer (FILE *file, rtx sym, unsigned align, unsigned size)
{
const char *name = XSTR (sym, 0);
@@ -4970,11 +4982,11 @@ nvptx_file_end (void)
fputs (func_decls.str().c_str(), asm_out_file);
if (oacc_bcast_size)
- write_worker_buffer (asm_out_file, oacc_bcast_sym,
+ write_shared_buffer (asm_out_file, oacc_bcast_sym,
oacc_bcast_align, oacc_bcast_size);
if (worker_red_size)
- write_worker_buffer (asm_out_file, worker_red_sym,
+ write_shared_buffer (asm_out_file, worker_red_sym,
worker_red_align, worker_red_size);
if (need_softstack_decl)
@@ -5025,7 +5037,7 @@ nvptx_expand_shuffle (tree exp, rtx target, machine_mode mode, int ignore)
/* Worker reduction address expander. */
static rtx
-nvptx_expand_worker_addr (tree exp, rtx target,
+nvptx_expand_shared_addr (tree exp, rtx target,
machine_mode ARG_UNUSED (mode), int ignore)
{
if (ignore)
@@ -5161,7 +5173,7 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
return nvptx_expand_shuffle (exp, target, mode, ignore);
case NVPTX_BUILTIN_WORKER_ADDR:
- return nvptx_expand_worker_addr (exp, target, mode, ignore);
+ return nvptx_expand_shared_addr (exp, target, mode, ignore);
case NVPTX_BUILTIN_CMP_SWAP:
case NVPTX_BUILTIN_CMP_SWAPLL:
@@ -5330,7 +5342,7 @@ nvptx_goacc_fork_join (gcall *call, const int dims[],
data at that location. */
static tree
-nvptx_get_worker_red_addr (tree type, tree offset)
+nvptx_get_shared_red_addr (tree type, tree offset)
{
machine_mode mode = TYPE_MODE (type);
tree fndecl = nvptx_builtin_decl (NVPTX_BUILTIN_WORKER_ADDR, true);
@@ -5672,7 +5684,7 @@ nvptx_goacc_reduction_setup (gcall *call)
{
/* Store incoming value to worker reduction buffer. */
tree offset = gimple_call_arg (call, 5);
- tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
+ tree call = nvptx_get_shared_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
@@ -5814,7 +5826,7 @@ nvptx_goacc_reduction_fini (gcall *call)
{
/* Get reduction buffer address. */
tree offset = gimple_call_arg (call, 5);
- tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
+ tree call = nvptx_get_shared_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
@@ -5858,7 +5870,7 @@ nvptx_goacc_reduction_teardown (gcall *call)
{
/* Read the worker reduction buffer. */
tree offset = gimple_call_arg (call, 5);
- tree call = nvptx_get_worker_red_addr(TREE_TYPE (var), offset);
+ tree call = nvptx_get_shared_red_addr(TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);