diff options
Diffstat (limited to 'gcc/config/nvptx')
-rw-r--r-- | gcc/config/nvptx/mkoffload.cc | 13 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 269 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.h | 3 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 68 |
5 files changed, 331 insertions, 24 deletions
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index 5d89ba8..82ea313 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -603,6 +603,7 @@ main (int argc, char **argv) /* Scan the argument vector. */ bool fopenmp = false; + bool fopenmp_target = false; bool fopenacc = false; bool fPIC = false; bool fpic = false; @@ -622,6 +623,9 @@ main (int argc, char **argv) #undef STR else if (strcmp (argv[i], "-fopenmp") == 0) fopenmp = true; + else if (strncmp (argv[i], "-fopenmp-target=", + strlen ("-fopenmp-target=")) == 0) + fopenmp_target = true; else if (strcmp (argv[i], "-fopenacc") == 0) fopenacc = true; else if (strcmp (argv[i], "-fPIC") == 0) @@ -639,6 +643,15 @@ main (int argc, char **argv) if (!(fopenacc ^ fopenmp)) fatal_error (input_location, "either %<-fopenacc%> or %<-fopenmp%> " "must be set"); + if (fopenmp_target) + { + if (fopenacc) + fatal_error (input_location, "%<-fopenacc%> not compatible with " + "%<-fopenmp-target=%>"); + if (!fopenmp) + fatal_error (input_location, "%<-fopenmp-target=%> requires " + "%<-fopenmp%>"); + } struct obstack argv_obstack; obstack_init (&argv_obstack); diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index dfa08ec..a86514b 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -50,6 +50,7 @@ extern unsigned int ptx_version_to_number (enum ptx_version, bool); extern void nvptx_expand_oacc_fork (unsigned); extern void nvptx_expand_oacc_join (unsigned); extern void nvptx_expand_call (rtx, rtx); +extern void nvptx_expand_omp_get_num_threads (rtx); extern rtx nvptx_gen_shuffle (rtx, rtx, rtx, nvptx_shuffle_kind); extern rtx nvptx_expand_compare (rtx); extern const char *nvptx_ptx_type_from_mode (machine_mode, bool); @@ -63,5 +64,6 @@ extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); extern bool nvptx_mem_local_p (rtx); extern bool 
nvptx_mem_maybe_shared_p (const_rtx); +extern bool nvptx_mem_shared_p (const_rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 9c284ed..3b2bfd3 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -176,6 +176,9 @@ static unsigned gang_private_shared_align; static GTY(()) rtx gang_private_shared_sym; static hash_map<tree_decl_hash, unsigned int> gang_private_shared_hmap; +static GTY(()) rtx omp_num_threads_sym; +static unsigned omp_num_threads_align; + /* Global lock variable, needed for 128bit worker & gang reductions. */ static GTY(()) tree global_lock_var; @@ -187,6 +190,9 @@ static bool have_softstack_decl; static bool need_unisimt_decl; static bool have_unisimt_decl; +/* True if any function references __nvptx_omp_num_threads. */ +static bool need_omp_num_threads; + static int nvptx_mach_max_workers (); /* Allocate a new, cleared machine_function structure. */ @@ -393,6 +399,10 @@ nvptx_option_override (void) SET_SYMBOL_DATA_AREA (gang_private_shared_sym, DATA_AREA_SHARED); gang_private_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT; + omp_num_threads_sym = gen_rtx_SYMBOL_REF (Pmode, "__nvptx_omp_num_threads"); + SET_SYMBOL_DATA_AREA (omp_num_threads_sym, DATA_AREA_SHARED); + omp_num_threads_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT; + diagnose_openacc_conflict (TARGET_GOMP, "-mgomp"); diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack"); diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt"); @@ -961,7 +971,8 @@ write_as_kernel (tree attrs) { return (lookup_attribute ("kernel", attrs) != NULL_TREE || (lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE - && lookup_attribute ("oacc function", attrs) != NULL_TREE)); + && (lookup_attribute ("oacc function", attrs) != NULL_TREE + || lookup_attribute ("ompacc", attrs) != NULL_TREE))); /* For OpenMP target regions, the corresponding kernel entry is emitted from write_omp_entry as a separate function. 
*/ } @@ -1495,6 +1506,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) DECL_ATTRIBUTES (decl))) force_public = true; if (lookup_attribute ("omp target entrypoint", DECL_ATTRIBUTES (decl)) + && !lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl)) && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (decl))) { char *buf = (char *) alloca (strlen (name) + sizeof ("$impl")); @@ -1548,7 +1560,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) HOST_WIDE_INT sz = get_frame_size (); bool need_frameptr = sz || cfun->machine->has_chain; int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT; - if (!TARGET_SOFT_STACK) + if (!TARGET_SOFT_STACK || lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl))) { /* Declare a local var for outgoing varargs. */ if (cfun->machine->has_varadic) @@ -1619,6 +1631,45 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) nvptx_init_unisimt_predicate (file); if (cfun->machine->bcast_partition || cfun->machine->sync_bar) nvptx_init_oacc_workers (file); + + if (offloading_function_p ((tree) decl) + && lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl)) + && !lookup_attribute ("ompacc seq", DECL_ATTRIBUTES (decl))) + { + int nthr_regno = REGNO (cfun->machine->omp_fn_entry_num_threads_reg); + if (lookup_attribute ("omp target entrypoint", DECL_ATTRIBUTES (decl))) + { + fprintf (file, "\t{\n"); + if (cfun->machine->omp_parallel_predicate) + { + /* Borrow num-threads regno as temp register. 
*/ + fprintf (file, "\t\tmov.u32 %%r%d, %%tid.x;\n", nthr_regno); + fprintf (file, "\t\tsetp.ne.u32 %%r%d, %%r%d, 0;\n", + REGNO (cfun->machine->omp_parallel_predicate), nthr_regno); + } + fprintf (file, "\t\tmov.u32 %%r%d, 1;\n", nthr_regno); + fprintf (file, "\t\tst.shared.u32 [__nvptx_omp_num_threads], %%r%d;\n", nthr_regno); + fprintf (file, "\t}\n"); + need_omp_num_threads = true; + } + else + { + fprintf (file, "\t\tld.shared.u32 %%r%d, [__nvptx_omp_num_threads];\n", nthr_regno); + if (cfun->machine->omp_parallel_predicate) + { + fprintf (file, "\t{\n"); + fprintf (file, "\t\t.reg.u32 %%tmp1;\n"); + fprintf (file, "\t\t.reg.pred %%not_parallel_mode, %%v1_lane;\n"); + fprintf (file, "\t\tsetp.eq.u32 %%not_parallel_mode, %%r%d, 1;\n", nthr_regno); + fprintf (file, "\t\tmov.u32 %%tmp1, %%tid.x;\n"); + fprintf (file, "\t\tsetp.ne.u32 %%v1_lane, %%tmp1, 0;\n"); + fprintf (file, "\t\tand.pred %%r%d, %%not_parallel_mode, %%v1_lane;\n", + REGNO (cfun->machine->omp_parallel_predicate)); + fprintf (file, "\t}\n"); + need_omp_num_threads = true; + } + } + } } /* Output code for switching uniform-simt state. 
ENTERING indicates whether @@ -1736,6 +1787,10 @@ nvptx_output_simt_exit (rtx src) const char * nvptx_output_set_softstack (unsigned src_regno) { + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && lookup_attribute ("ompacc", + DECL_ATTRIBUTES (current_function_decl))) + return ""; if (cfun->machine->has_softstack && !crtl->is_leaf) { fprintf (asm_out_file, "\tst.shared.u%d\t[%s], ", @@ -1854,20 +1909,29 @@ nvptx_expand_call (rtx retval, rtx address) if (DECL_STATIC_CHAIN (decl)) cfun->machine->has_chain = true; - tree attr = oacc_get_fn_attrib (decl); - if (attr) + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC) { - tree dims = TREE_VALUE (attr); - - parallel = GOMP_DIM_MASK (GOMP_DIM_MAX) - 1; - for (int ix = 0; ix != GOMP_DIM_MAX; ix++) + if (lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl)) + && !lookup_attribute ("ompacc seq", DECL_ATTRIBUTES (decl))) + parallel = GOMP_DIM_MASK (GOMP_DIM_VECTOR); + } + else + { + tree attr = oacc_get_fn_attrib (decl); + if (attr) { - if (TREE_PURPOSE (dims) - && !integer_zerop (TREE_PURPOSE (dims))) - break; - /* Not on this axis. */ - parallel ^= GOMP_DIM_MASK (ix); - dims = TREE_CHAIN (dims); + tree dims = TREE_VALUE (attr); + + parallel = GOMP_DIM_MASK (GOMP_DIM_MAX) - 1; + for (int ix = 0; ix != GOMP_DIM_MAX; ix++) + { + if (TREE_PURPOSE (dims) + && !integer_zerop (TREE_PURPOSE (dims))) + break; + /* Not on this axis. 
*/ + parallel ^= GOMP_DIM_MASK (ix); + dims = TREE_CHAIN (dims); + } } } } @@ -1930,15 +1994,27 @@ nvptx_expand_compare (rtx compare) void nvptx_expand_oacc_fork (unsigned mode) { + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC) + mode = GOMP_DIM_VECTOR; nvptx_emit_forking (GOMP_DIM_MASK (mode), false); } void nvptx_expand_oacc_join (unsigned mode) { + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC) + mode = GOMP_DIM_VECTOR; nvptx_emit_joining (GOMP_DIM_MASK (mode), false); } +void +nvptx_expand_omp_get_num_threads (rtx target) +{ + rtx mem = gen_rtx_MEM (SImode, omp_num_threads_sym); + emit_insn (gen_rtx_SET (target, mem)); + need_omp_num_threads = true; +} + /* Generate instruction(s) to unpack a 64 bit object into 2 32 bit objects. */ @@ -2879,6 +2955,13 @@ nvptx_mem_maybe_shared_p (const_rtx x) return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC; } +bool +nvptx_mem_shared_p (const_rtx x) +{ + nvptx_data_area area = nvptx_mem_data_area (x); + return area == DATA_AREA_SHARED; +} + /* Print an operand, X, to FILE, with an optional modifier in CODE. 
Meaning of CODE: @@ -3483,6 +3566,11 @@ init_axis_dim (void) static int ATTRIBUTE_UNUSED nvptx_mach_max_workers () { + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && lookup_attribute ("ompacc", + DECL_ATTRIBUTES (current_function_decl))) + return 1; + if (!cfun->machine->axis_dim_init_p) init_axis_dim (); return cfun->machine->axis_dim[MACH_MAX_WORKERS]; @@ -3491,6 +3579,11 @@ nvptx_mach_max_workers () static int ATTRIBUTE_UNUSED nvptx_mach_vector_length () { + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && lookup_attribute ("ompacc", + DECL_ATTRIBUTES (current_function_decl))) + return 32; + if (!cfun->machine->axis_dim_init_p) init_axis_dim (); return cfun->machine->axis_dim[MACH_VECTOR_LENGTH]; @@ -4873,11 +4966,27 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) rtx_insn *tail = BB_END (to); unsigned skip_mask = mask; + rtx_insn *join = NULL; + rtx_insn *fork = NULL; + while (true) { /* Find first insn of from block. */ - while (head != BB_END (from) && !needs_neutering_p (head)) - head = NEXT_INSN (head); + while (true) + { + if (INSN_P (head) + && recog_memoized (head) == CODE_FOR_nvptx_join) + { + /* Record join if we see it. */ + gcc_assert (!join); + join = head; + } + + if (head != BB_END (from) && !needs_neutering_p (head)) + head = NEXT_INSN (head); + else + break; + } if (from == to) break; @@ -4895,8 +5004,46 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) /* Find last insn of to block */ rtx_insn *limit = from == to ? head : BB_HEAD (to); - while (tail != limit && !INSN_P (tail) && !LABEL_P (tail)) - tail = PREV_INSN (tail); + while (true) + { + if (INSN_P (tail) + && recog_memoized (tail) == CODE_FOR_nvptx_fork) + { + /* Record fork if we see it. 
*/ + gcc_assert (!fork); + fork = tail; + } + + if (tail != limit && !INSN_P (tail) && !LABEL_P (tail)) + tail = PREV_INSN (tail); + else + break; + } + + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC) + { + if (join + /* We do not set/restore parallel state across function calls. */ + && !(INTVAL (XVECEXP (PATTERN (join), 0, 0)) & (1 << GOMP_DIM_MAX))) + { + rtx reg = cfun->machine->omp_fn_entry_num_threads_reg; + rtx mem = gen_rtx_MEM (SImode, omp_num_threads_sym); + emit_insn_before (gen_nvptx_omp_parallel_join (mem, reg), head); + need_omp_num_threads = true; + head = PREV_INSN (head); + } + + if (fork + /* We do not set/restore parallel state across function calls. */ + && !(INTVAL (XVECEXP (PATTERN (fork), 0, 0)) & (1 << GOMP_DIM_MAX))) + { + rtx reg = gen_reg_rtx (SImode); + rtx mem = gen_rtx_MEM (SImode, omp_num_threads_sym); + emit_insn_before (gen_get_ntid (reg), tail); + emit_insn_before (gen_nvptx_omp_parallel_fork (mem, reg), tail); + need_omp_num_threads = true; + } + } /* Detect if tail is a branch. */ rtx tail_branch = NULL_RTX; @@ -4943,16 +5090,31 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) if (GOMP_DIM_MASK (mode) & skip_mask) { rtx_code_label *label = gen_label_rtx (); - rtx pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER]; rtx_insn **mode_jump = mode == GOMP_DIM_VECTOR ? &vector_jump : &worker_jump; rtx_insn **mode_label = mode == GOMP_DIM_VECTOR ? 
&vector_label : &worker_label; - if (!pred) + rtx pred; + + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && mode == GOMP_DIM_VECTOR) + { + pred = cfun->machine->omp_parallel_predicate; + if (!pred) + { + pred = gen_reg_rtx (BImode); + cfun->machine->omp_parallel_predicate = pred; + } + } + else { - pred = gen_reg_rtx (BImode); - cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER] = pred; + pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER]; + if (!pred) + { + pred = gen_reg_rtx (BImode); + cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER] = pred; + } } rtx br; @@ -5067,7 +5229,38 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) rtx tmp = gen_reg_rtx (BImode); emit_insn_before (gen_movbi (tmp, const0_rtx), bb_first_real_insn (from)); - emit_insn_before (gen_rtx_SET (tmp, pvar), label); + + if(flag_openmp_target == OMP_TARGET_MODE_OMPACC) + { + rtx nthr = cfun->machine->omp_fn_entry_num_threads_reg; + rtx single_p = gen_reg_rtx (BImode); + + rtx_code_label *lbl_copy_tmp_pvar = gen_label_rtx (); + LABEL_NUSES (lbl_copy_tmp_pvar) = 1; + + rtx_insn *lbl_fallthru = NEXT_INSN (tail); + gcc_assert (lbl_fallthru); + if (!LABEL_P (lbl_fallthru)) + { + rtx_code_label *nlbl = gen_label_rtx (); + LABEL_NUSES (nlbl) = 1; + emit_label_before (nlbl, lbl_fallthru); + lbl_fallthru = nlbl; + } + emit_insn_before + (gen_rtx_SET (single_p, + gen_rtx_EQ (BImode, nthr, GEN_INT (1))), + label); + emit_insn_before + (gen_br_true (single_p, lbl_copy_tmp_pvar), label); + emit_jump_insn_before (copy_rtx (tail_branch), label); + emit_insn_before (gen_jump (lbl_fallthru), label); + emit_label_before (lbl_copy_tmp_pvar, label); + emit_insn_before (gen_rtx_SET (tmp, pvar), label); + } + else + emit_insn_before (gen_rtx_SET (tmp, pvar), label); + emit_insn_before (gen_rtx_SET (pvar, tmp), tail); #endif emit_insn_before (nvptx_gen_warp_bcast (pvar), tail); @@ -5826,10 +6019,29 @@ nvptx_reorg (void) delete pars; } + if (flag_openmp_target == 
OMP_TARGET_MODE_OMPACC + && offloading_function_p (current_function_decl) + && lookup_attribute ("ompacc", + DECL_ATTRIBUTES (current_function_decl)) + && !lookup_attribute ("ompacc seq", + DECL_ATTRIBUTES (current_function_decl))) + { + cfun->machine->omp_fn_entry_num_threads_reg = gen_reg_rtx (SImode); + + /* Discover & process partitioned regions. */ + parallel *pars = nvptx_discover_pars (&bb_insn_map); + nvptx_process_pars (pars); + nvptx_neuter_pars (pars, GOMP_DIM_MASK (GOMP_DIM_VECTOR), 0); + delete pars; + } + /* Replace subregs. */ nvptx_reorg_subreg (); - if (TARGET_UNIFORM_SIMT) + if (TARGET_UNIFORM_SIMT + && (flag_openmp_target != OMP_TARGET_MODE_OMPACC + || !lookup_attribute ("ompacc", + DECL_ATTRIBUTES (current_function_decl)))) nvptx_reorg_uniform_simt (); #if WORKAROUND_PTXJIT_BUG_2 @@ -6076,6 +6288,12 @@ nvptx_file_end (void) write_var_marker (asm_out_file, false, true, "__nvptx_uni"); fprintf (asm_out_file, ".extern .shared .u32 __nvptx_uni[32];\n"); } + if (need_omp_num_threads) + { + write_var_marker (asm_out_file, false, true, "__nvptx_omp_num_threads"); + fprintf (asm_out_file, + ".extern .shared .u32 __nvptx_omp_num_threads;\n"); + } } /* Expander for the shuffle builtins. */ @@ -6732,6 +6950,9 @@ nvptx_goacc_fork_join (gcall *call, const int dims[], tree arg = gimple_call_arg (call, 2); unsigned axis = TREE_INT_CST_LOW (arg); + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC) + return true; + /* We only care about worker and vector partitioning. */ if (axis < GOMP_DIM_WORKER) return false; diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index d815081..59580d2 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -267,6 +267,9 @@ struct GTY(()) machine_function for per-lane storage in OpenMP SIMD regions. 
*/ unsigned HOST_WIDE_INT simt_stack_size; unsigned HOST_WIDE_INT simt_stack_align; + + rtx omp_parallel_predicate; + rtx omp_fn_entry_num_threads_reg; }; #endif diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index d271265..1d1a857 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -80,6 +80,14 @@ UNSPECV_SIMT_EXIT UNSPECV_RED_PART + + UNSPECV_GET_TID + UNSPECV_GET_NTID + UNSPECV_GET_CTAID + UNSPECV_GET_NCTAID + + UNSPECV_OMP_PARALLEL_FORK + UNSPECV_OMP_PARALLEL_JOIN ]) (define_attr "subregs_ok" "false,true" @@ -123,6 +131,12 @@ : immediate_operand (op, mode)); }) +(define_predicate "nvptx_shared_mem_operand" + (match_code "mem") +{ + return nvptx_mem_shared_p (op); +}) + (define_predicate "const0_operand" (and (match_code "const_int") (match_test "op == const0_rtx"))) @@ -1774,6 +1788,60 @@ return asms[INTVAL (operands[1])]; }) +(define_expand "gomp_barrier" + [(const_int 1)] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" +{ + emit_insn (gen_nvptx_barsync (GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "omp_get_num_threads" + [(match_operand 0 "nvptx_register_operand" "=R")] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" +{ + nvptx_expand_omp_get_num_threads (operands[0]); + DONE; +}) + +(define_insn "omp_get_num_teams" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_NCTAID))] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" + "%.\\tmov.u32\\t%0, %%nctaid.x;") + +(define_insn "omp_get_thread_num" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_TID))] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" + "%.\\tmov.u32\\t%0, %%tid.x;") + +(define_insn "omp_get_team_num" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_CTAID))] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" + "%.\\tmov.u32\\t%0, %%ctaid.x;") + +(define_insn 
"get_ntid" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_NTID))] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" + "%.\\tmov.u32\\t%0, %%ntid.x;") + +(define_insn "nvptx_omp_parallel_fork" + [(set (match_operand:SI 0 "nvptx_shared_mem_operand" "=m") + (unspec_volatile:SI [(match_operand:SI 1 "nvptx_register_operand" "R")] + UNSPECV_OMP_PARALLEL_FORK))] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" + "%.\\tst.shared.u32\\t%0, %1; //omp parallel fork") + +(define_insn "nvptx_omp_parallel_join" + [(set (match_operand:SI 0 "nvptx_shared_mem_operand" "=m") + (unspec_volatile:SI [(match_operand:SI 1 "nvptx_register_operand" "R")] + UNSPECV_OMP_PARALLEL_JOIN))] + "flag_openmp_target == OMP_TARGET_MODE_OMPACC" + "%.\\tst.shared.u32\\t%0, %1; //omp parallel join") + (define_insn "nvptx_fork" [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_FORK)] |