diff options
author | Nathan Sidwell <nathan@codesourcery.com> | 2015-10-28 14:24:39 +0000 |
---|---|---|
committer | Nathan Sidwell <nathan@gcc.gnu.org> | 2015-10-28 14:24:39 +0000 |
commit | d88cd9c42448f3e166a17356a7f8539541b68a25 (patch) | |
tree | f2ef967c3b53d765605ce0256832fc68c5156568 /gcc/config/nvptx/nvptx.md | |
parent | 1e355e1de076db6dcc8999babd03837531e3e3f6 (diff) | |
download | gcc-d88cd9c42448f3e166a17356a7f8539541b68a25.zip gcc-d88cd9c42448f3e166a17356a7f8539541b68a25.tar.gz gcc-d88cd9c42448f3e166a17356a7f8539541b68a25.tar.bz2 |
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
Diffstat (limited to 'gcc/config/nvptx/nvptx.md')
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 174 |
1 files changed, 155 insertions, 19 deletions
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 049f34c..7930f8d 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -49,14 +49,27 @@
 
    UNSPEC_ALLOCA
 
-   UNSPEC_NTID
-   UNSPEC_TID
+   UNSPEC_DIM_SIZE
+
+   UNSPEC_SHARED_DATA
+
+   UNSPEC_BIT_CONV
+
+   UNSPEC_SHUFFLE
+   UNSPEC_BR_UNIFIED
 ])
 
 (define_c_enum "unspecv" [
    UNSPECV_LOCK
    UNSPECV_CAS
    UNSPECV_XCHG
+   UNSPECV_BARSYNC
+   UNSPECV_DIM_POS
+
+   UNSPECV_FORK
+   UNSPECV_FORKED
+   UNSPECV_JOINING
+   UNSPECV_JOIN
 ])
 
 (define_attr "subregs_ok" "false,true"
@@ -246,6 +259,8 @@
 (define_mode_iterator QHSIM [QI HI SI])
 (define_mode_iterator SDFM [SF DF])
 (define_mode_iterator SDCM [SC DC])
+(define_mode_iterator BITS [SI SF])
+(define_mode_iterator BITD [DI DF])
 
 ;; This mode iterator allows :P to be used for patterns that operate on
 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
@@ -817,6 +832,23 @@
   ""
   "%J0\\tbra\\t%l1;")
 
+;; unified conditional branch
+(define_insn "br_true_uni"
+  [(set (pc) (if_then_else
+        (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
+                       UNSPEC_BR_UNIFIED) (const_int 0))
+        (label_ref (match_operand 1 "" "")) (pc)))]
+  ""
+  "%j0\\tbra.uni\\t%l1;")
+
+(define_insn "br_false_uni"
+  [(set (pc) (if_then_else
+        (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
+                       UNSPEC_BR_UNIFIED) (const_int 0))
+        (label_ref (match_operand 1 "" "")) (pc)))]
+  ""
+  "%J0\\tbra.uni\\t%l1;")
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else
          (match_operator 0 "nvptx_comparison_operator"
@@ -1308,36 +1340,134 @@
   DONE;
 })
 
-(define_insn "*oacc_ntid_insn"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
-        (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NTID))]
+(define_insn "oacc_dim_size"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "")
+        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
+                   UNSPEC_DIM_SIZE))]
   ""
-  "%.\\tmov.u32 %0, %%ntid%d1;")
+{
+  static const char *const asms[] =
+{ /* Must match oacc_loop_levels ordering. */
+  "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
+  "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
+  "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
+};
+  return asms[INTVAL (operands[1])];
+})
 
-(define_expand "oacc_ntid"
+(define_insn "oacc_dim_pos"
   [(set (match_operand:SI 0 "nvptx_register_operand" "")
-        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NTID))]
+        (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
+                            UNSPECV_DIM_POS))]
   ""
 {
-  if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
-    FAIL;
+  static const char *const asms[] =
+{ /* Must match oacc_loop_levels ordering. */
+  "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
+  "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
+  "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
+};
+  return asms[INTVAL (operands[1])];
 })
 
-(define_insn "*oacc_tid_insn"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
-        (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_TID))]
+(define_insn "nvptx_fork"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+                       UNSPECV_FORK)]
   ""
-  "%.\\tmov.u32 %0, %%tid%d1;")
+  "// fork %0;"
+)
 
-(define_expand "oacc_tid"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "")
-        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_TID))]
+(define_insn "nvptx_forked"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+                       UNSPECV_FORKED)]
+  ""
+  "// forked %0;"
+)
+
+(define_insn "nvptx_joining"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+                       UNSPECV_JOINING)]
+  ""
+  "// joining %0;"
+)
+
+(define_insn "nvptx_join"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+                       UNSPECV_JOIN)]
+  ""
+  "// join %0;"
+)
+
+(define_expand "oacc_fork"
+  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+        (match_operand:SI 1 "nvptx_general_operand" ""))
+   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+                       UNSPECV_FORKED)]
   ""
 {
-  if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
-    FAIL;
+  if (operands[0] != const0_rtx)
+    emit_move_insn (operands[0], operands[1]);
+  nvptx_expand_oacc_fork (INTVAL (operands[2]));
+  DONE;
+})
+
+(define_expand "oacc_join"
+  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+        (match_operand:SI 1 "nvptx_general_operand" ""))
+   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+                       UNSPECV_JOIN)]
+  ""
+{
+  if (operands[0] != const0_rtx)
+    emit_move_insn (operands[0], operands[1]);
+  nvptx_expand_oacc_join (INTVAL (operands[2]));
+  DONE;
 })
 
+;; only 32-bit shuffles exist.
+(define_insn "nvptx_shuffle<mode>"
+  [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
+        (unspec:BITS
+                [(match_operand:BITS 1 "nvptx_register_operand" "R")
+                 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
+                 (match_operand:SI 3 "const_int_operand" "n")]
+                  UNSPEC_SHUFFLE))]
+  ""
+  "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
+
+;; extract parts of a 64 bit object into 2 32-bit ints
+(define_insn "unpack<mode>si2"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+        (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
+                    (const_int 0)] UNSPEC_BIT_CONV))
+   (set (match_operand:SI 1 "nvptx_register_operand" "=R")
+        (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
+  ""
+  "%.\\tmov.b64\\t{%0,%1}, %2;")
+
+;; pack 2 32-bit ints into a 64 bit object
+(define_insn "packsi<mode>2"
+  [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
+        (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
+                      (match_operand:SI 2 "nvptx_register_operand" "R")]
+                    UNSPEC_BIT_CONV))]
+  ""
+  "%.\\tmov.b64\\t%0, {%1,%2};")
+
+(define_insn "worker_load<mode>"
+  [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
+        (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")]
+                         UNSPEC_SHARED_DATA))]
+  ""
+  "%.\\tld.shared%u0\\t%0, %1;")
+
+(define_insn "worker_store<mode>"
+  [(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")]
+                         UNSPEC_SHARED_DATA)
+        (match_operand:SDISDFM 0 "nvptx_register_operand" "R"))]
+  ""
+  "%.\\tst.shared%u1\\t%1, %0;")
+
 ;; Atomic insns.
 
 (define_expand "atomic_compare_and_swap<mode>"
@@ -1423,3 +1553,9 @@
         (match_dup 1))]
   "0"
   "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
+
+(define_insn "nvptx_barsync"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
+                    UNSPECV_BARSYNC)]
+  ""
+  "\\tbar.sync\\t%0;")