aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/nvptx/nvptx.md
diff options
context:
space:
mode:
author Nathan Sidwell <nathan@codesourcery.com> 2015-10-28 14:24:39 +0000
committer Nathan Sidwell <nathan@gcc.gnu.org> 2015-10-28 14:24:39 +0000
commit d88cd9c42448f3e166a17356a7f8539541b68a25 (patch)
tree f2ef967c3b53d765605ce0256832fc68c5156568 /gcc/config/nvptx/nvptx.md
parent 1e355e1de076db6dcc8999babd03837531e3e3f6 (diff)
download gcc-d88cd9c42448f3e166a17356a7f8539541b68a25.zip
gcc-d88cd9c42448f3e166a17356a7f8539541b68a25.tar.gz
gcc-d88cd9c42448f3e166a17356a7f8539541b68a25.tar.bz2
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add axis_predicate. * config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork, nvptx_expand_oacc_join): Declare. * config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete. (UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV, UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New. (UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED, UNSPECV_JOINING, UNSPECV_JOIN): New. (BITS, BITD): New mode iterators. (br_true_uni, br_false_uni): New. (*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete. (oacc_dim_size, oacc_dim_pos): New. (nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New. (oacc_fork, oacc_join): New. (nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New. (worker_load<mode>, worker_store<mode>): New. (nvptx_barsync): New. * config/nvptx/nvptx.c: Include gimple.h & dumpfile.h. (SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define. (worker_bcast_hwm, worker_bcast_align, worker_bcast_name, worker_bcast_sym): New. (nvptx_option_override): Initialize worker broadcast buffer. (nvptx_emit_forking, nvptx_emit_joining): New. (nvptx_init_axis_predicate): New. (nvptx_declare_function_name): Init axis predicates. (nvptx_expand_call): Add fork/join markers around routine call. (nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New. (nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New. (nvptx_gen_vcast): New. (struct wcast_data_t): New. (enum propagate_mask): New. (nvptx_gen_wcast): New. (nvptx_print_operand): Add 'S' case. (struct parallel): New. (parallel::parallel, parallel::~parallel): New. (bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs. (nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars, nvptx_find_par, nvptx_discover_pars): New. (nvptx_propagate): New. (vprop_gen, nvptx_vpropagate): New. (wprop_gen, nvptx_wpropagate): New. (nvptx_wsync): New. (nvptx_single, nvptx_skip_par): New. (nvptx_process_pars, nvptx_neuter_pars): New. 
(nvptx_reorg): Split blocks, generate parallel structure, apply neutering. (nvptx_cannot_copy_insn_p): New. (nvptx_file_end): Emit worker broadcast decl. (nvptx_goacc_fork_join): New. (TARGET_CANNOT_COPY_INSN_P): Override. (TARGET_GOACC_FORK_JOIN): Override. From-SVN: r229486
Diffstat (limited to 'gcc/config/nvptx/nvptx.md')
-rw-r--r-- gcc/config/nvptx/nvptx.md 174
1 files changed, 155 insertions, 19 deletions
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 049f34c..7930f8d 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -49,14 +49,27 @@
UNSPEC_ALLOCA
- UNSPEC_NTID
- UNSPEC_TID
+ UNSPEC_DIM_SIZE
+
+ UNSPEC_SHARED_DATA
+
+ UNSPEC_BIT_CONV
+
+ UNSPEC_SHUFFLE
+ UNSPEC_BR_UNIFIED
])
(define_c_enum "unspecv" [
UNSPECV_LOCK
UNSPECV_CAS
UNSPECV_XCHG
+ UNSPECV_BARSYNC
+ UNSPECV_DIM_POS
+
+ UNSPECV_FORK
+ UNSPECV_FORKED
+ UNSPECV_JOINING
+ UNSPECV_JOIN
])
(define_attr "subregs_ok" "false,true"
@@ -246,6 +259,8 @@
(define_mode_iterator QHSIM [QI HI SI])
(define_mode_iterator SDFM [SF DF])
(define_mode_iterator SDCM [SC DC])
+(define_mode_iterator BITS [SI SF])
+(define_mode_iterator BITD [DI DF])
;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities. Exactly one of the two alternatives will match.
@@ -817,6 +832,23 @@
""
"%J0\\tbra\\t%l1;")
+;; Unified conditional branch.
+(define_insn "br_true_uni"
+ [(set (pc) (if_then_else
+ (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
+ UNSPEC_BR_UNIFIED) (const_int 0))
+ (label_ref (match_operand 1 "" "")) (pc)))]
+ ""
+ "%j0\\tbra.uni\\t%l1;")
+
+(define_insn "br_false_uni"
+ [(set (pc) (if_then_else
+ (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
+ UNSPEC_BR_UNIFIED) (const_int 0))
+ (label_ref (match_operand 1 "" "")) (pc)))]
+ ""
+ "%J0\\tbra.uni\\t%l1;")
+
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else (match_operator 0 "nvptx_comparison_operator"
@@ -1308,36 +1340,134 @@
DONE;
})
-(define_insn "*oacc_ntid_insn"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NTID))]
+(define_insn "oacc_dim_size"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
+ UNSPEC_DIM_SIZE))]
""
- "%.\\tmov.u32 %0, %%ntid%d1;")
+{
+ static const char *const asms[] =
+{ /* Must match oacc_loop_levels ordering. */
+ "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
+ "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
+ "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
+};
+ return asms[INTVAL (operands[1])];
+})
-(define_expand "oacc_ntid"
+(define_insn "oacc_dim_pos"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NTID))]
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
+ UNSPECV_DIM_POS))]
""
{
- if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
- FAIL;
+ static const char *const asms[] =
+{ /* Must match oacc_loop_levels ordering. */
+ "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
+ "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
+ "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
+};
+ return asms[INTVAL (operands[1])];
})
-(define_insn "*oacc_tid_insn"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_TID))]
+(define_insn "nvptx_fork"
+ [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+ UNSPECV_FORK)]
""
- "%.\\tmov.u32 %0, %%tid%d1;")
+ "// fork %0;"
+)
-(define_expand "oacc_tid"
- [(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_TID))]
+(define_insn "nvptx_forked"
+ [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+ UNSPECV_FORKED)]
+ ""
+ "// forked %0;"
+)
+
+(define_insn "nvptx_joining"
+ [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+ UNSPECV_JOINING)]
+ ""
+ "// joining %0;"
+)
+
+(define_insn "nvptx_join"
+ [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
+ UNSPECV_JOIN)]
+ ""
+ "// join %0;"
+)
+
+(define_expand "oacc_fork"
+ [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+ (match_operand:SI 1 "nvptx_general_operand" ""))
+ (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_FORKED)]
""
{
- if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
- FAIL;
+ if (operands[0] != const0_rtx)
+ emit_move_insn (operands[0], operands[1]);
+ nvptx_expand_oacc_fork (INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "oacc_join"
+ [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+ (match_operand:SI 1 "nvptx_general_operand" ""))
+ (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_JOIN)]
+ ""
+{
+ if (operands[0] != const0_rtx)
+ emit_move_insn (operands[0], operands[1]);
+ nvptx_expand_oacc_join (INTVAL (operands[2]));
+ DONE;
})
+;; Only 32-bit shuffles exist.
+(define_insn "nvptx_shuffle<mode>"
+ [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
+ (unspec:BITS
+ [(match_operand:BITS 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_SHUFFLE))]
+ ""
+ "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
+
+;; Extract the parts of a 64-bit object into two 32-bit ints.
+(define_insn "unpack<mode>si2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
+ (const_int 0)] UNSPEC_BIT_CONV))
+ (set (match_operand:SI 1 "nvptx_register_operand" "=R")
+ (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
+ ""
+ "%.\\tmov.b64\\t{%0,%1}, %2;")
+
+;; Pack two 32-bit ints into a 64-bit object.
+(define_insn "packsi<mode>2"
+ [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
+ (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_register_operand" "R")]
+ UNSPEC_BIT_CONV))]
+ ""
+ "%.\\tmov.b64\\t%0, {%1,%2};")
+
+(define_insn "worker_load<mode>"
+ [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
+ (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")]
+ UNSPEC_SHARED_DATA))]
+ ""
+ "%.\\tld.shared%u0\\t%0, %1;")
+
+(define_insn "worker_store<mode>"
+ [(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")]
+ UNSPEC_SHARED_DATA)
+ (match_operand:SDISDFM 0 "nvptx_register_operand" "R"))]
+ ""
+ "%.\\tst.shared%u1\\t%1, %0;")
+
;; Atomic insns.
(define_expand "atomic_compare_and_swap<mode>"
@@ -1423,3 +1553,9 @@
(match_dup 1))]
"0"
"%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
+
+(define_insn "nvptx_barsync"
+ [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
+ UNSPECV_BARSYNC)]
+ ""
+ "\\tbar.sync\\t%0;")