aboutsummaryrefslogtreecommitdiff
path: root/gcc/hsa-brig.c
diff options
context:
space:
mode:
author Martin Jambor <mjambor@suse.cz> 2016-11-23 15:51:02 +0100
committer Martin Jambor <jamborm@gcc.gnu.org> 2016-11-23 15:51:02 +0100
commit 56b1c60e412fcf1245b4780871553cbdebb956a3 (patch)
tree 3a3e101ec1a0e1bdd140db82245f5884d841c62f /gcc/hsa-brig.c
parent f6cdfe826444e1a0b52b271588fbef5c2a4bac4d (diff)
downloadgcc-56b1c60e412fcf1245b4780871553cbdebb956a3.zip
gcc-56b1c60e412fcf1245b4780871553cbdebb956a3.tar.gz
gcc-56b1c60e412fcf1245b4780871553cbdebb956a3.tar.bz2
backport: hsa-builtins.def: New file.
Merge from HSA branch to trunk 2016-11-23 Martin Jambor <mjambor@suse.cz> Martin Liska <mliska@suse.cz> gcc/ * hsa-builtins.def: New file. * Makefile.in (BUILTINS_DEF): Add hsa-builtins.def dependency. * builtins.def: Include hsa-builtins.def. (DEF_HSA_BUILTIN): New macro. * dumpfile.h (OPTGROUP_OPENMP): Define. * dumpfile.c (optgroup_options): Added OPTGROUP_OPENMP. * gimple.h (gf_mask): Added elements GF_OMP_FOR_GRID_INTRA_GROUP and GF_OMP_FOR_GRID_GROUP_ITER. (gimple_omp_for_grid_phony): Added checking assert. (gimple_omp_for_set_grid_phony): Likewise. (gimple_omp_for_grid_intra_group): New function. (gimple_omp_for_set_grid_intra_group): Likewise. (gimple_omp_for_grid_group_iter): Likewise. (gimple_omp_for_set_grid_group_iter): Likewise. * omp-low.c (check_omp_nesting_restrictions): Allow GRID loop where previously only distribute loop was permitted. (lower_lastprivate_clauses): Allow non tcc_comparison predicates. (grid_get_kernel_launch_attributes): Support multiple HSA grid dimensions. (grid_expand_omp_for_loop): Likewise and also support standalone distribute constructs. New parameter INTRA_GROUP, updated both users. (grid_expand_target_grid_body): Support standalone distribute constructs. (pass_data_expand_omp): Changed optinfo_flags to OPTGROUP_OPENMP. (pass_data_expand_omp_ssa): Likewise. (pass_data_omp_device_lower): Likewise. (pass_data_lower_omp): Likewise. (pass_data_diagnose_omp_blocks): Likewise. (pass_data_oacc_device_lower): Likewise. (pass_data_omp_target_link): Likewise. (grid_lastprivate_predicate): New function. (lower_omp_for_lastprivate): Call grid_lastprivate_predicate for gridified loops. (lower_omp_for): Support standalone distribute constructs. (grid_prop): New type. (grid_safe_assignment_p): Check for assignments to group_sizes, new parameter GRID. (grid_seq_only_contains_local_assignments): New parameter GRID, pass it to callee. (grid_find_single_omp_among_assignments_1): Likewise, improve missed optimization info messages. 
(grid_find_single_omp_among_assignments): Likewise. (grid_find_ungridifiable_statement): Do not bail out for SIMDs. (grid_parallel_clauses_gridifiable): New function. (grid_inner_loop_gridifiable_p): Likewise. (grid_dist_follows_simple_pattern): Likewise. (grid_gfor_follows_tiling_pattern): Likewise. (grid_call_permissible_in_distribute_p): Likewise. (grid_handle_call_in_distribute): Likewise. (grid_dist_follows_tiling_pattern): Likewise. (grid_target_follows_gridifiable_pattern): Support standalone distribute constructs. (grid_var_segment): New enum. (grid_mark_variable_segment): New function. (grid_copy_leading_local_assignments): Call grid_mark_variable_segment if a new argument says so. (grid_process_grid_body): New function. (grid_eliminate_combined_simd_part): Likewise. (grid_mark_tiling_loops): Likewise. (grid_mark_tiling_parallels_and_loops): Likewise. (grid_process_kernel_body_copy): Support standalone distribute constructs. (grid_attempt_target_gridification): New grid variable holding overall gridification state. Support standalone distribute constructs and collapse clauses. * doc/optinfo.texi (Optimization groups): Document OPTGROUP_OPENMP. * hsa.h (hsa_bb): Add method append_phi. (hsa_insn_br): Renamed to hsa_insn_cbr, renamed all occurrences in all files too. (hsa_insn_br): New class, now the ancestor of hsa_insn_cbr. (is_a_helper <hsa_insn_br *>::test): New function. (is_a_helper <hsa_insn_cbr *>::test): Adjust to only cover conditional branch instructions. (hsa_insn_signal): Make a direct descendant of hsa_insn_basic. Add memorder constructor parameter and m_memory_order and m_signalop member variables. (hsa_insn_queue): Changed constructor parameters to common form. Added m_segment and m_memory_order member variables. (hsa_summary_t): Add private member function process_gpu_implementation_attributes. (hsa_function_summary): Rename m_binded_function to m_bound_function. (hsa_insn_basic_p): Remove typedef. 
(hsa_op_with_type): Change hsa_insn_basic_p into plain pointers. (hsa_op_reg_p): Remove typedef. (hsa_function_representation): Change hsa_op_reg_p into plain pointers. (hsa_insn_phi): Removed new and delete operators. (hsa_insn_br): Likewise. (hsa_insn_cbr): Likewise. (hsa_insn_sbr): Likewise. (hsa_insn_cmp): Likewise. (hsa_insn_mem): Likewise. (hsa_insn_atomic): Likewise. (hsa_insn_signal): Likewise. (hsa_insn_seg): Likewise. (hsa_insn_call): Likewise. (hsa_insn_arg_block): Likewise. (hsa_insn_comment): Likewise. (hsa_insn_srctype): Likewise. (hsa_insn_packed): Likewise. (hsa_insn_cvt): Likewise. (hsa_insn_alloca): Likewise. * hsa.c (hsa_destroy_insn): Also handle instances of hsa_insn_br. (process_gpu_implementation_attributes): New function. (link_functions): Move some functionality into it. Adjust after renaming m_binded_functions to m_bound_functions. (hsa_insn_basic::op_output_p): Add BRIG_OPCODE_DEBUGTRAP to the list of instructions with no output registers. (get_in_type): Return this if it is a register of matching size. (hsa_get_declaration_name): Moved to... * hsa-gen.c (hsa_get_declaration_name): ...here. Allocate temporary string on an obstack instead from ggc. (query_hsa_grid): Renamed to query_hsa_grid_dim, reimplemented, cut down to two overloads. (hsa_allocp_operand_address): Removed. (hsa_allocp_operand_immed): Likewise. (hsa_allocp_operand_reg): Likewise. (hsa_allocp_operand_code_list): Likewise. (hsa_allocp_operand_operand_list): Likewise. (hsa_allocp_inst_basic): Likewise. (hsa_allocp_inst_phi): Likewise. (hsa_allocp_inst_mem): Likewise. (hsa_allocp_inst_atomic): Likewise. (hsa_allocp_inst_signal): Likewise. (hsa_allocp_inst_seg): Likewise. (hsa_allocp_inst_cmp): Likewise. (hsa_allocp_inst_br): Likewise. (hsa_allocp_inst_sbr): Likewise. (hsa_allocp_inst_call): Likewise. (hsa_allocp_inst_arg_block): Likewise. (hsa_allocp_inst_comment): Likewise. (hsa_allocp_inst_queue): Likewise. (hsa_allocp_inst_srctype): Likewise. 
(hsa_allocp_inst_packed): Likewise. (hsa_allocp_inst_cvt): Likewise. (hsa_allocp_inst_alloca): Likewise. (hsa_allocp_bb): Likewise. (hsa_obstack): New. (hsa_init_data_for_cfun): Initialize obstack. (hsa_deinit_data_for_cfun): Release memory of the obstack. (hsa_op_immed::operator new): Use obstack instead of object_allocator. (hsa_op_reg::operator new): Likewise. (hsa_op_address::operator new): Likewise. (hsa_op_code_list::operator new): Likewise. (hsa_op_operand_list::operator new): Likewise. (hsa_insn_basic::operator new): Likewise. (hsa_insn_phi::operator new): Likewise. (hsa_insn_br::operator new): Likewise. (hsa_insn_sbr::operator new): Likewise. (hsa_insn_cmp::operator new): Likewise. (hsa_insn_mem::operator new): Likewise. (hsa_insn_atomic::operator new): Likewise. (hsa_insn_signal::operator new): Likewise. (hsa_insn_seg::operator new): Likewise. (hsa_insn_call::operator new): Likewise. (hsa_insn_arg_block::operator new): Likewise. (hsa_insn_comment::operator new): Likewise. (hsa_insn_srctype::operator new): Likewise. (hsa_insn_packed::operator new): Likewise. (hsa_insn_cvt::operator new): Likewise. (hsa_insn_alloca::operator new): Likewise. (hsa_init_new_bb): Likewise. (hsa_bb::append_phi): New function. (gen_hsa_phi_from_gimple_phi): Use it. (get_symbol_for_decl): Fix distinguishing between global and local functions. Put local variables into a segment according to their attribute or static flag, if there is one. (hsa_insn_br::hsa_insn_br): New. (hsa_insn_br::operator new): Likewise. (hsa_insn_cbr::hsa_insn_cbr): Set width via ancestor constructor. (query_hsa_grid_nodim): New function. (multiply_grid_dim_characteristics): Likewise. (gen_get_num_threads): Likewise. (gen_get_num_teams): Reimplemented. (gen_get_team_num): Likewise. (gen_hsa_insns_for_known_library_call): Updated calls to the above helper functions. (get_memory_order_name): Removed. (get_memory_order): Likewise. (hsa_memorder_from_tree): New function. 
(gen_hsa_ternary_atomic_for_builtin): Renamed to gen_hsa_atomic_for_builtin, can also create signals. (gen_hsa_insns_for_call): Handle many new builtins. Adjust to use hsa_memory_order_from_tree and gen_hsa_atomic_for_builtin. (hsa_insn_atomic): Fix function comment. (hsa_insn_signal::hsa_insn_signal): Fix comment. Update call to ancestor constructor and initialization of new member variables. (hsa_insn_queue::hsa_insn_queue): Added initialization of new member variables. (hsa_get_host_function): Handle functions with no bound CPU implementation. Fix binded to bound. (get_brig_function_name): Likewise. (HSA_SORRY_ATV): Remove semicolon after macro. (HSA_SORRY_AT): Likewise. (omp_simple_builtin::generate): Add missing semicolons. (hsa_insn_phi::operator new): Removed. (hsa_insn_br::operator new): Likewise. (hsa_insn_cbr::operator new): Likewise. (hsa_insn_sbr::operator new): Likewise. (hsa_insn_cmp::operator new): Likewise. (hsa_insn_mem::operator new): Likewise. (hsa_insn_atomic::operator new): Likewise. (hsa_insn_signal::operator new): Likewise. (hsa_insn_seg::operator new): Likewise. (hsa_insn_call::operator new): Likewise. (hsa_insn_arg_block::operator new): Likewise. (hsa_insn_comment::operator new): Likewise. (hsa_insn_srctype::operator new): Likewise. (hsa_insn_packed::operator new): Likewise. (hsa_insn_cvt::operator new): Likewise. (hsa_insn_alloca::operator new): Likewise. (get_symbol_for_decl): Accept CONST_DECLs, put them to readonly segment. (gen_hsa_addr): Also process CONST_DECLs. (gen_hsa_addr_insns): Process CONST_DECLs by creating private copies. (gen_hsa_unary_operation): Make sure the function does not use bittype source type for firstbit and lastbit operations. (gen_hsa_popcount_to_dest): Make sure the function uses a bittype source type. * hsa-brig.c (emit_insn_operands): Cope with zero operands in an instruction. (emit_branch_insn): Renamed to emit_cond_branch_insn. Emit the width stored in the class. (emit_generic_branch_insn): New function. 
(emit_insn): Call emit_generic_branch_insn. (emit_signal_insn): Remove obsolete comment. Update member variable name, pick a type according to profile. (emit_alloca_insn): Remove obsolete comment. (emit_atomic_insn): Likewise. (emit_queue_insn): Get segment and memory order from the IR object. (hsa_brig_section): Make allocate_new_chunk, chunks and cur_chunk private, add a default NULL parameter to add method. (hsa_brig_section::add): Added a new parameter, store pointer to output data there if it is non-NULL. (emit_function_directives): Use this new parameter instead of calculating the pointer itself, fix function comment. (hsa_brig_emit_function): Add forgotten endian conversion. (hsa_output_kernels): Remove unnecessary building of kernel_dependencies_vector_type. (emit_immediate_operand): Declare. (emit_directive_variable): Also emit initializers of CONST_DECLs. (gen_hsa_insn_for_internal_fn_call): Also handle IFN_RSQRT. (verify_function_arguments): Properly detect variadic arguments. * hsa-dump.c (hsa_width_specifier_name): New function. (dump_hsa_insn_1): Dump generic branch instructions, update signal member variable name. Special dumping for queue objects. * ipa-hsa.c (process_hsa_functions): Adjust after renaming m_binded_functions to m_bound_functions. Copy externally visible flag to the node. (ipa_hsa_write_summary): Likewise. (ipa_hsa_read_section): Likewise. gcc/fortran/ * f95-lang.c (DEF_HSA_BUILTIN): New macro. gcc/testsuite/ * c-c++-common/gomp/gridify-1.c: Update scan string. * gfortran.dg/gomp/gridify-1.f90: Likewise. * c-c++-common/gomp/gridify-2.c: New test. * c-c++-common/gomp/gridify-3.c: Likewise. libgomp/ * testsuite/libgomp.hsa.c/bits-insns.c: New test. * testsuite/libgomp.hsa.c/tiling-1.c: Likewise. * testsuite/libgomp.hsa.c/tiling-2.c: Likewise. Co-Authored-By: Martin Liska <mliska@suse.cz> From-SVN: r242761
Diffstat (limited to 'gcc/hsa-brig.c')
-rw-r--r-- gcc/hsa-brig.c 140
1 files changed, 76 insertions, 64 deletions
diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index 66ff8f9..acd9164 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -161,19 +161,21 @@ public:
/* The size of the header of the section without any padding. */
unsigned header_byte_delta;
- /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
- vec <struct hsa_brig_data_chunk> chunks;
-
- /* More convenient access to the last chunk from the vector above. */
- struct hsa_brig_data_chunk *cur_chunk;
-
- void allocate_new_chunk ();
void init (const char *name);
void release ();
void output ();
- unsigned add (const void *data, unsigned len);
+ unsigned add (const void *data, unsigned len, void **output = NULL);
void round_size_up (int factor);
void *get_ptr_by_offset (unsigned int offset);
+
+private:
+ void allocate_new_chunk ();
+
+ /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
+ vec <struct hsa_brig_data_chunk> chunks;
+
+ /* More convenient access to the last chunk from the vector above. */
+ struct hsa_brig_data_chunk *cur_chunk;
};
static struct hsa_brig_section brig_data, brig_code, brig_operand;
@@ -271,10 +273,11 @@ hsa_brig_section::output ()
}
/* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
- which it was stored. */
+ which it was stored. If OUTPUT is not NULL, store into it the pointer to
+ the place where DATA was actually stored. */
unsigned
-hsa_brig_section::add (const void *data, unsigned len)
+hsa_brig_section::add (const void *data, unsigned len, void **output)
{
unsigned offset = total_size;
@@ -282,7 +285,10 @@ hsa_brig_section::add (const void *data, unsigned len)
if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
allocate_new_chunk ();
- memcpy (cur_chunk->data + cur_chunk->size, data, len);
+ char *dst = cur_chunk->data + cur_chunk->size;
+ memcpy (dst, data, len);
+ if (output)
+ *output = dst;
cur_chunk->size += len;
total_size += len;
@@ -565,6 +571,7 @@ enqueue_op (hsa_op_base *op)
return ret;
}
+static void emit_immediate_operand (hsa_op_immed *imm);
/* Emit directive describing a symbol if it has not been emitted already.
Return the offset of the directive. */
@@ -603,7 +610,14 @@ emit_directive_variable (struct hsa_symbol *symbol)
}
dirvar.name = lendian32 (name_offset);
- dirvar.init = 0;
+
+ if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
+ {
+ hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
+ dirvar.init = lendian32 (enqueue_op (tmp));
+ }
+ else
+ dirvar.init = 0;
dirvar.type = lendian16 (symbol->m_type);
dirvar.segment = symbol->m_segment;
dirvar.align = symbol->m_align;
@@ -626,8 +640,12 @@ emit_directive_variable (struct hsa_symbol *symbol)
return symbol->m_directive_offset;
}
-/* Emit directives describing either a function declaration or
- definition F. */
+/* Emit directives describing either a function declaration or definition F and
+ return the produced BrigDirectiveExecutable structure. The function does
+ not take into account any instructions when calculating nextModuleEntry
+ field of the produced BrigDirectiveExecutable structure so when emitting
+ actual definitions, this field needs to be updated after all of the function
+ is actually added to the code section. */
static BrigDirectiveExecutable *
emit_function_directives (hsa_function_representation *f, bool is_declaration)
@@ -635,7 +653,7 @@ emit_function_directives (hsa_function_representation *f, bool is_declaration)
struct BrigDirectiveExecutable fndir;
unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
int count = 0;
- BrigDirectiveExecutable *ptr_to_fndir;
+ void *ptr_to_fndir;
hsa_symbol *sym;
if (!f->m_declaration_p)
@@ -693,17 +711,7 @@ emit_function_directives (hsa_function_representation *f, bool is_declaration)
*slot = int_fn;
}
- brig_code.add (&fndir, sizeof (fndir));
- /* terrible hack: we need to set instCount after we emit all
- insns, but we need to emit directive in order, and we emit directives
- during insn emitting. So we need to emit the FUNCTION directive
- early, then the insns, and then we need to set instCount, so remember
- a pointer to it, in some horrible way. cur_chunk.data+size points
- directly to after fndir here. */
- ptr_to_fndir
- = (BrigDirectiveExecutable *)(brig_code.cur_chunk->data
- + brig_code.cur_chunk->size
- - sizeof (fndir));
+ brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
if (f->m_output_arg)
emit_directive_variable (f->m_output_arg);
@@ -724,7 +732,7 @@ emit_function_directives (hsa_function_representation *f, bool is_declaration)
}
}
- return ptr_to_fndir;
+ return (BrigDirectiveExecutable *) ptr_to_fndir;
}
/* Emit a label directive for the given HBB. We assume it is about to start on
@@ -1237,20 +1245,20 @@ emit_insn_operands (hsa_insn_basic *insn)
operand_offsets;
unsigned l = insn->operand_count ();
- operand_offsets.safe_grow (l);
-
- for (unsigned i = 0; i < l; i++)
- operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
/* We have N operands so use 4 * N for the byte_count. */
uint32_t byte_count = lendian32 (4 * l);
-
unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
- brig_data.add (operand_offsets.address (),
- l * sizeof (BrigOperandOffset32_t));
+ if (l > 0)
+ {
+ operand_offsets.safe_grow (l);
+ for (unsigned i = 0; i < l; i++)
+ operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
+ brig_data.add (operand_offsets.address (),
+ l * sizeof (BrigOperandOffset32_t));
+ }
brig_data.round_size_up (4);
-
return offset;
}
@@ -1334,10 +1342,6 @@ emit_signal_insn (hsa_insn_signal *mem)
{
struct BrigInstSignal repr;
- /* This is necessary because of the erroneous typedef of
- BrigMemoryModifier8_t which introduces padding which may then contain
- random stuff (which we do not want so that we can test things don't
- change). */
memset (&repr, 0, sizeof (repr));
repr.base.base.byteCount = lendian16 (sizeof (repr));
repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
@@ -1345,9 +1349,9 @@ emit_signal_insn (hsa_insn_signal *mem)
repr.base.type = lendian16 (mem->m_type);
repr.base.operands = lendian32 (emit_insn_operands (mem));
- repr.memoryOrder = mem->m_memoryorder;
- repr.signalOperation = mem->m_atomicop;
- repr.signalType = BRIG_TYPE_SIG64;
+ repr.memoryOrder = mem->m_memory_order;
+ repr.signalOperation = mem->m_signalop;
+ repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
brig_code.add (&repr, sizeof (repr));
brig_insn_count++;
@@ -1368,10 +1372,6 @@ emit_atomic_insn (hsa_insn_atomic *mem)
else
addr = as_a <hsa_op_address *> (mem->get_op (1));
- /* This is necessary because of the erroneous typedef of
- BrigMemoryModifier8_t which introduces padding which may then contain
- random stuff (which we do not want so that we can test things don't
- change). */
memset (&repr, 0, sizeof (repr));
repr.base.base.byteCount = lendian16 (sizeof (repr));
repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
@@ -1448,10 +1448,6 @@ emit_alloca_insn (hsa_insn_alloca *alloca)
struct BrigInstMem repr;
gcc_checking_assert (alloca->operand_count () == 2);
- /* This is necessary because of the erroneous typedef of
- BrigMemoryModifier8_t which introduces padding which may then contain
- random stuff (which we do not want so that we can test things don't
- change). */
memset (&repr, 0, sizeof (repr));
repr.base.base.byteCount = lendian16 (sizeof (repr));
repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
@@ -1497,11 +1493,29 @@ emit_cmp_insn (hsa_insn_cmp *cmp)
brig_insn_count++;
}
-/* Emit an HSA branching instruction and all necessary directives, schedule
- necessary operands for writing. */
+/* Emit an HSA generic branching/synchronization instruction. */
+
+static void
+emit_generic_branch_insn (hsa_insn_br *br)
+{
+ struct BrigInstBr repr;
+ repr.base.base.byteCount = lendian16 (sizeof (repr));
+ repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
+ repr.base.opcode = lendian16 (br->m_opcode);
+ repr.width = br->m_width;
+ repr.base.type = lendian16 (br->m_type);
+ repr.base.operands = lendian32 (emit_insn_operands (br));
+ memset (&repr.reserved, 0, sizeof (repr.reserved));
+
+ brig_code.add (&repr, sizeof (repr));
+ brig_insn_count++;
+}
+
+/* Emit an HSA conditional branching instruction and all necessary directives,
+ schedule necessary operands for writing. */
static void
-emit_branch_insn (hsa_insn_br *br)
+emit_cond_branch_insn (hsa_insn_cbr *br)
{
struct BrigInstBr repr;
@@ -1514,7 +1528,7 @@ emit_branch_insn (hsa_insn_br *br)
repr.base.base.byteCount = lendian16 (sizeof (repr));
repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
repr.base.opcode = lendian16 (br->m_opcode);
- repr.width = BRIG_WIDTH_1;
+ repr.width = br->m_width;
/* For Conditional jumps the type is always B1. */
repr.base.type = lendian16 (BRIG_TYPE_B1);
@@ -1730,8 +1744,8 @@ emit_queue_insn (hsa_insn_queue *insn)
repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
repr.base.opcode = lendian16 (insn->m_opcode);
repr.base.type = lendian16 (insn->m_type);
- repr.segment = BRIG_SEGMENT_GLOBAL;
- repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE;
+ repr.segment = insn->m_segment;
+ repr.memoryOrder = insn->m_memory_order;
repr.base.operands = lendian32 (emit_insn_operands (insn));
brig_data.round_size_up (4);
brig_code.add (&repr, sizeof (repr));
@@ -1886,8 +1900,8 @@ emit_insn (hsa_insn_basic *insn)
emit_segment_insn (seg);
else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
emit_cmp_insn (cmp);
- else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
- emit_branch_insn (br);
+ else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
+ emit_cond_branch_insn (br);
else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
{
if (switch_instructions == NULL)
@@ -1896,6 +1910,8 @@ emit_insn (hsa_insn_basic *insn)
switch_instructions->safe_push (sbr);
emit_switch_insn (sbr);
}
+ else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
+ emit_generic_branch_insn (br);
else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
emit_arg_block_insn (block);
else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
@@ -2006,7 +2022,7 @@ hsa_brig_emit_function (void)
prev_bb = bb;
}
perhaps_emit_branch (prev_bb, NULL);
- ptr_to_fndir->nextModuleEntry = brig_code.total_size;
+ ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
/* Fill up label references for all sbr instructions. */
if (switch_instructions)
@@ -2225,11 +2241,6 @@ hsa_output_kernels (tree *host_func_table, tree *kernels)
tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
gridified_kernel_p);
unsigned count = 0;
-
- kernel_dependencies_vector_type
- = build_array_type (build_pointer_type (char_type_node),
- build_index_type (size_int (0)));
-
vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
if (hsa_decl_kernel_dependencies)
{
@@ -2279,6 +2290,7 @@ hsa_output_kernels (tree *host_func_table, tree *kernels)
if (count > 0)
{
ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
+ gcc_checking_assert (kernel_dependencies_vector_type);
tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
get_identifier (tmp_name),
kernel_dependencies_vector_type);