/* Copyright (C) 2017-2025 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ /* {{{ Includes. */ #include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" #include "target.h" #include "tree.h" #include "gimple.h" #include "tree-pass.h" #include "gimple-iterator.h" #include "cfghooks.h" #include "cfgloop.h" #include "tm_p.h" #include "stringpool.h" #include "fold-const.h" #include "varasm.h" #include "omp-low.h" #include "omp-general.h" #include "omp-offload.h" #include "internal-fn.h" #include "tree-vrp.h" #include "tree-ssanames.h" #include "tree-ssa-operands.h" #include "gimplify.h" #include "tree-phinodes.h" #include "cgraph.h" #include "targhooks.h" #include "langhooks-def.h" #include "memmodel.h" /* }}} */ /* {{{ OpenACC reductions. */ /* Global lock variable, needed for 128bit worker & gang reductions. */ static GTY(()) tree global_lock_var; /* Lazily generate the global_lock_var decl and return its address. 
*/

/* The decl is created on the first call and cached in global_lock_var.
   It is an artificial, external, public, static variable of type
   `volatile unsigned' named "__reduction_lock".  Every call returns a
   freshly built address expression for that single decl.  */

static tree
gcn_global_lock_addr ()
{
  tree v = global_lock_var;

  if (!v)
    {
      tree name = get_identifier ("__reduction_lock");
      tree type = build_qualified_type (unsigned_type_node,
                                        TYPE_QUAL_VOLATILE);
      v = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type);
      global_lock_var = v;
      DECL_ARTIFICIAL (v) = 1;
      DECL_EXTERNAL (v) = 1;
      TREE_STATIC (v) = 1;
      TREE_PUBLIC (v) = 1;
      TREE_USED (v) = 1;
      mark_addressable (v);
      mark_decl_referenced (v);
    }

  return build_fold_addr_expr (v);
}

/* Pointer variables for array reduction buffers used.

   This is used as a stack of VAR_DECLs: the reduction SETUP expansion
   pushes the decl it creates, the worker-buffer helper reads the top
   entry, and the TEARDOWN expansion pops it.  The element type is
   `tree'; naming the `vec' template without an element type is not a
   valid declaration.  */

static vec<tree> gcn_array_reduction_buffers;

/* Helper function for gcn_reduction_update.

   Insert code to locklessly update *PTR with *PTR OP VAR just before
   GSI.  We use a lockless scheme for nearly all cases, which looks
   like:
     actual = initval (OP);
     do {
       guess = actual;
       write = guess OP myval;
       actual = cmp&swap (ptr, guess, write)
     } while (actual bit-different-to guess);
   return write;

   This relies on a cmp&swap instruction, which is available for 32- and
   64-bit types.  Larger types must use a locking scheme.

   LOC is the location attached to the generated call and PHI arguments.
   On return *GSI has been re-seated on the statement it pointed at on
   entry, which now lives in the block following the new loop.  The
   returned tree is the written value converted back to VAR's type.  */

static tree
gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
                     tree ptr, tree var, tree_code op)
{
  /* Default to the `unsigned'-sized builtin; switched to the
     `long long unsigned' variant below when VAR's type has that size.  */
  unsigned fn = GCN_BUILTIN_CMP_SWAP;
  tree_code code = NOP_EXPR;
  tree arg_type = unsigned_type_node;
  tree var_type = TREE_TYPE (var);

  /* Complex and floating-point values are moved to and from the integer
     ARG_TYPE with VIEW_CONVERT_EXPR; everything else uses NOP_EXPR.  */
  if (TREE_CODE (var_type) == COMPLEX_TYPE
      || SCALAR_FLOAT_TYPE_P (var_type))
    code = VIEW_CONVERT_EXPR;

  if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node))
    {
      arg_type = long_long_unsigned_type_node;
      fn = GCN_BUILTIN_CMP_SWAPLL;
    }

  tree swap_fn = gcn_builtin_decl (fn, true);

  /* Seed the first guess with OP's reduction-initial value, viewed as
     ARG_TYPE.  */
  gimple_seq init_seq = NULL;
  tree init_var = make_ssa_name (arg_type);
  tree init_expr = omp_reduction_init_op (loc, op, var_type);
  init_expr = fold_build1 (code, arg_type, init_expr);
  gimplify_assign (init_var, init_expr, &init_seq);
  gimple *init_end = gimple_seq_last (init_seq);

  gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);

  /* Split the block just after the init stmts.  */
  basic_block pre_bb = gsi_bb (*gsi);
  edge pre_edge = split_block (pre_bb, init_end);
  basic_block loop_bb = pre_edge->dest;
  pre_bb = pre_edge->src;
  /* Reset the iterator.  */
  *gsi = gsi_for_stmt (gsi_stmt (*gsi));

  tree expect_var = make_ssa_name (arg_type);
  tree actual_var = make_ssa_name (arg_type);
  tree write_var = make_ssa_name (arg_type);

  /* Build and insert the reduction calculation.  */
  gimple_seq red_seq = NULL;
  tree write_expr = fold_build1 (code, var_type, expect_var);
  write_expr = fold_build2 (op, var_type, write_expr, var);
  write_expr = fold_build1 (code, arg_type, write_expr);
  gimplify_assign (write_var, write_expr, &red_seq);

  gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);

  /* Build & insert the cmp&swap sequence.  */
  gimple_seq latch_seq = NULL;
  tree swap_expr = build_call_expr_loc (loc, swap_fn, 3,
                                        ptr, expect_var, write_var);
  gimplify_assign (actual_var, swap_expr, &latch_seq);

  gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
                                   NULL_TREE, NULL_TREE);
  gimple_seq_add_stmt (&latch_seq, cond);

  gimple *latch_end = gimple_seq_last (latch_seq);
  gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT);

  /* Split the block just after the latch stmts.  */
  edge post_edge = split_block (loop_bb, latch_end);
  basic_block post_bb = post_edge->dest;
  loop_bb = post_edge->src;
  *gsi = gsi_for_stmt (gsi_stmt (*gsi));

  /* The edge created by split_block becomes the "true" (actual ==
     expect) exit; the new self edge is the "false" back edge.  */
  post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
  /* post_edge->probability = profile_probability::even ();  */
  edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
  /* loop_edge->probability = profile_probability::even ();  */
  set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
  set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);

  /* guess = PHI <initval (entry), actual (back edge)>.  */
  gphi *phi = create_phi_node (expect_var, loop_bb);
  add_phi_arg (phi, init_var, pre_edge, loc);
  add_phi_arg (phi, actual_var, loop_edge, loc);

  /* Register the single-block loop with the loop tree.  */
  loop *loop = alloc_loop ();
  loop->header = loop_bb;
  loop->latch = loop_bb;
  add_loop (loop, loop_bb->loop_father);

  return fold_build1 (code, var_type, write_var);
}

/* Helper function for gcn_reduction_update.

   Insert code to lockfully update *PTR with *PTR OP VAR just before
   GSI.  This is necessary for types larger than 64 bits, where there
   is no cmp&swap instruction to implement a lockless scheme.  We use
   a lock variable in global memory.

   while (cmp&swap (&lock_var, 0, 1))
     continue;
   T accum = *ptr;
   accum = accum OP var;
   *ptr = accum;
   cmp&swap (&lock_var, 1, 0);
   return accum;

   A lock in global memory is necessary to force execution engine
   descheduling and avoid resource starvation that can occur if the
   lock is in shared memory.

   LOC is the location attached to the generated calls.  On return *GSI
   has been re-seated on the statement it pointed at on entry.  The
   returned tree is the SSA name holding the updated accumulator.  */

static tree
gcn_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
                     tree ptr, tree var, tree_code op)
{
  tree var_type = TREE_TYPE (var);
  tree swap_fn = gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP, true);
  tree uns_unlocked = build_int_cst (unsigned_type_node, 0);
  tree uns_locked = build_int_cst (unsigned_type_node, 1);

  /* Split the block just before the gsi.  Insert a gimple nop to make
     this easier.  */
  gimple *nop = gimple_build_nop ();
  gsi_insert_before (gsi, nop, GSI_SAME_STMT);
  basic_block entry_bb = gsi_bb (*gsi);
  edge entry_edge = split_block (entry_bb, nop);
  basic_block lock_bb = entry_edge->dest;
  /* Reset the iterator.  */
  *gsi = gsi_for_stmt (gsi_stmt (*gsi));

  /* Build and insert the locking sequence.  */
  gimple_seq lock_seq = NULL;
  tree lock_var = make_ssa_name (unsigned_type_node);
  tree lock_expr = gcn_global_lock_addr ();
  lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr,
                                   uns_unlocked, uns_locked);
  gimplify_assign (lock_var, lock_expr, &lock_seq);
  gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked,
                                   NULL_TREE, NULL_TREE);
  gimple_seq_add_stmt (&lock_seq, cond);
  gimple *lock_end = gimple_seq_last (lock_seq);
  gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT);

  /* Split the block just after the lock sequence.  */
  edge locked_edge = split_block (lock_bb, lock_end);
  basic_block update_bb = locked_edge->dest;
  lock_bb = locked_edge->src;
  *gsi = gsi_for_stmt (gsi_stmt (*gsi));

  /* Create the lock loop.  The "true" edge (the swap result equals the
     unlocked value) leaves the loop; the "false" edge retries.  */
  locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
  locked_edge->probability = profile_probability::even ();
  edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
  loop_edge->probability = profile_probability::even ();

  set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);

  /* Create the loop structure.  */
  loop *lock_loop = alloc_loop ();
  lock_loop->header = lock_bb;
  lock_loop->latch = lock_bb;
  lock_loop->nb_iterations_estimate = 1;
  lock_loop->any_estimate = true;
  add_loop (lock_loop, entry_bb->loop_father);

  /* Build and insert the reduction calculation.  Both memory
     references are marked volatile.  */
  gimple_seq red_seq = NULL;
  tree acc_in = make_ssa_name (var_type);
  tree ref_in
    = build_simple_mem_ref (fold_convert (build_pointer_type (var_type),
                                          ptr));
  TREE_THIS_VOLATILE (ref_in) = 1;
  gimplify_assign (acc_in, ref_in, &red_seq);

  tree acc_out = make_ssa_name (var_type);
  /* NOTE(review): the update is built from REF_IN, not from the ACC_IN
     value loaded just above, so ACC_IN is otherwise unused here.  The
     pseudo-code in the header comment suggests ACC_IN was intended;
     confirm before changing.  */
  tree update_expr = fold_build2 (op, var_type, ref_in, var);
  gimplify_assign (acc_out, update_expr, &red_seq);

  tree ref_out
    = build_simple_mem_ref (fold_convert (build_pointer_type (var_type),
                                          ptr));
  TREE_THIS_VOLATILE (ref_out) = 1;
  gimplify_assign (ref_out, acc_out, &red_seq);

  gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);

  /* Build & insert the unlock sequence.  */
  gimple_seq unlock_seq = NULL;
  tree unlock_expr = gcn_global_lock_addr ();
  unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr,
                                     uns_locked, uns_unlocked);
  gimplify_and_add (unlock_expr, &unlock_seq);
  gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT);

  return acc_out;
}

/* Emit a sequence to update a reduction accumulator at *PTR with the
   value held in VAR using operator OP.  Return the updated value.

   TODO: optimize for atomic ops and independent complex ops.
*/

/* Dispatch on the type of VAR:

   - array or pointer type: ARRAY_MAX_IDX must be non-null.  A
     single-block loop is emitted before GSI whose index starts at 0
     and repeats while index + 1 <= ARRAY_MAX_IDX; the body is produced
     by a recursive call on one element.  A MEM_REF built from the
     indexed PTR expression is returned.
   - RECORD_TYPE: recurse once per FIELD_DECL (array-typed fields pass
     the maximum value of their index domain as ARRAY_MAX_IDX) and
     return a MEM_REF of PTR.
   - anything else: use the lockless scheme when the type's size equals
     that of `unsigned' or `long long unsigned', otherwise the
     lock-based scheme.

   If PTR is not a variable decl it is first copied into a fresh SSA
   name.  */

static tree
gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
                      tree ptr, tree var, tree_code op,
                      tree array_max_idx = NULL_TREE)
{
  tree type = TREE_TYPE (var);
  tree size = TYPE_SIZE (type);

  if (!VAR_P (ptr))
    {
      tree t = make_ssa_name (TREE_TYPE (ptr));
      gimple_seq seq = NULL;
      gimplify_assign (t, ptr, &seq);
      gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
      ptr = t;
    }

  if (TREE_CODE (type) == ARRAY_TYPE
      || TREE_CODE (type) == POINTER_TYPE)
    {
      /* Work out the type whose TREE_TYPE is the element type.  */
      tree array_type;
      if (TREE_CODE (type) == ARRAY_TYPE)
        {
          array_type = TREE_TYPE (var);
        }
      else if (TREE_CODE (type) == POINTER_TYPE
               && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
        {
          array_type = TREE_TYPE (TREE_TYPE (var));
        }
      else if (TREE_CODE (type) == POINTER_TYPE)
        {
          array_type = TREE_TYPE (var);
        }
      else
        gcc_unreachable ();

      tree array_elem_type = TREE_TYPE (array_type);

      gimple *g;
      gimple_seq seq = NULL;
      tree max_index = array_max_idx;
      gcc_assert (array_max_idx);

      tree init_index = make_ssa_name (TREE_TYPE (max_index));
      tree loop_index = make_ssa_name (TREE_TYPE (max_index));
      tree update_index = make_ssa_name (TREE_TYPE (max_index));

      /* init_index = 0, emitted in the block preceding the loop.  */
      g = gimple_build_assign (init_index,
                               build_int_cst (TREE_TYPE (init_index), 0));
      gimple_seq_add_stmt (&seq, g);

      gimple *init_end = gimple_seq_last (seq);
      gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);

      basic_block init_bb = gsi_bb (*gsi);
      edge init_edge = split_block (init_bb, init_end);
      basic_block loop_bb = init_edge->dest;
      /* Reset the iterator.  */
      *gsi = gsi_for_stmt (gsi_stmt (*gsi));

      /* Loop latch: update_index = loop_index + 1;
         if (update_index <= max_index) repeat.  */
      seq = NULL;
      g = gimple_build_assign (update_index, PLUS_EXPR, loop_index,
                               build_int_cst (TREE_TYPE (loop_index), 1));
      gimple_seq_add_stmt (&seq, g);

      g = gimple_build_cond (LE_EXPR, update_index, max_index, NULL, NULL);
      gimple_seq_add_stmt (&seq, g);
      gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);

      edge post_edge = split_block (loop_bb, g);
      basic_block post_bb = post_edge->dest;
      loop_bb = post_edge->src;
      /* Reset the iterator.  */
      *gsi = gsi_for_stmt (gsi_stmt (*gsi));

      /* Place where we insert reduction code below.  */
      gimple_stmt_iterator reduction_code_gsi = gsi_start_bb (loop_bb);

      /* The split edge becomes the "false" exit; the new self edge is
         the "true" back edge.  */
      post_edge->flags ^= EDGE_FALSE_VALUE | EDGE_FALLTHRU;
      post_edge->probability = profile_probability::even ();
      edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
      loop_edge->probability = profile_probability::even ();
      set_immediate_dominator (CDI_DOMINATORS, loop_bb, init_bb);
      set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
      class loop *new_loop = alloc_loop ();
      new_loop->header = loop_bb;
      new_loop->latch = loop_bb;
      add_loop (new_loop, loop_bb->loop_father);

      /* loop_index = PHI <init_index (entry), update_index (back)>.  */
      gphi *phi = create_phi_node (loop_index, loop_bb);
      add_phi_arg (phi, init_index, init_edge, loc);
      add_phi_arg (phi, update_index, loop_edge, loc);

      /* Address the current element of both VAR and *PTR by a byte
         offset of loop_index * element size.  */
      tree var_ptr = fold_convert (build_pointer_type (array_elem_type),
                                   var);
      tree idx = fold_build2 (MULT_EXPR, sizetype,
                              fold_convert (sizetype, loop_index),
                              TYPE_SIZE_UNIT (array_elem_type));
      var_ptr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (var_ptr),
                        var_ptr, idx);
      tree var_aref = build_simple_mem_ref (var_ptr);

      ptr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, idx);

      /* Emit the per-element update at the start of the loop body.  */
      gcn_reduction_update (loc, &reduction_code_gsi,
                            ptr, var_aref, op);

      return build_simple_mem_ref (ptr);
    }
  else if (TREE_CODE (type) == RECORD_TYPE)
    {
      for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
        if (TREE_CODE (fld) == FIELD_DECL)
          {
            tree var_fld_ref = build3 (COMPONENT_REF, TREE_TYPE (fld),
                                       var, fld, NULL);
            tree ptr_ref = build_simple_mem_ref (ptr);
            /* The accumulator-side field reference carries the
               qualifiers of the dereferenced PTR.  */
            tree ptr_fld_type
              = build_qualified_type (TREE_TYPE (fld),
                                      TYPE_QUALS (TREE_TYPE (ptr_ref)));
            tree ptr_fld_ref = build3 (COMPONENT_REF, ptr_fld_type,
                                       ptr_ref, fld, NULL);

            if (TREE_CODE (TREE_TYPE (fld)) == ARRAY_TYPE)
              {
                tree array_elem_ptr_type
                  = build_pointer_type (TREE_TYPE (TREE_TYPE (fld)));
                gcn_reduction_update
                  (loc, gsi,
                   fold_convert (array_elem_ptr_type,
                                 build_fold_addr_expr (ptr_fld_ref)),
                   build_fold_addr_expr (var_fld_ref), op,
                   TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (fld))));
              }
            else
              gcn_reduction_update (loc, gsi,
                                    build_fold_addr_expr (ptr_fld_ref),
                                    var_fld_ref, op);
          }

      return build_simple_mem_ref (ptr);
    }

  if (size == TYPE_SIZE (unsigned_type_node)
      || size == TYPE_SIZE (long_long_unsigned_type_node))
    return gcn_lockless_update (loc, gsi, ptr, var, op);
  else
    return gcn_lockfull_update (loc, gsi, ptr, var, op);
}

/* Return a temporary variable decl to use for an OpenACC worker
   reduction.

   What is actually built is a MEM_REF through a pointer whose pointee
   type is TYPE (with any reference stripped) qualified with
   ADDR_SPACE_LDS.  The address is the current function's
   reduction_base plus OFFSET_EXPR: folded into a single integer
   constant when OFFSET_EXPR is constant (and asserted to lie below
   reduction_limit - reduction_base), otherwise formed with
   POINTER_PLUS_EXPR.  */

static tree
gcn_goacc_get_worker_red_decl (tree type, tree offset_expr)
{
  machine_function *machfun = cfun->machine;

  if (TREE_CODE (type) == REFERENCE_TYPE)
    type = TREE_TYPE (type);

  tree var_type
    = build_qualified_type (type,
                            (TYPE_QUALS (type)
                             | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)));
  tree addr;

  if (TREE_CONSTANT (offset_expr))
    {
      unsigned offset = TREE_INT_CST_LOW (offset_expr);
      gcc_assert (offset
                  < (machfun->reduction_limit - machfun->reduction_base));
      tree ptr_type = build_pointer_type (var_type);
      addr = build_int_cst (ptr_type, machfun->reduction_base + offset);
    }
  else
    {
      tree ptr_type = build_pointer_type (var_type);
      tree red_base = build_int_cst (ptr_type, machfun->reduction_base);
      addr = build2 (POINTER_PLUS_EXPR, ptr_type, red_base,
                     fold_convert (size_type_node, offset_expr));
    }

  return build_simple_mem_ref (addr);
}

/* Return a MEM_REF of type ARRAY_TYPE addressing the current gang's
   slice of the most recently pushed array reduction buffer.

   The statements needed to compute the address are appended to SEQ.
   The pointer is loaded from the decl on top of
   gcn_array_reduction_buffers (which must not be empty) and advanced by
   gang position * ((ARRAY_MAX_IDX + 1) * element size) bytes, where the
   gang position comes from IFN_GOACC_DIM_POS (GOMP_DIM_GANG).  */

static tree
gcn_goacc_get_worker_array_reduction_buffer (tree array_type,
                                             tree array_max_idx,
                                             gimple_seq *seq)
{
  gcc_assert (!gcn_array_reduction_buffers.is_empty ());
  tree red_buf_ptr = gcn_array_reduction_buffers.last ();

  /* Load the buffer pointer from the global variable.  */
  tree ptr = make_ssa_name (ptr_type_node);
  gimplify_assign (ptr, red_buf_ptr, seq);

  tree whole_block_ptr;
  if (TREE_CODE (array_type) == ARRAY_TYPE)
    whole_block_ptr = fold_convert (build_pointer_type (array_type), ptr);
  else
    /* NOTE(review): this stores the type node itself, not an
       expression derived from PTR, and it is converted to a pointer
       below.  Looks unintended -- confirm whether this branch is
       reachable and what it should compute.  */
    whole_block_ptr = array_type;

  tree arg = build_int_cst (unsigned_type_node, GOMP_DIM_GANG);
  tree gang_id = make_ssa_name (integer_type_node);
  gimple *gang_id_call
    = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
  gimple_call_set_lhs (gang_id_call, gang_id);
  gimple_seq_add_stmt (seq, gang_id_call);

  /* Bytes per gang: (ARRAY_MAX_IDX + 1) * element size.  */
  tree len = fold_build2 (PLUS_EXPR, size_type_node, array_max_idx,
                          size_int (1));
  tree elem_size = TYPE_SIZE_UNIT (TREE_TYPE (array_type));
  tree array_size_expr = build2 (MULT_EXPR, size_type_node,
                                 len, elem_size);
  tree type_size = make_ssa_name (size_type_node);
  gimplify_assign (type_size, array_size_expr, seq);

  /* Byte offset of this gang's slice.  */
  tree idx = make_ssa_name (size_type_node);
  gimplify_assign (idx,
                   build2 (MULT_EXPR, size_type_node, type_size,
                           fold_convert (size_type_node, gang_id)),
                   seq);

  /* NOTE(review): the second `;' below is a stray empty statement.  */
  tree addr = fold_convert (ptr_type_node, whole_block_ptr);;
  addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, idx);
  addr = fold_convert (build_pointer_type (array_type), addr);

  tree addr_reg = make_ssa_name (build_pointer_type (array_type));
  gimplify_assign (addr_reg, addr, seq);

  return build_simple_mem_ref (addr_reg);
}

/* Turn the straight-line code at *GSI_P into a two-way branch.

   SPLIT_STMT must be the conditional statement ending the code already
   inserted before *GSI_P.  The block is split after it; *THEN_SEQ is
   inserted on the EDGE_FALSE_VALUE successor and, when ELSE_SEQ is
   non-null, *ELSE_SEQ on the EDGE_TRUE_VALUE successor.  With a null
   ELSE_SEQ the true edge goes straight to the join block.  Both arms
   rejoin in the block holding the statement *GSI_P referred to on
   entry; *GSI_P is re-seated on that statement.  */

static void
gcn_create_if_else_seq (gimple_stmt_iterator *gsi_p, gimple *split_stmt,
                        gimple_seq *then_seq, gimple_seq *else_seq)
{
  basic_block init_bb = gsi_bb (*gsi_p);
  edge fallthru_edge = split_block (init_bb, split_stmt);
  basic_block then_bb = fallthru_edge->dest;
  /* Reset the iterator.  */
  *gsi_p = gsi_for_stmt (gsi_stmt (*gsi_p));

  gimple *then_seq_end = gimple_seq_last (*then_seq);
  gsi_insert_seq_before (gsi_p, *then_seq, GSI_SAME_STMT);

  /* LAST_BB / LAST_SEQ_END track the arm after which the join block
     is split off.  */
  basic_block last_bb = then_bb;
  gimple *last_seq_end = then_seq_end;

  basic_block else_bb = NULL;
  edge then_else_fallthru_edge = NULL;
  if (else_seq)
    {
      then_else_fallthru_edge = split_block (then_bb, then_seq_end);
      else_bb = then_else_fallthru_edge->dest;
      /* Reset the iterator.  */
      *gsi_p = gsi_for_stmt (gsi_stmt (*gsi_p));

      gimple *else_seq_end = gimple_seq_last (*else_seq);
      gsi_insert_seq_before (gsi_p, *else_seq, GSI_SAME_STMT);

      last_bb = else_bb;
      last_seq_end = else_seq_end;
    }

  edge post_edge = split_block (last_bb, last_seq_end);
  basic_block post_bb = post_edge->dest;
  /* Reset the iterator.  */
  *gsi_p = gsi_for_stmt (gsi_stmt (*gsi_p));

  /* Wire up the condition: true -> else arm (or join), false -> then
     arm.  */
  edge if_true_edge
    = make_edge (init_bb, (else_seq ? else_bb : post_bb), EDGE_TRUE_VALUE);
  if_true_edge->probability = profile_probability::even ();
  fallthru_edge->flags = EDGE_FALSE_VALUE;
  fallthru_edge->probability = profile_probability::even ();
  post_edge->flags = EDGE_FALLTHRU;
  post_edge->probability = profile_probability::always ();
  set_immediate_dominator (CDI_DOMINATORS, then_bb, init_bb);
  set_immediate_dominator (CDI_DOMINATORS, post_bb, init_bb);
  if (else_seq)
    {
      /* The then arm must skip the else arm and go to the join.  */
      redirect_edge_and_branch (then_else_fallthru_edge, post_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, init_bb);
    }
}

/* Insert *BODY_SEQ as a single-block loop in front of the statement at
   *GSI_P.

   The block holding *GSI_P is split (split_block is passed a null
   statement), *BODY_SEQ is placed at the start of the new block, and
   the block is split again after the last statement of *BODY_SEQ,
   which is expected to be a condition.  A self edge with EDGE_FLAGS is
   added as the back edge and the exit edge is marked EDGE_FALSE_VALUE.
   *GSI_P is re-seated on its original statement.

   NOTE(review): the exit edge flag is fixed, so EDGE_FLAGS is
   presumably always expected to be EDGE_TRUE_VALUE -- confirm.  */

static void
gcn_create_do_while_loop_seq (gimple_stmt_iterator *gsi_p,
                              gimple_seq *body_seq, int edge_flags)
{
  gimple *g = NULL;
  basic_block init_bb = gsi_bb (*gsi_p);
  edge init_edge = split_block (init_bb, g);
  basic_block loop_bb = init_edge->dest;
  init_bb = init_edge->src;
  /* Reset the iterator.  */
  *gsi_p = gsi_for_stmt (gsi_stmt (*gsi_p));

  gimple_stmt_iterator loop_gsi = gsi_start_bb (loop_bb);
  gimple *body_seq_end = gimple_seq_last (*body_seq);
  gsi_insert_seq_before (&loop_gsi, *body_seq, GSI_SAME_STMT);

  edge post_edge = split_block (loop_bb, body_seq_end);
  basic_block post_bb = post_edge->dest;
  /* Reset the iterator.  */
  *gsi_p = gsi_for_stmt (gsi_stmt (*gsi_p));

  make_edge (loop_bb, loop_bb, edge_flags);
  post_edge->flags = EDGE_FALSE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, loop_bb, init_bb);
  set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
  loop *loop = alloc_loop ();
  loop->header = loop_bb;
  loop->latch = loop_bb;
  add_loop (loop, loop_bb->loop_father);
}

/* Expand IFN_GOACC_REDUCTION_SETUP.
*/

/* CALL's arguments as read here: 1 = receiver object reference
   (REF_TO_RES), 2 = the reduction variable, 3 = the GOMP_DIM_* level,
   5 = the worker buffer offset, 6 = the array address (zero for a
   non-array reduction), 7 = the array's maximum index.

   For an array reduction with a receiver object, code is emitted that
   has gang zero malloc a buffer of (max index + 1) * element size *
   number-of-gangs bytes and publish its address in a new static
   variable, while the other gangs spin until that variable is
   non-zero.  The variable's decl is pushed on
   gcn_array_reduction_buffers.  At worker level the incoming value is
   then stored to the worker reduction buffer.  Finally CALL's lhs, if
   any, is assigned VAR and CALL is removed.  */

static void
gcn_goacc_reduction_setup (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));

  tree array_addr = gimple_call_arg (call, 6);
  tree array_max_idx = gimple_call_arg (call, 7);
  bool array_p = !integer_zerop (array_addr);
  tree array_type = NULL_TREE;
  /* ARRAY_ADDR is either a pointer to an array type, in which case the
     pointed-to array type is used, or its own type is used as is.  */
  if (array_p)
    array_type
      = (TREE_CODE (TREE_TYPE (array_addr)) == POINTER_TYPE
         && TREE_CODE (TREE_TYPE (TREE_TYPE (array_addr))) == ARRAY_TYPE
         ? TREE_TYPE (TREE_TYPE (array_addr))
         : TREE_TYPE (array_addr));

  gimple_seq seq = NULL;

  push_gimplify_context (true);

  /* Copy the receiver object.  */
  tree ref_to_res = gimple_call_arg (call, 1);

  if (level != GOMP_DIM_GANG)
    {
      if (!integer_zerop (ref_to_res))
        {
          if (!array_p)
            var = build_simple_mem_ref (ref_to_res);
        }
    }

  if (array_p && !integer_zerop (ref_to_res))
    {
      gimple_seq condseq = NULL;

      /* Create global variable to store pointer to array reduction
         buffer.  */
      tree reduction_buffer_ptr_type
        = build_qualified_type (ptr_type_node, TYPE_QUAL_VOLATILE);
      tree reduction_buffer_ptr
        = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                      create_tmp_var_name ("gcn_array_reduction_buf"),
                      reduction_buffer_ptr_type);
      TREE_STATIC (reduction_buffer_ptr) = 1;
      TREE_PUBLIC (reduction_buffer_ptr) = 0;
      DECL_INITIAL (reduction_buffer_ptr) = 0;
      DECL_EXTERNAL (reduction_buffer_ptr) = 0;
      varpool_node::add (reduction_buffer_ptr);

      tree reduction_buffer_ptr_addr = make_ssa_name (ptr_type_node);
      gimplify_assign (reduction_buffer_ptr_addr,
                       build_fold_addr_expr (reduction_buffer_ptr),
                       &condseq);

      /* if (gang position != 0) <wait> else <allocate>.  */
      tree gang_dim_arg = build_int_cst (unsigned_type_node, GOMP_DIM_GANG);
      tree gang_pos = make_ssa_name (integer_type_node);
      gimple *gang_pos_call
        = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, gang_dim_arg);
      gimple_call_set_lhs (gang_pos_call, gang_pos);
      gimple_seq_add_stmt (&condseq, gang_pos_call);
      gimple *cond = gimple_build_cond (NE_EXPR, gang_pos,
                                        integer_zero_node, NULL, NULL);
      gimple_seq_add_stmt (&condseq, cond);

      gimple *cond_end = gimple_seq_last (condseq);
      gsi_insert_seq_before (&gsi, condseq, GSI_SAME_STMT);

      /* Gang zero: allocate one slice per gang and publish the pointer
         with a release store.  */
      gimple_seq malloc_seq = NULL;

      tree gang_num = make_ssa_name (integer_type_node);
      gimple *gang_num_call
        = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, gang_dim_arg);
      gimple_call_set_lhs (gang_num_call, gang_num);
      gimple_seq_add_stmt (&malloc_seq, gang_num_call);

      tree len = fold_build2 (PLUS_EXPR, size_type_node, array_max_idx,
                              size_int (1));
      tree elem_size = TYPE_SIZE_UNIT (TREE_TYPE (array_type));
      tree malloc_size_expr = build2 (MULT_EXPR, size_type_node,
                                      len, elem_size);
      malloc_size_expr
        = build2 (MULT_EXPR, size_type_node, malloc_size_expr,
                  fold_convert (size_type_node, gang_num));
      tree malloc_size = make_ssa_name (size_type_node);
      gimplify_assign (malloc_size, malloc_size_expr, &malloc_seq);

      tree ptr = make_ssa_name (ptr_type_node);
      tree malloc_decl = builtin_decl_explicit (BUILT_IN_MALLOC);
      gcall *stmt = gimple_build_call (malloc_decl, 1, malloc_size);
      gimple_call_set_lhs (stmt, ptr);
      gimple_seq_add_stmt (&malloc_seq, stmt);

      tree atomic_store_decl
        = builtin_decl_explicit (BUILT_IN_ATOMIC_STORE_8);
      gcall *atomic_store
        = gimple_build_call (atomic_store_decl, 3,
                             reduction_buffer_ptr_addr, ptr,
                             build_int_cst (integer_type_node,
                                            MEMMODEL_RELEASE));
      gimple_seq_add_stmt (&malloc_seq, atomic_store);

      /* The wait arm starts out as a single nop, which serves as the
         anchor for the spin loop inserted below.  */
      gimple_seq wait_seq = NULL;
      gimple *nop = gimple_build_nop ();
      gimple_seq_add_stmt (&wait_seq, nop);

      gcn_create_if_else_seq (&gsi, cond_end, &malloc_seq, &wait_seq);

      /* Create an atomic-load (acquire) spin loop for the other gangs
         to wait for gcn_array_reduction_buf.* to be properly set by
         gang zero; the loop repeats while the loaded value is zero.  */
      gimple_stmt_iterator ngsi = gsi_for_stmt (nop);

      gimple_seq atomic_load_seq = NULL;
      tree loadval = make_ssa_name (size_type_node);
      tree atomic_load_decl
        = builtin_decl_explicit (BUILT_IN_ATOMIC_LOAD_8);
      gcall *atomic_load
        = gimple_build_call (atomic_load_decl, 2,
                             reduction_buffer_ptr_addr,
                             build_int_cst (integer_type_node,
                                            MEMMODEL_ACQUIRE));
      gimple_call_set_lhs (atomic_load, loadval);
      gimple_seq_add_stmt (&atomic_load_seq, atomic_load);

      cond = gimple_build_cond (EQ_EXPR, loadval, size_zero_node,
                                NULL_TREE, NULL_TREE);
      gimple_seq_add_stmt (&atomic_load_seq, cond);
      gcn_create_do_while_loop_seq (&ngsi, &atomic_load_seq,
                                    EDGE_TRUE_VALUE);

      gcn_array_reduction_buffers.safe_push (reduction_buffer_ptr);
    }

  if (level == GOMP_DIM_WORKER)
    {
      tree offset = gimple_call_arg (call, 5);
      if (array_p)
        {
          /* Copy from the receiver object when there is one, otherwise
             from the array itself.  */
          tree copy_src = !integer_zerop (ref_to_res)
                          ? ref_to_res : array_addr;
          tree decl
            = gcn_goacc_get_worker_array_reduction_buffer (array_type,
                                                           array_max_idx,
                                                           &seq);
          tree ptr = make_ssa_name (TREE_TYPE (array_addr));
          gimplify_assign (ptr, build_fold_addr_expr (decl), &seq);

          /* Store incoming value to worker reduction buffer.  */
          oacc_build_array_copy (ptr, copy_src, array_max_idx, &seq);
        }
      else
        {
          tree var_type = TREE_TYPE (var);
          /* Store incoming value to worker reduction buffer.  */
          tree decl = gcn_goacc_get_worker_red_decl (var_type, offset);

          gimplify_assign (decl, var, &seq);
        }
    }

  if (lhs)
    gimplify_assign (lhs, unshare_expr (var), &seq);

  pop_gimplify_context (NULL);
  gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
  /* The internal call has been fully replaced.  */
  gsi_remove (&gsi, true);
}

/* Expand IFN_GOACC_REDUCTION_INIT.

   CALL's arguments as read here: 1 = receiver object reference,
   2 = the reduction variable, 3 = the GOMP_DIM_* level, 4 = the
   reduction operator's tree code, 6 = the array address (zero for a
   non-array reduction), 7 = the array's maximum index.

   For arrays, a loop is emitted that stores the operator's initial
   value to every element, and the lhs then receives VAR.  Otherwise
   the lhs receives the operator's initial value, except at gang level
   with no receiver object, where VAR is propagated.  CALL is removed.  */

static void
gcn_goacc_reduction_init (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
  enum tree_code rcode
    = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));

  tree array_addr = gimple_call_arg (call, 6);
  tree array_max_idx = gimple_call_arg (call, 7);
  bool array_p = !integer_zerop (array_addr);
  tree array_type = NULL_TREE;
  if (array_p)
    array_type
      = (TREE_CODE (TREE_TYPE (array_addr)) == POINTER_TYPE
         && TREE_CODE (TREE_TYPE (TREE_TYPE (array_addr))) == ARRAY_TYPE
         ? TREE_TYPE (TREE_TYPE (array_addr))
         : TREE_TYPE (array_addr));

  tree init = NULL_TREE;
  gimple_seq seq = NULL;

  push_gimplify_context (true);

  if (array_p)
    {
      tree loop_index;
      gimple_stmt_iterator loop_body_gsi;
      oacc_build_indexed_ssa_loop (gimple_location (call), array_max_idx,
                                   &gsi, &loop_index, &loop_body_gsi);

      tree init_type = TREE_TYPE (array_type);
      init = omp_reduction_init_op (gimple_location (call), rcode,
                                    init_type);

      /* This SEQ deliberately shadows the outer one: it only collects
         the element store that goes into the loop body.  */
      gimple_seq seq = NULL;
      tree ptr = fold_convert (ptr_type_node, array_addr);
      tree offset = build2 (MULT_EXPR, sizetype,
                            fold_convert (sizetype, loop_index),
                            TYPE_SIZE_UNIT (init_type));
      tree addr = build2 (POINTER_PLUS_EXPR,
                          build_pointer_type (init_type), ptr, offset);
      tree ref = build_simple_mem_ref (addr);

      push_gimplify_context (true);
      gimplify_assign (ref, init, &seq);
      pop_gimplify_context (NULL);
      gsi_insert_seq_before (&loop_body_gsi, seq, GSI_SAME_STMT);

      /* The array is initialized in place, so the call's result is
         just VAR.  */
      init = var;
    }
  else
    init = omp_reduction_init_op (gimple_location (call), rcode,
                                  TREE_TYPE (var));

  if (level == GOMP_DIM_GANG)
    {
      /* If there's no receiver object, propagate the incoming VAR.  */
      tree ref_to_res = gimple_call_arg (call, 1);
      if (integer_zerop (ref_to_res))
        init = var;
    }

  if (lhs)
    gimplify_assign (lhs, init, &seq);

  pop_gimplify_context (NULL);
  gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
  gsi_remove (&gsi, true);
}

/* Expand IFN_GOACC_REDUCTION_FINI.

   CALL's arguments as read here: 1 = receiver object reference,
   2 = the reduction variable, 3 = the GOMP_DIM_* level, 4 = the
   reduction operator's tree code, 5 = the worker buffer offset,
   6 = the array address (zero for a non-array reduction), 7 = the
   array's maximum index.

   The accumulator is the worker reduction buffer at worker level and
   the receiver object otherwise; with neither, VAR is passed through
   unchanged.  When there is an accumulator it is updated through
   gcn_reduction_update.  CALL's lhs, if any, receives the result and
   CALL is removed.  */

static void
gcn_goacc_reduction_fini (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree ref_to_res = gimple_call_arg (call, 1);
  tree var = gimple_call_arg (call, 2);
  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
  enum tree_code op
    = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));

  tree array_addr = gimple_call_arg (call, 6);
  tree array_max_idx = gimple_call_arg (call, 7);
  bool array_p = !integer_zerop (array_addr);

  gimple_seq seq = NULL;
  tree r = NULL_TREE;

  push_gimplify_context (true);

  tree accum = NULL_TREE;

  if (level == GOMP_DIM_WORKER)
    {
      tree offset = gimple_call_arg (call, 5);
      tree decl;

      if (array_p)
        {
          tree array_type = TREE_TYPE (TREE_TYPE (array_addr));
          decl
            = gcn_goacc_get_worker_array_reduction_buffer (array_type,
                                                           array_max_idx,
                                                           &seq);
        }
      else
        {
          tree var_type = TREE_TYPE (var);
          decl = gcn_goacc_get_worker_red_decl (var_type, offset);
        }

      accum = build_fold_addr_expr (decl);
    }
  else if (integer_zerop (ref_to_res))
    r = var;
  else
    accum = ref_to_res;

  if (accum)
    {
      /* UPDATE the accumulator.  Anything collected in SEQ so far must
         be emitted first, because gcn_reduction_update inserts
         directly at GSI.  */
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      seq = NULL;

      if (array_p)
        {
          gcn_reduction_update (gimple_location (call), &gsi, accum,
                                array_addr, op, array_max_idx);
          r = var;
        }
      else
        r = gcn_reduction_update (gimple_location (call), &gsi, accum,
                                  var, op);
    }

  if (lhs)
    gimplify_assign (lhs, r, &seq);
  pop_gimplify_context (NULL);

  gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
  gsi_remove (&gsi, true);
}

/* Expand IFN_GOACC_REDUCTION_TEARDOWN.
*/

/* CALL's arguments as read here: 1 = receiver object reference
   (REF_TO_RES), 2 = the reduction variable, 3 = the GOMP_DIM_* level,
   5 = the worker buffer offset, 6 = the array address (zero for a
   non-array reduction), 7 = the array's maximum index.

   At worker level the value is read back from the worker reduction
   buffer.  Below gang level it is then written to the receiver object
   if there is one, or, for arrays, copied back to the array itself.
   For an array reduction with a receiver object, code is emitted that
   has gang zero free the buffer named by the top entry of
   gcn_array_reduction_buffers, and that entry is popped.  CALL's lhs,
   if any, is assigned VAR and CALL is removed.  */

static void
gcn_goacc_reduction_teardown (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));

  tree array_addr = gimple_call_arg (call, 6);
  tree array_max_idx = gimple_call_arg (call, 7);
  bool array_p = !integer_zerop (array_addr);
  /* Only set at worker level for array reductions.  */
  tree array_accum = NULL_TREE;

  gimple_seq seq = NULL;

  push_gimplify_context (true);

  if (level == GOMP_DIM_WORKER)
    {
      tree offset = gimple_call_arg (call, 5);

      if (array_p)
        {
          tree array_type = TREE_TYPE (TREE_TYPE (array_addr));
          array_accum
            = gcn_goacc_get_worker_array_reduction_buffer (array_type,
                                                           array_max_idx,
                                                           &seq);
        }
      else
        {
          tree var_type = TREE_TYPE (var);
          /* Read the worker reduction buffer.  */
          tree decl = gcn_goacc_get_worker_red_decl (var_type, offset);
          var = decl;
        }
    }

  /* Write to the receiver object.  */
  tree ref_to_res = gimple_call_arg (call, 1);
  if (level != GOMP_DIM_GANG)
    {
      if (!integer_zerop (ref_to_res))
        {
          if (array_p)
            {
              /* NOTE(review): this pointer type is built from
                 ARRAY_ADDR's type, whereas the otherwise identical
                 copy below builds it from ARRAY_ACCUM's type --
                 confirm which is intended.  */
              tree ptr
                = make_ssa_name (build_pointer_type
                                 (TREE_TYPE (array_addr)));
              gimplify_assign (ptr, build_fold_addr_expr (array_accum),
                               &seq);
              oacc_build_array_copy (ref_to_res, ptr, array_max_idx, &seq);
            }
          else
            gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
        }
      else if (array_p)
        {
          tree ptr
            = make_ssa_name (build_pointer_type
                             (TREE_TYPE (array_accum)));
          gimplify_assign (ptr, build_fold_addr_expr (array_accum), &seq);
          oacc_build_array_copy (array_addr, ptr, array_max_idx, &seq);
        }
    }

  if (array_p && !integer_zerop (ref_to_res))
    {
      /* if (gang position != 0) <nothing> else <free the buffer>.  */
      gimple_seq condseq = NULL;
      tree gang_dim_arg = build_int_cst (unsigned_type_node, GOMP_DIM_GANG);
      tree gang_pos = make_ssa_name (integer_type_node);
      gimple *gang_pos_call
        = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, gang_dim_arg);
      gimple_call_set_lhs (gang_pos_call, gang_pos);
      gimple_seq_add_stmt (&condseq, gang_pos_call);
      gimple *cond = gimple_build_cond (NE_EXPR, gang_pos,
                                        integer_zero_node, NULL, NULL);
      gimple_seq_add_stmt (&condseq, cond);

      gimple *cond_end = gimple_seq_last (condseq);
      gsi_insert_seq_before (&gsi, condseq, GSI_SAME_STMT);

      gimple_seq free_seq = NULL;
      gcc_assert (!gcn_array_reduction_buffers.is_empty ());
      tree red_buf_ptr = gcn_array_reduction_buffers.last ();
      tree ptr = make_ssa_name (ptr_type_node);
      gimplify_assign (ptr, red_buf_ptr, &free_seq);
      /* This reduction is finished with the buffer variable.  */
      gcn_array_reduction_buffers.pop ();

      tree free_decl = builtin_decl_explicit (BUILT_IN_FREE);
      gcall *stmt = gimple_build_call (free_decl, 1, ptr);
      gimple_seq_add_stmt (&free_seq, stmt);

      gcn_create_if_else_seq (&gsi, cond_end, &free_seq, NULL);
    }

  if (lhs)
    gimplify_assign (lhs, unshare_expr (var), &seq);

  pop_gimplify_context (NULL);

  gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
  gsi_remove (&gsi, true);
}

/* Implement TARGET_GOACC_REDUCTION.

   Expand calls to the GOACC REDUCTION internal function, into a
   sequence of gimple instructions.

   Argument 3 of CALL is the GOMP_DIM_* level; vector-level reductions
   are handed to default_goacc_reduction.  Otherwise argument 0 selects
   which of the SETUP / INIT / FINI / TEARDOWN expanders runs.  */

void
gcn_goacc_reduction (gcall *call)
{
  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));

  if (level == GOMP_DIM_VECTOR)
    {
      default_goacc_reduction (call);
      return;
    }

  unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));

  switch (code)
    {
    case IFN_GOACC_REDUCTION_SETUP:
      gcn_goacc_reduction_setup (call);
      break;

    case IFN_GOACC_REDUCTION_INIT:
      gcn_goacc_reduction_init (call);
      break;

    case IFN_GOACC_REDUCTION_FINI:
      gcn_goacc_reduction_fini (call);
      break;

    case IFN_GOACC_REDUCTION_TEARDOWN:
      gcn_goacc_reduction_teardown (call);
      break;

    default:
      gcc_unreachable ();
    }
}

/* Adjust the private decl VAR for the given parallelism LEVEL and
   return it.  (Presumably the TARGET_GOACC_ADJUST_PRIVATE_DECL hook --
   confirm against the target hook table.)

   Only gang-level decls are changed: VAR's type is re-qualified into
   ADDR_SPACE_LDS, VAR is made static, given an assembler name and
   finalized, and the current function (when it has machine data) is
   flagged as using flat addressing.  The location argument is
   unused.  */

tree
gcn_goacc_adjust_private_decl (location_t, tree var, int level)
{
  if (level != GOMP_DIM_GANG)
    return var;

  tree type = TREE_TYPE (var);
  tree lds_type
    = build_qualified_type (type,
                            TYPE_QUALS_NO_ADDR_SPACE (type)
                            | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS));
  machine_function *machfun = cfun->machine;

  TREE_TYPE (var) = lds_type;
  TREE_STATIC (var) = 1;

  /* We're making VAR static.  We have to mangle the name to avoid
     collisions between different local variables that share the same
     names.  */
  lhd_set_decl_assembler_name (var);
  varpool_node::finalize_decl (var);

  if (machfun)
    machfun->use_flat_addressing = true;

  return var;
}

/* Implement TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD.

   Create OpenACC worker state propagation record in shared memory.

   RECORD_TYPE is re-qualified into ADDR_SPACE_LDS.  For the receiving
   side (!SENDER) a raw temporary named NAME of pointer-to-that-type is
   returned.  For the sending side an integer constant of that pointer
   type with value OFFSET is returned; OFFSET is forced to 1 when
   RECORD_TYPE is char_type_node.  */

tree
gcn_goacc_create_worker_broadcast_record (tree record_type, bool sender,
                                          const char *name,
                                          unsigned HOST_WIDE_INT offset)
{
  tree type
    = build_qualified_type (record_type,
                            TYPE_QUALS_NO_ADDR_SPACE (record_type)
                            | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS));

  if (!sender)
    {
      tree ptr_type = build_pointer_type (type);
      return create_tmp_var_raw (ptr_type, name);
    }

  if (record_type == char_type_node)
    offset = 1;

  tree ptr_type = build_pointer_type (type);
  return build_int_cst (ptr_type, offset);
}

/* }}}  */