aboutsummaryrefslogtreecommitdiff
path: root/gcc/fortran
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2024-06-05 19:10:26 +0200
committerJakub Jelinek <jakub@redhat.com>2024-06-05 19:10:26 +0200
commit804c0f35a6b1d75bafc550b4b42155744d77f990 (patch)
treea0f68633aa578a16cc13798077cad9d32148a70f /gcc/fortran
parentd7cbcfe7c33645eaf95f175f19884d443817857b (diff)
downloadgcc-804c0f35a6b1d75bafc550b4b42155744d77f990.zip
gcc-804c0f35a6b1d75bafc550b4b42155744d77f990.tar.gz
gcc-804c0f35a6b1d75bafc550b4b42155744d77f990.tar.bz2
openmp: OpenMP loop transformation support
This patch is a largely rewritten version of the https://gcc.gnu.org/pipermail/gcc-patches/2023-October/631764.html patch set which I've promised to adjust the way I'd like it but didn't get to it until now. The previous series together in diffstat was 176 files changed, 12107 insertions(+), 298 deletions(-). This patch is 197 files changed, 10843 insertions(+), 212 deletions(-) and the diff between the old series and the new patch is 268 files changed, 8053 insertions(+), 9231 deletions(-). Only the 5.1/5.2 tile/unroll constructs are supported; in various places some preparations for the other 6.0 loop transformation constructs (interchange/reverse/fuse) are done, but certainly not complete and not everywhere. The important difference is that because tile/unroll partial map 1:1 the original loops to generated canonical loops and add another set of generated loops without canonical form inside of it, the tile/unroll partial constructs are terminal for the generated loop; one can't have some loops from the tile or unroll partial and some further loops from inside the body of that construct. The GENERIC representation attempts to match what the standard specifies, so there are separate OMP_TILE and OMP_UNROLL trees. If for a particular loop in a loop nest of some OpenMP loop it awaits a generated loop from a nested loop, or if in an OMP_LOOPXFORM_LOWERED OMP_TILE/UNROLL construct a generated loop has been moved to some surrounding construct, that particular loop is represented by all NULL_TREEs in the OMP_FOR_{INIT,COND,INCR,ORIG_DECLS} vector. The lowering of the loop transforming constructs is done at gimplification time, at the start of gimplify_omp_for. I think this way it is more maintainable than magic clauses with various loop depths on the other looping constructs or the magic OMP_LOOP_TRANS construct. 
Though, I admit I'm still undecided how to represent the OpenMP 6.0 loop transformation case of say: #pragma omp for collapse (4) for (int i = 0; i < 32; ++i) #pragma omp interchange permutation (2, 1) #pragma omp reverse for (int j = 0; j < 32; ++j) #pragma omp reverse for (int k = 0; k < 32; ++k) for (int l = 0; l < 32; ++l) ; Surely the i loop would go to first vector elements of OMP_FOR_* of the work-sharing loop, then 2 loops are expecting generated loops from interchange which would be inside of the body. But the innermost l loop isn't part of the interchange, so the question is where to put it. One possibility is to have it in the 4th loop of the OMP_FOR, another possibility would be to add some artificial construct inside of the OMP_INTERCHANGE and 2 OMP_REVERSE bodies which would contain the inner loop(s), e.g. it could be OMP_INTERCHANGE without permutation clause or some artificial ones or whatever. I've recently raised various unclear things in the 5.1/5.2/TRs versions regarding loop transformations, in particular https://github.com/OpenMP/spec/issues/3908 https://github.com/OpenMP/spec/issues/3909 (sorry, private links unless you have OpenMP membership). Until those are resolved, I have a sorry on trying to mix generated loops with non-rectangular loops (way too many questions need to be answered before that can be done) and similarly for mixing non-perfectly nested loops with generated loops (again, it can be implemented somehow, but is way too unclear). The second issue is mostly about data sharing, which is ambiguous, the patch makes the artificial iterators of the loops effectively private in the associated constructs (more like local), but for user iterators doesn't do anything in particular, so for now one needs to use explicit data sharing clauses on the non-loop transformation OpenMP looping constructs or surrounding parallel/task/target etc. 
2024-06-05 Jakub Jelinek <jakub@redhat.com> Frederik Harwath <frederik@codesourcery.com> Sandra Loosemore <sandra@codesourcery.com> gcc/ * tree.def (OMP_TILE, OMP_UNROLL): New tree codes. * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_PARTIAL, OMP_CLAUSE_FULL and OMP_CLAUSE_SIZES. * tree.h (OMP_LOOPXFORM_CHECK): Define. (OMP_LOOPXFORM_LOWERED): Define. (OMP_CLAUSE_PARTIAL_EXPR): Define. (OMP_CLAUSE_SIZES_LIST): Define. * tree.cc (omp_clause_num_ops, omp_clause_code_name): Add entries for OMP_CLAUSE_{PARTIAL,FULL,SIZES}. * tree-pretty-print.cc (dump_omp_clause): Handle OMP_CLAUSE_{PARTIAL,FULL,SIZES}. (dump_generic_node): Handle OMP_TILE and OMP_UNROLL. Skip printing loops with NULL OMP_FOR_INIT (node) vector element. * gimplify.cc (is_gimple_stmt): Handle OMP_TILE and OMP_UNROLL. (gimplify_omp_taskloop_expr): For SAVE_EXPR use gimplify_save_expr. (gimplify_omp_loop_xform): New function. (gimplify_omp_for): Call omp_maybe_apply_loop_xforms and if that reshuffles what the passed pointer points to, retry or return GS_OK. Handle OMP_TILE and OMP_UNROLL. (gimplify_omp_loop): Call omp_maybe_apply_loop_xforms and if that reshuffles what the passed pointer points to, return GS_OK. (gimplify_expr): Handle OMP_TILE and OMP_UNROLL. * omp-general.h (omp_loop_number_of_iterations, omp_maybe_apply_loop_xforms): Declare. * omp-general.cc (omp_adjust_for_condition): For LE_EXPR and GE_EXPR with pointers, don't add/subtract one, but the size of what the pointer points to. (omp_loop_number_of_iterations, omp_apply_tile, find_nested_loop_xform, omp_maybe_apply_loop_xforms): New functions. gcc/c-family/ * c-common.h (c_omp_find_generated_loop): Declare. * c-gimplify.cc (c_genericize_control_stmt): Handle OMP_TILE and OMP_UNROLL. * c-omp.cc (c_finish_omp_for): Handle generated loops. (c_omp_is_loop_iterator): Likewise. (c_find_nested_loop_xform_r, c_omp_find_generated_loop): New functions. (c_omp_check_loop_iv): Handle generated loops. 
For now sorry on mixing non-rectangular loop with generated loops. (c_omp_check_loop_binding_exprs): For now sorry on mixing imperfect loops with generated loops. (c_omp_directives): Uncomment tile and unroll entries. * c-pragma.h (enum pragma_kind): Add PRAGMA_OMP_TILE and PRAGMA_OMP_UNROLL, change PRAGMA_OMP__LAST_ to the latter. (enum pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_FULL and PRAGMA_OMP_CLAUSE_PARTIAL. * c-pragma.cc (omp_pragmas_simd): Add tile and unroll omp pragmas. gcc/c/ * c-parser.cc (c_parser_skip_std_attribute_spec_seq): New function. (check_omp_intervening_code): Reject imperfectly nested tile. (c_parser_compound_statement_nostart): If want_nested_loop, use c_parser_omp_next_tokens_can_be_canon_loop instead of just checking for RID_FOR keyword. (c_parser_omp_clause_name): Handle full and partial clause names. (c_parser_omp_clause_allocate): Remove spurious semicolon. (c_parser_omp_clause_full, c_parser_omp_clause_partial): New functions. (c_parser_omp_all_clauses): Handle PRAGMA_OMP_CLAUSE_FULL and PRAGMA_OMP_CLAUSE_PARTIAL. (c_parser_omp_next_tokens_can_be_canon_loop): New function. (c_parser_omp_loop_nest): Parse C23 attributes. Handle tile/unroll constructs. Use c_parser_omp_next_tokens_can_be_canon_loop instead of just checking for RID_FOR keyword. Only add_stmt (body) if it is non-NULL. (c_parser_omp_for_loop): Rename tiling variable to oacc_tiling. For OMP_CLAUSE_SIZES set collapse to list length of OMP_CLAUSE_SIZES_LIST. Use c_parser_omp_next_tokens_can_be_canon_loop instead of just checking for RID_FOR keyword. Remove spurious semicolon. Don't call c_omp_check_loop_binding_exprs if stmt is NULL. Skip generated loops. (c_parser_omp_tile_sizes, c_parser_omp_tile): New functions. (OMP_UNROLL_CLAUSE_MASK): Define. (c_parser_omp_unroll): New function. (c_parser_omp_construct): Handle PRAGMA_OMP_TILE and PRAGMA_OMP_UNROLL. 
* c-typeck.cc (c_finish_omp_clauses): Adjust wording of some of the conflicting clause diagnostic messages to include word clause. Handle OMP_CLAUSE_{FULL,PARTIAL,SIZES} and diagnose full vs. partial conflict. gcc/cp/ * cp-tree.h (dependent_omp_for_p): Add another tree argument. * parser.cc (check_omp_intervening_code): Reject imperfectly nested tile. (cp_parser_statement_seq_opt): If want_nested_loop, use cp_parser_next_tokens_can_be_canon_loop instead of just checking for RID_FOR keyword. (cp_parser_omp_clause_name): Handle full and partial clause names. (cp_parser_omp_clause_full, cp_parser_omp_clause_partial): New functions. (cp_parser_omp_all_clauses): Formatting fix. Handle PRAGMA_OMP_CLAUSE_PARTIAL and PRAGMA_OMP_CLAUSE_FULL. (cp_parser_next_tokens_can_be_canon_loop): New function. (cp_parser_omp_loop_nest): Parse C++11 attributes. Handle tile/unroll constructs. Use cp_parser_next_tokens_can_be_canon_loop instead of just checking for RID_FOR keyword. Only add_stmt cp_parser_omp_loop_nest result if it is non-NULL. (cp_parser_omp_for_loop): Rename tiling variable to oacc_tiling. For OMP_CLAUSE_SIZES set collapse to list length of OMP_CLAUSE_SIZES_LIST. Use cp_parser_next_tokens_can_be_canon_loop instead of just checking for RID_FOR keyword. Remove spurious semicolon. Don't call c_omp_check_loop_binding_exprs if stmt is NULL. Skip and/or handle generated loops. Remove spurious ()s around & operands. (cp_parser_omp_tile_sizes, cp_parser_omp_tile): New functions. (OMP_UNROLL_CLAUSE_MASK): Define. (cp_parser_omp_unroll): New function. (cp_parser_omp_construct): Handle PRAGMA_OMP_TILE and PRAGMA_OMP_UNROLL. (cp_parser_pragma): Likewise. * semantics.cc (finish_omp_clauses): Don't call fold_build_cleanup_point_expr for cases which obviously won't need it, like checked INTEGER_CSTs. Handle OMP_CLAUSE_{FULL,PARTIAL,SIZES} and diagnose full vs. partial conflict. Adjust wording of some of the conflicting clause diagnostic messages to include word clause. 
(finish_omp_for): Use decl equal to global_namespace as a marker for generated loop. Pass also body to dependent_omp_for_p. Skip generated loops. (finish_omp_for_block): Skip generated loops. * pt.cc (tsubst_omp_clauses): Handle OMP_CLAUSE_{FULL,PARTIAL,SIZES}. (tsubst_stmt): Handle OMP_TILE and OMP_UNROLL. Handle or skip generated loops. (dependent_omp_for_p): Add body argument. If declv vector element is NULL, find generated loop. * cp-gimplify.cc (cp_gimplify_expr): Handle OMP_TILE and OMP_UNROLL. (cp_fold_r): Likewise. (cp_genericize_r): Likewise. Skip generated loops. gcc/fortran/ * gfortran.h (enum gfc_statement): Add ST_OMP_UNROLL, ST_OMP_END_UNROLL, ST_OMP_TILE and ST_OMP_END_TILE. (struct gfc_omp_clauses): Add sizes_list, partial, full and erroneous members. (enum gfc_exec_op): Add EXEC_OMP_UNROLL and EXEC_OMP_TILE. (gfc_expr_list_len): Declare. * match.h (gfc_match_omp_tile, gfc_match_omp_unroll): Declare. * openmp.cc (gfc_get_location): Declare. (gfc_free_omp_clauses): Free sizes_list. (match_oacc_expr_list): Rename to ... (match_omp_oacc_expr_list): ... this. Add is_omp argument and change diagnostic wording if it is true. (enum omp_mask2): Add OMP_CLAUSE_{FULL,PARTIAL,SIZES}. (gfc_match_omp_clauses): Parse full, partial and sizes clauses. (gfc_match_oacc_wait): Use match_omp_oacc_expr_list instead of match_oacc_expr_list. (OMP_UNROLL_CLAUSES, OMP_TILE_CLAUSES): Define. (gfc_match_omp_tile, gfc_match_omp_unroll): New functions. (resolve_omp_clauses): Diagnose full vs. partial clause conflict. Resolve sizes clause arguments. (find_nested_loop_in_chain): Use switch instead of series of ifs. Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. (gfc_resolve_omp_do_blocks): Set omp_current_do_collapse to list length of sizes_list if present. (gfc_resolve_do_iterator): Return for EXEC_OMP_TILE or EXEC_OMP_UNROLL. (restructure_intervening_code): Remove spurious ()s around & operands. (is_outer_iteration_variable): Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. 
(check_nested_loop_in_chain): Likewise. (expr_is_invariant): Likewise. (resolve_omp_do): Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. Diagnose tile without sizes clause. Use sizes_list length for count if non-NULL. Set code->ext.omp_clauses->erroneous on loops where we've reported diagnostics. Sorry for mixing non-rectangular loops with generated loops. (omp_code_to_statement): Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. (gfc_resolve_omp_directive): Likewise. * parse.cc (decode_omp_directive): Parse end tile, end unroll, tile and unroll. Move nothing entry alphabetically. (case_exec_markers): Add ST_OMP_TILE and ST_OMP_UNROLL. (gfc_ascii_statement): Handle ST_OMP_END_TILE, ST_OMP_END_UNROLL, ST_OMP_TILE and ST_OMP_UNROLL. (parse_omp_do): Add nested argument. Handle ST_OMP_TILE and ST_OMP_UNROLL. (parse_omp_structured_block): Adjust parse_omp_do caller. (parse_executable): Likewise. Handle ST_OMP_TILE and ST_OMP_UNROLL. * resolve.cc (gfc_resolve_blocks): Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. (gfc_resolve_code): Likewise. * st.cc (gfc_free_statement): Likewise. * trans.cc (trans_code): Likewise. * trans-openmp.cc (gfc_trans_omp_clauses): Handle full, partial and sizes clauses. Use tree_cons + nreverse instead of temporary vector and build_tree_list_vec for tile_list handling. (gfc_expr_list_len): New function. (gfc_trans_omp_do): Rename tile to oacc_tile. Handle sizes clause. Don't assert code->op is EXEC_DO. Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. (gfc_trans_omp_directive): Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. * dump-parse-tree.cc (show_omp_clauses): Dump full, partial and sizes clauses. (show_omp_node): Handle EXEC_OMP_TILE and EXEC_OMP_UNROLL. (show_code_node): Likewise. gcc/testsuite/ * c-c++-common/gomp/attrs-tile-1.c: New test. * c-c++-common/gomp/attrs-tile-2.c: New test. * c-c++-common/gomp/attrs-tile-3.c: New test. * c-c++-common/gomp/attrs-tile-4.c: New test. * c-c++-common/gomp/attrs-tile-5.c: New test. * c-c++-common/gomp/attrs-tile-6.c: New test. 
* c-c++-common/gomp/attrs-unroll-1.c: New test. * c-c++-common/gomp/attrs-unroll-2.c: New test. * c-c++-common/gomp/attrs-unroll-3.c: New test. * c-c++-common/gomp/attrs-unroll-inner-1.c: New test. * c-c++-common/gomp/attrs-unroll-inner-2.c: New test. * c-c++-common/gomp/attrs-unroll-inner-3.c: New test. * c-c++-common/gomp/attrs-unroll-inner-4.c: New test. * c-c++-common/gomp/attrs-unroll-inner-5.c: New test. * c-c++-common/gomp/imperfect-attributes.c: Adjust expected diagnostics. * c-c++-common/gomp/imperfect-loop-nest.c: New test. * c-c++-common/gomp/ordered-5.c: New test. * c-c++-common/gomp/scan-7.c: New test. * c-c++-common/gomp/tile-1.c: New test. * c-c++-common/gomp/tile-2.c: New test. * c-c++-common/gomp/tile-3.c: New test. * c-c++-common/gomp/tile-4.c: New test. * c-c++-common/gomp/tile-5.c: New test. * c-c++-common/gomp/tile-6.c: New test. * c-c++-common/gomp/tile-7.c: New test. * c-c++-common/gomp/tile-8.c: New test. * c-c++-common/gomp/tile-9.c: New test. * c-c++-common/gomp/tile-10.c: New test. * c-c++-common/gomp/tile-11.c: New test. * c-c++-common/gomp/tile-12.c: New test. * c-c++-common/gomp/tile-13.c: New test. * c-c++-common/gomp/tile-14.c: New test. * c-c++-common/gomp/tile-15.c: New test. * c-c++-common/gomp/unroll-1.c: New test. * c-c++-common/gomp/unroll-2.c: New test. * c-c++-common/gomp/unroll-3.c: New test. * c-c++-common/gomp/unroll-4.c: New test. * c-c++-common/gomp/unroll-5.c: New test. * c-c++-common/gomp/unroll-6.c: New test. * c-c++-common/gomp/unroll-7.c: New test. * c-c++-common/gomp/unroll-8.c: New test. * c-c++-common/gomp/unroll-9.c: New test. * c-c++-common/gomp/unroll-inner-1.c: New test. * c-c++-common/gomp/unroll-inner-2.c: New test. * c-c++-common/gomp/unroll-inner-3.c: New test. * c-c++-common/gomp/unroll-non-rect-1.c: New test. * c-c++-common/gomp/unroll-non-rect-2.c: New test. * c-c++-common/gomp/unroll-non-rect-3.c: New test. * c-c++-common/gomp/unroll-simd-1.c: New test. 
* gcc.dg/gomp/attrs-4.c: Adjust expected diagnostics. * gcc.dg/gomp/for-1.c: Likewise. * gcc.dg/gomp/for-11.c: Likewise. * g++.dg/gomp/attrs-4.C: Likewise. * g++.dg/gomp/for-1.C: Likewise. * g++.dg/gomp/pr94512.C: Likewise. * g++.dg/gomp/tile-1.C: New test. * g++.dg/gomp/tile-2.C: New test. * g++.dg/gomp/unroll-1.C: New test. * g++.dg/gomp/unroll-2.C: New test. * g++.dg/gomp/unroll-3.C: New test. * gfortran.dg/gomp/inner-loops-1.f90: New test. * gfortran.dg/gomp/inner-loops-2.f90: New test. * gfortran.dg/gomp/pure-1.f90: Add tests for !$omp unroll and !$omp tile. * gfortran.dg/gomp/pure-2.f90: Remove those tests from here. * gfortran.dg/gomp/scan-9.f90: New test. * gfortran.dg/gomp/tile-1.f90: New test. * gfortran.dg/gomp/tile-2.f90: New test. * gfortran.dg/gomp/tile-3.f90: New test. * gfortran.dg/gomp/tile-4.f90: New test. * gfortran.dg/gomp/tile-5.f90: New test. * gfortran.dg/gomp/tile-6.f90: New test. * gfortran.dg/gomp/tile-7.f90: New test. * gfortran.dg/gomp/tile-8.f90: New test. * gfortran.dg/gomp/tile-9.f90: New test. * gfortran.dg/gomp/tile-10.f90: New test. * gfortran.dg/gomp/tile-imperfect-nest-1.f90: New test. * gfortran.dg/gomp/tile-imperfect-nest-2.f90: New test. * gfortran.dg/gomp/tile-inner-loops-1.f90: New test. * gfortran.dg/gomp/tile-inner-loops-2.f90: New test. * gfortran.dg/gomp/tile-inner-loops-3.f90: New test. * gfortran.dg/gomp/tile-inner-loops-4.f90: New test. * gfortran.dg/gomp/tile-inner-loops-5.f90: New test. * gfortran.dg/gomp/tile-inner-loops-6.f90: New test. * gfortran.dg/gomp/tile-inner-loops-7.f90: New test. * gfortran.dg/gomp/tile-inner-loops-8.f90: New test. * gfortran.dg/gomp/tile-non-rectangular-1.f90: New test. * gfortran.dg/gomp/tile-non-rectangular-2.f90: New test. * gfortran.dg/gomp/tile-non-rectangular-3.f90: New test. * gfortran.dg/gomp/tile-unroll-1.f90: New test. * gfortran.dg/gomp/tile-unroll-2.f90: New test. * gfortran.dg/gomp/unroll-1.f90: New test. * gfortran.dg/gomp/unroll-2.f90: New test. 
* gfortran.dg/gomp/unroll-3.f90: New test. * gfortran.dg/gomp/unroll-4.f90: New test. * gfortran.dg/gomp/unroll-5.f90: New test. * gfortran.dg/gomp/unroll-6.f90: New test. * gfortran.dg/gomp/unroll-7.f90: New test. * gfortran.dg/gomp/unroll-8.f90: New test. * gfortran.dg/gomp/unroll-9.f90: New test. * gfortran.dg/gomp/unroll-10.f90: New test. * gfortran.dg/gomp/unroll-11.f90: New test. * gfortran.dg/gomp/unroll-12.f90: New test. * gfortran.dg/gomp/unroll-13.f90: New test. * gfortran.dg/gomp/unroll-inner-loop-1.f90: New test. * gfortran.dg/gomp/unroll-inner-loop-2.f90: New test. * gfortran.dg/gomp/unroll-no-clause-1.f90: New test. * gfortran.dg/gomp/unroll-non-rect-1.f90: New test. * gfortran.dg/gomp/unroll-non-rect-2.f90: New test. * gfortran.dg/gomp/unroll-simd-1.f90: New test. * gfortran.dg/gomp/unroll-simd-2.f90: New test. * gfortran.dg/gomp/unroll-simd-3.f90: New test. * gfortran.dg/gomp/unroll-tile-1.f90: New test. * gfortran.dg/gomp/unroll-tile-2.f90: New test. * gfortran.dg/gomp/unroll-tile-inner-1.f90: New test. libgomp/ * testsuite/libgomp.c-c++-common/imperfect-transform-1.c: New test. * testsuite/libgomp.c-c++-common/imperfect-transform-2.c: New test. * testsuite/libgomp.c-c++-common/matrix-1.h: New test. * testsuite/libgomp.c-c++-common/matrix-constant-iter.h: New test. * testsuite/libgomp.c-c++-common/matrix-helper.h: New test. * testsuite/libgomp.c-c++-common/matrix-no-directive-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-no-directive-unroll-full-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-distribute-parallel-for-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-for-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-parallel-for-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-parallel-masked-taskloop-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-parallel-masked-taskloop-simd-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-target-parallel-for-1.c: New test. 
* testsuite/libgomp.c-c++-common/matrix-omp-target-teams-distribute-parallel-for-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-taskloop-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-omp-teams-distribute-parallel-for-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-simd-1.c: New test. * testsuite/libgomp.c-c++-common/matrix-transform-variants-1.h: New test. * testsuite/libgomp.c-c++-common/target-imperfect-transform-1.c: New test. * testsuite/libgomp.c-c++-common/target-imperfect-transform-2.c: New test. * testsuite/libgomp.c-c++-common/unroll-1.c: New test. * testsuite/libgomp.c-c++-common/unroll-non-rect-1.c: New test. * testsuite/libgomp.c++/matrix-no-directive-unroll-full-1.C: New test. * testsuite/libgomp.c++/tile-2.C: New test. * testsuite/libgomp.c++/tile-3.C: New test. * testsuite/libgomp.c++/unroll-1.C: New test. * testsuite/libgomp.c++/unroll-2.C: New test. * testsuite/libgomp.c++/unroll-full-tile.C: New test. * testsuite/libgomp.fortran/imperfect-transform-1.f90: New test. * testsuite/libgomp.fortran/imperfect-transform-2.f90: New test. * testsuite/libgomp.fortran/inner-1.f90: New test. * testsuite/libgomp.fortran/nested-fn.f90: New test. * testsuite/libgomp.fortran/target-imperfect-transform-1.f90: New test. * testsuite/libgomp.fortran/target-imperfect-transform-2.f90: New test. * testsuite/libgomp.fortran/tile-1.f90: New test. * testsuite/libgomp.fortran/tile-2.f90: New test. * testsuite/libgomp.fortran/tile-unroll-1.f90: New test. * testsuite/libgomp.fortran/tile-unroll-2.f90: New test. * testsuite/libgomp.fortran/tile-unroll-3.f90: New test. * testsuite/libgomp.fortran/tile-unroll-4.f90: New test. * testsuite/libgomp.fortran/unroll-1.f90: New test. * testsuite/libgomp.fortran/unroll-2.f90: New test. * testsuite/libgomp.fortran/unroll-3.f90: New test. * testsuite/libgomp.fortran/unroll-4.f90: New test. * testsuite/libgomp.fortran/unroll-5.f90: New test. * testsuite/libgomp.fortran/unroll-6.f90: New test. 
* testsuite/libgomp.fortran/unroll-7a.f90: New test. * testsuite/libgomp.fortran/unroll-7b.f90: New test. * testsuite/libgomp.fortran/unroll-7c.f90: New test. * testsuite/libgomp.fortran/unroll-7.f90: New test. * testsuite/libgomp.fortran/unroll-8.f90: New test. * testsuite/libgomp.fortran/unroll-simd-1.f90: New test. * testsuite/libgomp.fortran/unroll-tile-1.f90: New test. * testsuite/libgomp.fortran/unroll-tile-2.f90: New test.
Diffstat (limited to 'gcc/fortran')
-rw-r--r--gcc/fortran/dump-parse-tree.cc26
-rw-r--r--gcc/fortran/gfortran.h11
-rw-r--r--gcc/fortran/match.h2
-rw-r--r--gcc/fortran/openmp.cc250
-rw-r--r--gcc/fortran/parse.cc49
-rw-r--r--gcc/fortran/resolve.cc6
-rw-r--r--gcc/fortran/st.cc2
-rw-r--r--gcc/fortran/trans-openmp.cc85
-rw-r--r--gcc/fortran/trans.cc2
9 files changed, 380 insertions, 53 deletions
diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 87a6503..80aa8ef 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -2119,6 +2119,26 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses)
}
if (omp_clauses->assume)
show_omp_assumes (omp_clauses->assume);
+ if (omp_clauses->full)
+ fputs (" FULL", dumpfile);
+ if (omp_clauses->partial)
+ {
+ fputs (" PARTIAL", dumpfile);
+ if (omp_clauses->partial > 0)
+ fprintf (dumpfile, "(%d)", omp_clauses->partial);
+ }
+ if (omp_clauses->sizes_list)
+ {
+ gfc_expr_list *sizes;
+ fputs (" SIZES(", dumpfile);
+ for (sizes = omp_clauses->sizes_list; sizes; sizes = sizes->next)
+ {
+ show_expr (sizes->expr);
+ if (sizes->next)
+ fputs (", ", dumpfile);
+ }
+ fputc (')', dumpfile);
+ }
}
/* Show a single OpenMP or OpenACC directive node and everything underneath it
@@ -2231,6 +2251,8 @@ show_omp_node (int level, gfc_code *c)
name = "TEAMS DISTRIBUTE PARALLEL DO SIMD"; break;
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD: name = "TEAMS DISTRIBUTE SIMD"; break;
case EXEC_OMP_TEAMS_LOOP: name = "TEAMS LOOP"; break;
+ case EXEC_OMP_TILE: name = "TILE"; break;
+ case EXEC_OMP_UNROLL: name = "UNROLL"; break;
case EXEC_OMP_WORKSHARE: name = "WORKSHARE"; break;
default:
gcc_unreachable ();
@@ -2309,6 +2331,8 @@ show_omp_node (int level, gfc_code *c)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
omp_clauses = c->ext.omp_clauses;
break;
@@ -3559,6 +3583,8 @@ show_code_node (int level, gfc_code *c)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
show_omp_node (level, c);
break;
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index de1a7cd..36ed8ee 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -321,7 +321,9 @@ enum gfc_statement
ST_OMP_ALLOCATE, ST_OMP_ALLOCATE_EXEC,
ST_OMP_ALLOCATORS, ST_OMP_END_ALLOCATORS,
/* Note: gfc_match_omp_nothing returns ST_NONE. */
- ST_OMP_NOTHING, ST_NONE
+ ST_OMP_NOTHING, ST_NONE,
+ ST_OMP_UNROLL, ST_OMP_END_UNROLL,
+ ST_OMP_TILE, ST_OMP_END_TILE
};
/* Types of interfaces that we can have. Assignment interfaces are
@@ -1577,11 +1579,13 @@ typedef struct gfc_omp_clauses
struct gfc_expr *dist_chunk_size;
struct gfc_expr *message;
struct gfc_omp_assumptions *assume;
+ struct gfc_expr_list *sizes_list;
const char *critical_name;
enum gfc_omp_default_sharing default_sharing;
enum gfc_omp_atomic_op atomic_op;
enum gfc_omp_defaultmap defaultmap[OMP_DEFAULTMAP_CAT_NUM];
int collapse, orderedc;
+ int partial;
unsigned nowait:1, ordered:1, untied:1, mergeable:1, ancestor:1;
unsigned inbranch:1, notinbranch:1, nogroup:1;
unsigned sched_simd:1, sched_monotonic:1, sched_nonmonotonic:1;
@@ -1591,6 +1595,7 @@ typedef struct gfc_omp_clauses
unsigned non_rectangular:1, order_concurrent:1;
unsigned contains_teams_construct:1, target_first_st_is_teams:1;
unsigned contained_in_target_construct:1, indirect:1;
+ unsigned full:1, erroneous:1;
ENUM_BITFIELD (gfc_omp_sched_kind) sched_kind:3;
ENUM_BITFIELD (gfc_omp_device_type) device_type:2;
ENUM_BITFIELD (gfc_omp_memorder) memorder:3;
@@ -3033,6 +3038,7 @@ enum gfc_exec_op
EXEC_OMP_TARGET_TEAMS_LOOP, EXEC_OMP_MASKED, EXEC_OMP_PARALLEL_MASKED,
EXEC_OMP_PARALLEL_MASKED_TASKLOOP, EXEC_OMP_PARALLEL_MASKED_TASKLOOP_SIMD,
EXEC_OMP_MASKED_TASKLOOP, EXEC_OMP_MASKED_TASKLOOP_SIMD, EXEC_OMP_SCOPE,
+ EXEC_OMP_UNROLL, EXEC_OMP_TILE,
EXEC_OMP_ERROR, EXEC_OMP_ALLOCATE, EXEC_OMP_ALLOCATORS
};
@@ -3958,6 +3964,9 @@ void gfc_generate_module_code (gfc_namespace *);
/* trans-intrinsic.cc */
bool gfc_inline_intrinsic_function_p (gfc_expr *);
+/* trans-openmp.cc */
+int gfc_expr_list_len (gfc_expr_list *);
+
/* bbt.cc */
typedef int (*compare_fn) (void *, void *);
void gfc_insert_bbt (void *, void *, compare_fn);
diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h
index b099213..c2b7d69 100644
--- a/gcc/fortran/match.h
+++ b/gcc/fortran/match.h
@@ -228,6 +228,8 @@ match gfc_match_omp_teams_distribute_parallel_do_simd (void);
match gfc_match_omp_teams_distribute_simd (void);
match gfc_match_omp_teams_loop (void);
match gfc_match_omp_threadprivate (void);
+match gfc_match_omp_tile (void);
+match gfc_match_omp_unroll (void);
match gfc_match_omp_workshare (void);
match gfc_match_omp_end_critical (void);
match gfc_match_omp_end_nowait (void);
diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 5246647..9b30a10 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "bitmap.h"
#include "omp-api.h" /* For omp_runtime_api_procname. */
+location_t gfc_get_location (locus *);
static gfc_statement omp_code_to_statement (gfc_code *);
@@ -195,6 +196,7 @@ gfc_free_omp_clauses (gfc_omp_clauses *c)
i == OMP_LIST_USES_ALLOCATORS);
gfc_free_expr_list (c->wait_list);
gfc_free_expr_list (c->tile_list);
+ gfc_free_expr_list (c->sizes_list);
free (CONST_CAST (char *, c->critical_name));
if (c->assume)
{
@@ -762,8 +764,8 @@ cleanup:
}
static match
-match_oacc_expr_list (const char *str, gfc_expr_list **list,
- bool allow_asterisk)
+match_omp_oacc_expr_list (const char *str, gfc_expr_list **list,
+ bool allow_asterisk, bool is_omp)
{
gfc_expr_list *head, *tail, *p;
locus old_loc;
@@ -815,7 +817,10 @@ match_oacc_expr_list (const char *str, gfc_expr_list **list,
return MATCH_YES;
syntax:
- gfc_error ("Syntax error in OpenACC expression list at %C");
+ if (is_omp)
+ gfc_error ("Syntax error in OpenMP expression list at %C");
+ else
+ gfc_error ("Syntax error in OpenACC expression list at %C");
cleanup:
gfc_free_expr_list (head);
@@ -1098,6 +1103,9 @@ enum omp_mask2
OMP_CLAUSE_ASSUMPTIONS, /* OpenMP 5.1. */
OMP_CLAUSE_USES_ALLOCATORS, /* OpenMP 5.0 */
OMP_CLAUSE_INDIRECT, /* OpenMP 5.1 */
+ OMP_CLAUSE_FULL, /* OpenMP 5.1. */
+ OMP_CLAUSE_PARTIAL, /* OpenMP 5.1. */
+ OMP_CLAUSE_SIZES, /* OpenMP 5.1. */
/* This must come last. */
OMP_MASK2_LAST
};
@@ -2682,6 +2690,14 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
&& gfc_match_motion_var_list ("from (", &c->lists[OMP_LIST_FROM],
&head) == MATCH_YES)
continue;
+ if ((mask & OMP_CLAUSE_FULL)
+ && (m = gfc_match_dupl_check (!c->full, "full")) != MATCH_NO)
+ {
+ if (m == MATCH_ERROR)
+ goto error;
+ c->full = needs_space = true;
+ continue;
+ }
break;
case 'g':
if ((mask & OMP_CLAUSE_GANG)
@@ -3367,6 +3383,32 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
}
break;
case 'p':
+ if (mask & OMP_CLAUSE_PARTIAL)
+ {
+ if ((m = gfc_match_dupl_check (!c->partial, "partial"))
+ != MATCH_NO)
+ {
+ int expr;
+ if (m == MATCH_ERROR)
+ goto error;
+
+ c->partial = -1;
+
+ gfc_expr *cexpr = NULL;
+ m = gfc_match (" ( %e )", &cexpr);
+ if (m == MATCH_NO)
+ ;
+ else if (m == MATCH_YES
+ && !gfc_extract_int (cexpr, &expr, -1)
+ && expr > 0)
+ c->partial = expr;
+ else
+ gfc_error_now ("PARTIAL clause argument not constant "
+ "positive integer at %C");
+ gfc_free_expr (cexpr);
+ continue;
+ }
+ }
if ((mask & OMP_CLAUSE_COPY)
&& gfc_match ("pcopy ( ") == MATCH_YES
&& gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP],
@@ -3649,6 +3691,20 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
}
continue;
}
+ if ((mask & OMP_CLAUSE_SIZES)
+ && ((m = gfc_match_dupl_check (!c->sizes_list, "sizes"))
+ != MATCH_NO))
+ {
+ if (m == MATCH_ERROR)
+ goto error;
+ m = match_omp_oacc_expr_list (" (", &c->sizes_list, false, true);
+ if (m == MATCH_ERROR)
+ goto error;
+ if (m == MATCH_YES)
+ continue;
+ gfc_error ("Expected %<(%> after %qs at %C", "sizes");
+ goto error;
+ }
break;
case 't':
if ((mask & OMP_CLAUSE_TASK_REDUCTION)
@@ -3675,8 +3731,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
}
if ((mask & OMP_CLAUSE_TILE)
&& !c->tile_list
- && match_oacc_expr_list ("tile (", &c->tile_list,
- true) == MATCH_YES)
+ && match_omp_oacc_expr_list ("tile (", &c->tile_list,
+ true, false) == MATCH_YES)
continue;
if ((mask & OMP_CLAUSE_TO) && (mask & OMP_CLAUSE_LINK))
{
@@ -3772,7 +3828,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
if ((mask & OMP_CLAUSE_WAIT)
&& gfc_match ("wait") == MATCH_YES)
{
- m = match_oacc_expr_list (" (", &c->wait_list, false);
+ m = match_omp_oacc_expr_list (" (", &c->wait_list, false, false);
if (m == MATCH_ERROR)
goto error;
else if (m == MATCH_NO)
@@ -4128,7 +4184,7 @@ gfc_match_oacc_wait (void)
bool space = true;
match m;
- m = match_oacc_expr_list (" (", &wait_list, true);
+ m = match_omp_oacc_expr_list (" (", &wait_list, true, false);
if (m == MATCH_ERROR)
return m;
else if (m == MATCH_YES)
@@ -4528,6 +4584,10 @@ cleanup:
(omp_mask (OMP_CLAUSE_AT) | OMP_CLAUSE_MESSAGE | OMP_CLAUSE_SEVERITY)
#define OMP_WORKSHARE_CLAUSES \
omp_mask (OMP_CLAUSE_NOWAIT)
+#define OMP_UNROLL_CLAUSES \
+ (omp_mask (OMP_CLAUSE_FULL) | OMP_CLAUSE_PARTIAL)
+#define OMP_TILE_CLAUSES \
+ (omp_mask (OMP_CLAUSE_SIZES))
#define OMP_ALLOCATORS_CLAUSES \
omp_mask (OMP_CLAUSE_ALLOCATE)
@@ -6793,6 +6853,17 @@ gfc_match_omp_teams_distribute_simd (void)
| OMP_SIMD_CLAUSES);
}
+match
+gfc_match_omp_tile (void)
+{ /* Matcher for !$OMP TILE: defer to match_omp with the TILE clause mask.  */
+ return match_omp (EXEC_OMP_TILE, OMP_TILE_CLAUSES);
+}
+
+match
+gfc_match_omp_unroll (void)
+{ /* Matcher for !$OMP UNROLL: defer to match_omp with the UNROLL clause mask.  */
+ return match_omp (EXEC_OMP_UNROLL, OMP_UNROLL_CLAUSES);
+}
match
gfc_match_omp_workshare (void)
@@ -9182,6 +9253,9 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
gfc_error ("%<REDUCTION%> clause at %L must not be used together with "
"%<NOGROUP%> clause",
&omp_clauses->lists[OMP_LIST_REDUCTION]->where);
+ if (omp_clauses->full && omp_clauses->partial)
+ gfc_error ("%<FULL%> clause at %C must not be used together with "
+ "%<PARTIAL%> clause");
if (omp_clauses->async)
if (omp_clauses->async_expr)
resolve_scalar_int_expr (omp_clauses->async_expr, "ASYNC");
@@ -9233,6 +9307,22 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
gfc_error ("%s must contain at least one MAP clause at %L",
p, &code->loc);
}
+ if (omp_clauses->sizes_list)
+ {
+ gfc_expr_list *el;
+ for (el = omp_clauses->sizes_list; el; el = el->next)
+ {
+ resolve_scalar_int_expr (el->expr, "SIZES");
+ if (el->expr->expr_type != EXPR_CONSTANT)
+ gfc_error ("SIZES requires constant expression at %L",
+ &el->expr->where);
+ else if (el->expr->expr_type == EXPR_CONSTANT
+ && el->expr->ts.type == BT_INTEGER
+ && mpz_sgn (el->expr->value.integer) <= 0)
+ gfc_error ("INTEGER expression of %s clause at %L must be "
+ "positive", "SIZES", &el->expr->where);
+ }
+ }
if (!openacc && omp_clauses->detach)
{
@@ -9913,16 +10003,19 @@ find_nested_loop_in_chain (gfc_code *chain)
return NULL;
for (code = chain; code; code = code->next)
- {
- if (code->op == EXEC_DO)
+ switch (code->op)
+ {
+ case EXEC_DO:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
return code;
- else if (code->op == EXEC_BLOCK)
- {
- gfc_code *c = find_nested_loop_in_block (code);
- if (c)
- return c;
- }
- }
+ case EXEC_BLOCK:
+ if (gfc_code *c = find_nested_loop_in_block (code))
+ return c;
+ break;
+ default:
+ break;
+ }
return NULL;
}
@@ -9950,6 +10043,9 @@ gfc_resolve_omp_do_blocks (gfc_code *code, gfc_namespace *ns)
omp_current_do_collapse = code->ext.omp_clauses->orderedc;
else if (code->ext.omp_clauses->collapse)
omp_current_do_collapse = code->ext.omp_clauses->collapse;
+ else if (code->ext.omp_clauses->sizes_list)
+ omp_current_do_collapse
+ = gfc_expr_list_len (code->ext.omp_clauses->sizes_list);
else
omp_current_do_collapse = 1;
if (code->ext.omp_clauses->lists[OMP_LIST_REDUCTION_INSCAN])
@@ -10141,6 +10237,8 @@ gfc_resolve_do_iterator (gfc_code *code, gfc_symbol *sym, bool add_clause)
if (code == c)
return;
c = find_nested_loop_in_chain (c->block->next);
+ if (c && (c->op == EXEC_OMP_TILE || c->op == EXEC_OMP_UNROLL))
+ return;
}
/* An openacc context may represent a data clause. Abort if so. */
@@ -10439,7 +10537,7 @@ restructure_intervening_code (gfc_code **chainp, gfc_code *outer_loop,
gfc_code *tail = NULL;
gfc_code *innermost_loop = NULL;
- for (code = *chainp; code; code = code->next, chainp = &((*chainp)->next))
+ for (code = *chainp; code; code = code->next, chainp = &(*chainp)->next)
{
if (code->op == EXEC_DO)
{
@@ -10452,7 +10550,7 @@ restructure_intervening_code (gfc_code **chainp, gfc_code *outer_loop,
innermost_loop = code;
else
innermost_loop
- = restructure_intervening_code (&(code->block->next),
+ = restructure_intervening_code (&code->block->next,
code, count - 1);
break;
}
@@ -10467,7 +10565,7 @@ restructure_intervening_code (gfc_code **chainp, gfc_code *outer_loop,
code->next = NULL;
innermost_loop
- = restructure_intervening_code (&(ns->code), outer_loop,
+ = restructure_intervening_code (&ns->code, outer_loop,
count);
/* At this point we have already pulled out the nested loop and
@@ -10524,6 +10622,11 @@ is_outer_iteration_variable (gfc_code *code, int depth, gfc_symbol *var)
{
do_code = find_nested_loop_in_chain (do_code->block->next);
gcc_assert (do_code);
+ if (do_code->op == EXEC_OMP_TILE || do_code->op == EXEC_OMP_UNROLL)
+ {
+ --i;
+ continue;
+ }
gfc_symbol *ivar = do_code->ext.iterator->var->symtree->n.sym;
if (var == ivar)
return true;
@@ -10548,6 +10651,8 @@ check_nested_loop_in_chain (gfc_code *chain, gfc_expr *expr, gfc_symbol *sym,
{
if (code->op == EXEC_DO)
return code;
+ else if (code->op == EXEC_OMP_TILE || code->op == EXEC_OMP_UNROLL)
+ return check_nested_loop_in_chain (code->block->next, expr, sym, bad);
else if (code->op == EXEC_BLOCK)
{
gfc_code *c = check_nested_loop_in_block (code, expr, sym, bad);
@@ -10654,6 +10759,11 @@ expr_is_invariant (gfc_code *code, int depth, gfc_expr *expr)
{
do_code = find_nested_loop_in_chain (do_code->block->next);
gcc_assert (do_code);
+ if (do_code->op == EXEC_OMP_TILE || do_code->op == EXEC_OMP_UNROLL)
+ {
+ --i;
+ continue;
+ }
gfc_symbol *ivar = do_code->ext.iterator->var->symtree->n.sym;
if (gfc_find_sym_in_expr (ivar, expr))
return false;
@@ -10728,13 +10838,14 @@ static void
resolve_omp_do (gfc_code *code)
{
gfc_code *do_code, *next;
- int list, i, count;
+ int list, i, count, non_generated_count;
gfc_omp_namelist *n;
gfc_symbol *dovar;
const char *name;
bool is_simd = false;
bool errorp = false;
bool perfect_nesting_errorp = false;
+ bool imperfect = false;
switch (code->op)
{
@@ -10829,15 +10940,23 @@ resolve_omp_do (gfc_code *code)
is_simd = true;
break;
case EXEC_OMP_TEAMS_LOOP: name = "!$OMP TEAMS LOOP"; break;
+ case EXEC_OMP_TILE: name = "!$OMP TILE"; break;
+ case EXEC_OMP_UNROLL: name = "!$OMP UNROLL"; break;
default: gcc_unreachable ();
}
if (code->ext.omp_clauses)
resolve_omp_clauses (code, code->ext.omp_clauses, NULL);
+ if (code->op == EXEC_OMP_TILE && code->ext.omp_clauses->sizes_list == NULL)
+ gfc_error ("SIZES clause is required on !$OMP TILE construct at %L",
+ &code->loc);
+
do_code = code->block->next;
if (code->ext.omp_clauses->orderedc)
count = code->ext.omp_clauses->orderedc;
+ else if (code->ext.omp_clauses->sizes_list)
+ count = gfc_expr_list_len (code->ext.omp_clauses->sizes_list);
else
{
count = code->ext.omp_clauses->collapse;
@@ -10845,6 +10964,7 @@ resolve_omp_do (gfc_code *code)
count = 1;
}
+ non_generated_count = count;
/* While the spec defines the loop nest depth independently of the COLLAPSE
clause, in practice the middle end only pays attention to the COLLAPSE
depth and treats any further inner loops as the final-loop-body. So
@@ -10858,13 +10978,61 @@ resolve_omp_do (gfc_code *code)
{
gfc_error ("%s cannot be a DO WHILE or DO without loop control "
"at %L", name, &do_code->loc);
- return;
+ goto fail;
}
if (do_code->op == EXEC_DO_CONCURRENT)
{
gfc_error ("%s cannot be a DO CONCURRENT loop at %L", name,
&do_code->loc);
- return;
+ goto fail;
+ }
+ if (do_code->op == EXEC_OMP_TILE || do_code->op == EXEC_OMP_UNROLL)
+ {
+ if (do_code->op == EXEC_OMP_UNROLL)
+ {
+ if (!do_code->ext.omp_clauses->partial)
+ {
+ gfc_error ("Generated loop of UNROLL construct at %L "
+ "without PARTIAL clause does not have "
+ "canonical form", &do_code->loc);
+ goto fail;
+ }
+ else if (i != count)
+ {
+ gfc_error ("UNROLL construct at %L with PARTIAL clause "
+ "generates just one loop with canonical form "
+ "but %d loops are needed",
+ &do_code->loc, count - i + 1);
+ goto fail;
+ }
+ }
+ else if (do_code->op == EXEC_OMP_TILE)
+ {
+ if (do_code->ext.omp_clauses->sizes_list == NULL)
+ /* This should have been diagnosed earlier already. */
+ return;
+ int l = gfc_expr_list_len (do_code->ext.omp_clauses->sizes_list);
+ if (count - i + 1 > l)
+ {
+ gfc_error ("TILE construct at %L generates %d loops "
+ "with canonical form but %d loops are needed",
+ &do_code->loc, l, count - i + 1);
+ goto fail;
+ }
+ }
+ if (do_code->ext.omp_clauses && do_code->ext.omp_clauses->erroneous)
+ goto fail;
+ if (imperfect && !perfect_nesting_errorp)
+ {
+ sorry_at (gfc_get_location (&do_code->loc),
+ "Imperfectly nested loop using generated loops");
+ errorp = true;
+ }
+ if (non_generated_count == count)
+ non_generated_count = i - 1;
+ --i;
+ do_code = do_code->block->next;
+ continue;
}
gcc_assert (do_code->op == EXEC_DO);
if (do_code->ext.iterator->var->ts.type != BT_INTEGER)
@@ -10966,7 +11134,16 @@ resolve_omp_do (gfc_code *code)
errorp = true;
}
if (start_var || end_var)
- code->ext.omp_clauses->non_rectangular = 1;
+ {
+ code->ext.omp_clauses->non_rectangular = 1;
+ if (i > non_generated_count)
+ {
+ sorry_at (gfc_get_location (&do_code->loc),
+ "Non-rectangular loops from generated loops "
+ "unsupported");
+ errorp = true;
+ }
+ }
/* Only parse loop body into nested loop and intervening code if
there are supposed to be more loops in the nest to collapse. */
@@ -10980,7 +11157,7 @@ resolve_omp_do (gfc_code *code)
/* Parse error, can't recover from this. */
gfc_error ("not enough DO loops for collapsed %s (level %d) at %L",
name, i, &code->loc);
- return;
+ goto fail;
}
else if (next != do_code->block->next || next->next)
/* Imperfectly nested loop found. */
@@ -11002,22 +11179,35 @@ resolve_omp_do (gfc_code *code)
name, &code->loc);
perfect_nesting_errorp = true;
}
- /* FIXME: Also diagnose for TILE directives. */
+ else if (code->op == EXEC_OMP_TILE)
+ {
+ gfc_error ("%s inner loops must be perfectly nested at %L",
+ name, &code->loc);
+ perfect_nesting_errorp = true;
+ }
if (perfect_nesting_errorp)
errorp = true;
}
if (diagnose_intervening_code_errors (do_code->block->next,
name, next))
errorp = true;
+ imperfect = true;
}
do_code = next;
}
/* Give up now if we found any constraint violations. */
if (errorp)
- return;
+ {
+ fail:
+ if (code->ext.omp_clauses)
+ code->ext.omp_clauses->erroneous = 1;
+ return;
+ }
- restructure_intervening_code (&(code->block->next), code, count);
+ if (non_generated_count)
+ restructure_intervening_code (&code->block->next, code,
+ non_generated_count);
}
@@ -11168,6 +11358,10 @@ omp_code_to_statement (gfc_code *code)
return ST_OMP_PARALLEL_LOOP;
case EXEC_OMP_DEPOBJ:
return ST_OMP_DEPOBJ;
+ case EXEC_OMP_TILE:
+ return ST_OMP_TILE;
+ case EXEC_OMP_UNROLL:
+ return ST_OMP_UNROLL;
default:
gcc_unreachable ();
}
@@ -11632,6 +11826,8 @@ gfc_resolve_omp_directive (gfc_code *code, gfc_namespace *ns)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
resolve_omp_do (code);
break;
case EXEC_OMP_TARGET:
diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index 79c810c..b28c8a9 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -1006,14 +1006,22 @@ decode_omp_directive (void)
case 'e':
matchs ("end assume", gfc_match_omp_eos_error, ST_OMP_END_ASSUME);
matchs ("end simd", gfc_match_omp_eos_error, ST_OMP_END_SIMD);
+ matchs ("end tile", gfc_match_omp_eos_error, ST_OMP_END_TILE);
+ matchs ("end unroll", gfc_match_omp_eos_error, ST_OMP_END_UNROLL);
matcho ("error", gfc_match_omp_error, ST_OMP_ERROR);
break;
+ case 'n':
+ matcho ("nothing", gfc_match_omp_nothing, ST_NONE);
+ break;
case 's':
matchs ("scan", gfc_match_omp_scan, ST_OMP_SCAN);
matchs ("simd", gfc_match_omp_simd, ST_OMP_SIMD);
break;
- case 'n':
- matcho ("nothing", gfc_match_omp_nothing, ST_NONE);
+ case 't':
+ matchs ("tile", gfc_match_omp_tile, ST_OMP_TILE);
+ break;
+ case 'u':
+ matchs ("unroll", gfc_match_omp_unroll, ST_OMP_UNROLL);
break;
}
@@ -1916,6 +1924,7 @@ next_statement (void)
case ST_OMP_LOOP: case ST_OMP_PARALLEL_LOOP: case ST_OMP_TEAMS_LOOP: \
case ST_OMP_TARGET_PARALLEL_LOOP: case ST_OMP_TARGET_TEAMS_LOOP: \
case ST_OMP_ALLOCATE_EXEC: case ST_OMP_ALLOCATORS: case ST_OMP_ASSUME: \
+ case ST_OMP_TILE: case ST_OMP_UNROLL: \
case ST_CRITICAL: \
case ST_OACC_PARALLEL_LOOP: case ST_OACC_PARALLEL: case ST_OACC_KERNELS: \
case ST_OACC_DATA: case ST_OACC_HOST_DATA: case ST_OACC_LOOP: \
@@ -2786,6 +2795,12 @@ gfc_ascii_statement (gfc_statement st, bool strip_sentinel)
case ST_OMP_END_TEAMS_LOOP:
p = "!$OMP END TEAMS LOOP";
break;
+ case ST_OMP_END_TILE:
+ p = "!$OMP END TILE";
+ break;
+ case ST_OMP_END_UNROLL:
+ p = "!$OMP END UNROLL";
+ break;
case ST_OMP_END_WORKSHARE:
p = "!$OMP END WORKSHARE";
break;
@@ -2968,6 +2983,12 @@ gfc_ascii_statement (gfc_statement st, bool strip_sentinel)
case ST_OMP_THREADPRIVATE:
p = "!$OMP THREADPRIVATE";
break;
+ case ST_OMP_TILE:
+ p = "!$OMP TILE";
+ break;
+ case ST_OMP_UNROLL:
+ p = "!$OMP UNROLL";
+ break;
case ST_OMP_WORKSHARE:
p = "!$OMP WORKSHARE";
break;
@@ -5441,7 +5462,7 @@ loop:
/* Parse the statements of OpenMP do/parallel do. */
static gfc_statement
-parse_omp_do (gfc_statement omp_st)
+parse_omp_do (gfc_statement omp_st, int nested)
{
gfc_statement st;
gfc_code *cp, *np;
@@ -5462,11 +5483,20 @@ parse_omp_do (gfc_statement omp_st)
unexpected_eof ();
else if (st == ST_DO)
break;
+ else if (st == ST_OMP_UNROLL || st == ST_OMP_TILE)
+ {
+ st = parse_omp_do (st, nested + 1);
+ if (st == ST_IMPLIED_ENDDO)
+ return st;
+ goto do_end;
+ }
else
unexpected_statement (st);
}
parse_do_block ();
+ for (; nested; --nested)
+ pop_state ();
if (gfc_statement_label != NULL
&& gfc_state_stack->previous != NULL
&& gfc_state_stack->previous->state == COMP_DO
@@ -5487,6 +5517,7 @@ parse_omp_do (gfc_statement omp_st)
pop_state ();
st = next_statement ();
+do_end:
gfc_statement omp_end_st = ST_OMP_END_DO;
switch (omp_st)
{
@@ -5570,9 +5601,9 @@ parse_omp_do (gfc_statement omp_st)
case ST_OMP_TEAMS_DISTRIBUTE_SIMD:
omp_end_st = ST_OMP_END_TEAMS_DISTRIBUTE_SIMD;
break;
- case ST_OMP_TEAMS_LOOP:
- omp_end_st = ST_OMP_END_TEAMS_LOOP;
- break;
+ case ST_OMP_TEAMS_LOOP: omp_end_st = ST_OMP_END_TEAMS_LOOP; break;
+ case ST_OMP_TILE: omp_end_st = ST_OMP_END_TILE; break;
+ case ST_OMP_UNROLL: omp_end_st = ST_OMP_END_UNROLL; break;
default: gcc_unreachable ();
}
if (st == omp_end_st)
@@ -6073,7 +6104,7 @@ parse_omp_structured_block (gfc_statement omp_st, bool workshare_stmts_only)
case ST_OMP_PARALLEL_DO:
case ST_OMP_PARALLEL_DO_SIMD:
- st = parse_omp_do (st);
+ st = parse_omp_do (st, 0);
continue;
case ST_OMP_ATOMIC:
@@ -6370,7 +6401,9 @@ parse_executable (gfc_statement st)
case ST_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case ST_OMP_TEAMS_DISTRIBUTE_SIMD:
case ST_OMP_TEAMS_LOOP:
- st = parse_omp_do (st);
+ case ST_OMP_TILE:
+ case ST_OMP_UNROLL:
+ st = parse_omp_do (st, 0);
if (st == ST_IMPLIED_ENDDO)
return st;
continue;
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index d7a0856..4f4fafa 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -11437,6 +11437,8 @@ gfc_resolve_blocks (gfc_code *b, gfc_namespace *ns)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_LOOP:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
break;
@@ -12604,6 +12606,8 @@ gfc_resolve_code (gfc_code *code, gfc_namespace *ns)
case EXEC_OMP_LOOP:
case EXEC_OMP_SIMD:
case EXEC_OMP_TARGET_SIMD:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
gfc_resolve_omp_do_blocks (code, ns);
break;
case EXEC_SELECT_TYPE:
@@ -13102,6 +13106,8 @@ start:
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
gfc_resolve_omp_directive (code, ns);
break;
diff --git a/gcc/fortran/st.cc b/gcc/fortran/st.cc
index 6a605ad..0218d29 100644
--- a/gcc/fortran/st.cc
+++ b/gcc/fortran/st.cc
@@ -279,6 +279,8 @@ gfc_free_statement (gfc_code *p)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
gfc_free_omp_clauses (p->ext.omp_clauses);
break;
diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index f867e22..df1bf14 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -4327,6 +4327,34 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
}
+ if (clauses->full)
+ {
+ c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_FULL);
+ omp_clauses = gfc_trans_add_clause (c, omp_clauses);
+ }
+
+ if (clauses->partial)
+ {
+ c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_PARTIAL);
+ OMP_CLAUSE_PARTIAL_EXPR (c)
+ = (clauses->partial > 0
+ ? build_int_cst (integer_type_node, clauses->partial)
+ : NULL_TREE);
+ omp_clauses = gfc_trans_add_clause (c, omp_clauses);
+ }
+
+ if (clauses->sizes_list)
+ {
+ tree list = NULL_TREE;
+ for (gfc_expr_list *el = clauses->sizes_list; el; el = el->next)
+ list = tree_cons (NULL_TREE, gfc_convert_expr_to_tree (block, el->expr),
+ list);
+
+ c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_SIZES);
+ OMP_CLAUSE_SIZES_LIST (c) = nreverse (list);
+ omp_clauses = gfc_trans_add_clause (c, omp_clauses);
+ }
+
if (clauses->ordered)
{
c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_ORDERED);
@@ -4783,18 +4811,14 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
}
if (clauses->tile_list)
{
- vec<tree, va_gc> *tvec;
- gfc_expr_list *el;
-
- vec_alloc (tvec, 4);
-
- for (el = clauses->tile_list; el; el = el->next)
- vec_safe_push (tvec, gfc_convert_expr_to_tree (block, el->expr));
+ tree list = NULL_TREE;
+ for (gfc_expr_list *el = clauses->tile_list; el; el = el->next)
+ list = tree_cons (NULL_TREE, gfc_convert_expr_to_tree (block, el->expr),
+ list);
c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_TILE);
- OMP_CLAUSE_TILE_LIST (c) = build_tree_list_vec (tvec);
+ OMP_CLAUSE_TILE_LIST (c) = nreverse (list);
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
- tvec->truncate (0);
}
if (clauses->vector)
{
@@ -5718,6 +5742,16 @@ gfc_nonrect_loop_expr (stmtblock_t *pblock, gfc_se *sep, int loop_n,
return true;
}
+/* Return the number of elements in the expression list LIST.  A NULL
+   (empty) list has length 0.  The counter has the same type as the return
+   value so that no implicit unsigned-to-int conversion happens on return;
+   callers store the result in plain int variables.  */
+
+int
+gfc_expr_list_len (gfc_expr_list *list)
+{
+  int len = 0;
+  for (; list; list = list->next)
+    len++;
+
+  return len;
+}
+
static tree
gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
gfc_omp_clauses *do_clauses, tree par_clauses)
@@ -5733,18 +5767,19 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
dovar_init *di;
unsigned ix;
vec<tree, va_heap, vl_embed> *saved_doacross_steps = doacross_steps;
- gfc_expr_list *tile = do_clauses ? do_clauses->tile_list : clauses->tile_list;
+ gfc_expr_list *oacc_tile
+ = do_clauses ? do_clauses->tile_list : clauses->tile_list;
+ gfc_expr_list *sizes
+ = do_clauses ? do_clauses->sizes_list : clauses->sizes_list;
gfc_code *orig_code = code;
/* Both collapsed and tiled loops are lowered the same way. In
OpenACC, those clauses are not compatible, so prioritize the tile
clause, if present. */
- if (tile)
- {
- collapse = 0;
- for (gfc_expr_list *el = tile; el; el = el->next)
- collapse++;
- }
+ if (oacc_tile)
+ collapse = gfc_expr_list_len (oacc_tile);
+ else if (sizes)
+ collapse = gfc_expr_list_len (sizes);
doacross_steps = NULL;
if (clauses->orderedc)
@@ -5753,7 +5788,6 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
collapse = 1;
code = code->block->next;
- gcc_assert (code->op == EXEC_DO);
init = make_tree_vec (collapse);
cond = make_tree_vec (collapse);
@@ -5779,6 +5813,17 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
int dovar_found = 0;
tree dovar_decl;
+      if (code->op == EXEC_OMP_TILE || code->op == EXEC_OMP_UNROLL)
+	{
+	  /* This level of the nest is a nested loop-transforming construct
+	     rather than a DO loop, so leave every slot for it empty
+	     (NULL_TREE).  Each vector is cleared exactly once.  */
+	  TREE_VEC_ELT (init, i) = NULL_TREE;
+	  TREE_VEC_ELT (cond, i) = NULL_TREE;
+	  TREE_VEC_ELT (incr, i) = NULL_TREE;
+	  if (orig_decls)
+	    TREE_VEC_ELT (orig_decls, i) = NULL_TREE;
+	  continue;
+	}
+ gcc_assert (code->op == EXEC_DO);
if (clauses)
{
gfc_omp_namelist *n = NULL;
@@ -6092,6 +6137,8 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
if (code1 != scan)
tmpcode->next = scan;
}
+ else if (code->op == EXEC_OMP_TILE || code->op == EXEC_OMP_UNROLL)
+ tmp = gfc_trans_omp_code (code, true);
else
tmp = gfc_trans_omp_code (code->block->next, true);
gfc_add_expr_to_block (&body, tmp);
@@ -6112,6 +6159,8 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
case EXEC_OMP_LOOP: stmt = make_node (OMP_LOOP); break;
case EXEC_OMP_TASKLOOP: stmt = make_node (OMP_TASKLOOP); break;
case EXEC_OACC_LOOP: stmt = make_node (OACC_LOOP); break;
+ case EXEC_OMP_TILE: stmt = make_node (OMP_TILE); break;
+ case EXEC_OMP_UNROLL: stmt = make_node (OMP_UNROLL); break;
default: gcc_unreachable ();
}
@@ -8219,6 +8268,8 @@ gfc_trans_omp_directive (gfc_code *code)
case EXEC_OMP_LOOP:
case EXEC_OMP_SIMD:
case EXEC_OMP_TASKLOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
return gfc_trans_omp_do (code, code->op, NULL, code->ext.omp_clauses,
NULL);
case EXEC_OMP_DISTRIBUTE_PARALLEL_DO:
diff --git a/gcc/fortran/trans.cc b/gcc/fortran/trans.cc
index badad6a..a208afe 100644
--- a/gcc/fortran/trans.cc
+++ b/gcc/fortran/trans.cc
@@ -2656,6 +2656,8 @@ trans_code (gfc_code * code, tree cond)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
+ case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
res = gfc_trans_omp_directive (code);
break;