aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorThomas Koenig <tkoenig@gcc.gnu.org>2019-05-29 20:30:45 +0000
committerThomas Koenig <tkoenig@gcc.gnu.org>2019-05-29 20:30:45 +0000
commit1585b483236dc2e9a9460a11c14cf3b32a967a84 (patch)
tree6beebca4391189d74ba97af399031e32ebef0dd4 /gcc
parent987c9fc581ffb04d5ab7a782bb7aee6205c45663 (diff)
downloadgcc-1585b483236dc2e9a9460a11c14cf3b32a967a84.zip
gcc-1585b483236dc2e9a9460a11c14cf3b32a967a84.tar.gz
gcc-1585b483236dc2e9a9460a11c14cf3b32a967a84.tar.bz2
re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native starting with r271377)
2019-05-29 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/90539 * gfortran.h (gfc_has_dimen_vector_ref): Add prototype. * trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous. (gfc_conv_is_contiguous_expr): Add prototype. * frontend-passes.c (has_dimen_vector_ref): Remove prototype, rename to (gfc_has_dimen_vector_ref): New function name. (matmul_temp_args): Use gfc_has_dimen_vector_ref. (inline_matmul_assign): Likewise. * trans-array.c (gfc_conv_array_parameter): Also check for absence of a vector subscript before calling gfc_conv_subref_array_arg. Pass additional argument to gfc_conv_subref_array_arg. * trans-expr.c (gfc_conv_subref_array_arg): Add argument check_contiguous. If that is true, check if the argument is contiguous and do not repack in that case. * trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split away most of the work into, and call (gfc_conv_intrinsic_is_coniguous_expr): New function. 2019-05-29 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/90539 * gfortran.dg/internal_pack_21.f90: Adjust scan patterns. * gfortran.dg/internal_pack_22.f90: New test. * gfortran.dg/internal_pack_23.f90: New test. From-SVN: r271751
Diffstat (limited to 'gcc')
-rw-r--r--gcc/fortran/ChangeLog21
-rw-r--r--gcc/fortran/frontend-passes.c13
-rw-r--r--gcc/fortran/gfortran.h1
-rw-r--r--gcc/fortran/trans-array.c6
-rw-r--r--gcc/fortran/trans-expr.c150
-rw-r--r--gcc/fortran/trans-intrinsic.c17
-rw-r--r--gcc/fortran/trans.h5
-rw-r--r--gcc/testsuite/ChangeLog7
-rw-r--r--gcc/testsuite/gfortran.dg/internal_pack_21.f902
-rw-r--r--gcc/testsuite/gfortran.dg/internal_pack_22.f9036
-rw-r--r--gcc/testsuite/gfortran.dg/internal_pack_23.f9027
11 files changed, 237 insertions, 48 deletions
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 07b485b..4c126b7 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,24 @@
+2019-05-29 Thomas Koenig <tkoenig@gcc.gnu.org>
+
+ PR fortran/90539
+ * gfortran.h (gfc_has_dimen_vector_ref): Add prototype.
+ * trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous.
+ (gfc_conv_is_contiguous_expr): Add prototype.
+ * frontend-passes.c (has_dimen_vector_ref): Remove prototype,
+ rename to
+ (gfc_has_dimen_vector_ref): New function name.
+ (matmul_temp_args): Use gfc_has_dimen_vector_ref.
+ (inline_matmul_assign): Likewise.
+ * trans-array.c (gfc_conv_array_parameter): Also check for absence
+ of a vector subscript before calling gfc_conv_subref_array_arg.
+ Pass additional argument to gfc_conv_subref_array_arg.
+ * trans-expr.c (gfc_conv_subref_array_arg): Add argument
+ check_contiguous. If that is true, check if the argument
+ is contiguous and do not repack in that case.
+ * trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split
+ away most of the work into, and call
+ (gfc_conv_intrinsic_is_coniguous_expr): New function.
+
2019-05-29 Jakub Jelinek <jakub@redhat.com>
PR fortran/90329
diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c
index d4264da..87df504 100644
--- a/gcc/fortran/frontend-passes.c
+++ b/gcc/fortran/frontend-passes.c
@@ -54,7 +54,6 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
bool *);
static int call_external_blas (gfc_code **, int *, void *);
-static bool has_dimen_vector_ref (gfc_expr *);
static int matmul_temp_args (gfc_code **, int *,void *data);
static int index_interchange (gfc_code **, int*, void *);
@@ -2868,7 +2867,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
{
if (matrix_a->expr_type == EXPR_VARIABLE
&& (gfc_check_dependency (matrix_a, expr1, true)
- || has_dimen_vector_ref (matrix_a)))
+ || gfc_has_dimen_vector_ref (matrix_a)))
a_tmp = true;
}
else
@@ -2881,7 +2880,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
{
if (matrix_b->expr_type == EXPR_VARIABLE
&& (gfc_check_dependency (matrix_b, expr1, true)
- || has_dimen_vector_ref (matrix_b)))
+ || gfc_has_dimen_vector_ref (matrix_b)))
b_tmp = true;
}
else
@@ -3681,8 +3680,8 @@ scalarized_expr (gfc_expr *e_in, gfc_expr **index, int count_index)
/* Helper function to check for a dimen vector as subscript. */
-static bool
-has_dimen_vector_ref (gfc_expr *e)
+bool
+gfc_has_dimen_vector_ref (gfc_expr *e)
{
gfc_array_ref *ar;
int i;
@@ -3838,8 +3837,8 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
if (matrix_b == NULL)
return 0;
- if (has_dimen_vector_ref (expr1) || has_dimen_vector_ref (matrix_a)
- || has_dimen_vector_ref (matrix_b))
+ if (gfc_has_dimen_vector_ref (expr1) || gfc_has_dimen_vector_ref (matrix_a)
+ || gfc_has_dimen_vector_ref (matrix_b))
return 0;
/* We do not handle data dependencies yet. */
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 54987ac..798297b 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3535,6 +3535,7 @@ typedef int (*walk_expr_fn_t) (gfc_expr **, int *, void *);
int gfc_dummy_code_callback (gfc_code **, int *, void *);
int gfc_expr_walker (gfc_expr **, walk_expr_fn_t, void *);
int gfc_code_walker (gfc_code **, walk_code_fn_t, walk_expr_fn_t, void *);
+bool gfc_has_dimen_vector_ref (gfc_expr *e);
/* simplify.c */
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index 9c96d89..56d534d 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -8139,12 +8139,12 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
optimizers. */
if (g77 && optimize && !optimize_size && expr->expr_type == EXPR_VARIABLE
- && !is_pointer (expr) && (fsym == NULL
- || fsym->ts.type != BT_ASSUMED))
+ && !is_pointer (expr) && ! gfc_has_dimen_vector_ref (expr)
+ && (fsym == NULL || fsym->ts.type != BT_ASSUMED))
{
gfc_conv_subref_array_arg (se, expr, g77,
fsym ? fsym->attr.intent : INTENT_INOUT,
- false, fsym, proc_name, sym);
+ false, fsym, proc_name, sym, true);
return;
}
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index b7a8456..5183029 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -4579,7 +4579,7 @@ void
gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77,
sym_intent intent, bool formal_ptr,
const gfc_symbol *fsym, const char *proc_name,
- gfc_symbol *sym)
+ gfc_symbol *sym, bool check_contiguous)
{
gfc_se lse;
gfc_se rse;
@@ -4602,7 +4602,7 @@ gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77,
pass_optional = fsym && fsym->attr.optional && sym && sym->attr.optional;
- if (pass_optional)
+ if (pass_optional || check_contiguous)
{
gfc_init_se (&work_se, NULL);
parmse = &work_se;
@@ -4880,50 +4880,136 @@ class_array_fcn:
else
parmse->expr = gfc_build_addr_expr (NULL_TREE, parmse->expr);
- if (pass_optional)
+ /* Basically make this into
+
+ if (present)
+ {
+ if (contiguous)
+ {
+ pointer = a;
+ }
+ else
+ {
+ parmse->pre();
+ pointer = parmse->expr;
+ }
+ }
+ else
+ pointer = NULL;
+
+ foo (pointer);
+ if (present && !contiguous)
+ se->post();
+
+ */
+
+ if (pass_optional || check_contiguous)
{
- tree present;
tree type;
stmtblock_t else_block;
tree pre_stmts, post_stmts;
tree pointer;
tree else_stmt;
+ tree present_var = NULL_TREE;
+ tree cont_var = NULL_TREE;
+ tree post_cond;
- /* Make this into
+ type = TREE_TYPE (parmse->expr);
+ pointer = gfc_create_var (type, "arg_ptr");
+
+ if (check_contiguous)
+ {
+ gfc_se cont_se, array_se;
+ stmtblock_t if_block, else_block;
+ tree if_stmt, else_stmt;
+
+ cont_var = gfc_create_var (boolean_type_node, "contiguous");
+
+ /* cont_var = is_contiguous (expr); . */
+ gfc_init_se (&cont_se, parmse);
+ gfc_conv_is_contiguous_expr (&cont_se, expr);
+ gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
+ gfc_add_modify (&se->pre, cont_var, cont_se.expr);
+ gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+
+ /* arrayse->expr = descriptor of a. */
+ gfc_init_se (&array_se, se);
+ gfc_conv_expr_descriptor (&array_se, expr);
+ gfc_add_block_to_block (&se->pre, &(&array_se)->pre);
+ gfc_add_block_to_block (&se->pre, &(&array_se)->post);
+
+ /* if_stmt = { pointer = &a[0]; } . */
+ gfc_init_block (&if_block);
+ tmp = gfc_conv_array_data (array_se.expr);
+ tmp = fold_convert (type, tmp);
+ gfc_add_modify (&if_block, pointer, tmp);
+ if_stmt = gfc_finish_block (&if_block);
+
+ /* else_stmt = { parmse->pre(); pointer = parmse->expr; } . */
+ gfc_init_block (&else_block);
+ gfc_add_block_to_block (&else_block, &parmse->pre);
+ gfc_add_modify (&else_block, pointer, parmse->expr);
+ else_stmt = gfc_finish_block (&else_block);
+
+ /* And put the above into an if statement. */
+ pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node,
+ cont_var, if_stmt, else_stmt);
+ }
+ else
+ {
+ /* pointer = pramse->expr; . */
+ gfc_add_modify (&parmse->pre, pointer, parmse->expr);
+ pre_stmts = gfc_finish_block (&parmse->pre);
+ }
- if (present (a))
- {
- parmse->pre;
- optional = parse->expr;
- }
- else
- optional = NULL;
- call foo (optional);
- if (present (a))
- parmse->post;
+ if (pass_optional)
+ {
+ present_var = gfc_create_var (boolean_type_node, "present");
- */
+ /* present_var = present(sym); . */
+ tmp = gfc_conv_expr_present (sym);
+ tmp = fold_convert (boolean_type_node, tmp);
+ gfc_add_modify (&se->pre, present_var, tmp);
- type = TREE_TYPE (parmse->expr);
- pointer = gfc_create_var (type, "optional");
- tmp = gfc_conv_expr_present (sym);
- present = gfc_evaluate_now (tmp, &se->pre);
- gfc_add_modify (&parmse->pre, pointer, parmse->expr);
- pre_stmts = gfc_finish_block (&parmse->pre);
-
- gfc_init_block (&else_block);
- gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
- else_stmt = gfc_finish_block (&else_block);
-
- tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present,
- pre_stmts, else_stmt);
- gfc_add_expr_to_block (&se->pre, tmp);
+ /* else_stmt = { pointer = NULL; } . */
+ gfc_init_block (&else_block);
+ gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
+ else_stmt = gfc_finish_block (&else_block);
+
+ tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var,
+ pre_stmts, else_stmt);
+ gfc_add_expr_to_block (&se->pre, tmp);
+
+
+ }
+ else
+ gfc_add_expr_to_block (&se->pre, pre_stmts);
post_stmts = gfc_finish_block (&parmse->post);
- tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present,
+
+ /* Put together the post stuff, plus the optional
+ deallocation. */
+ if (check_contiguous)
+ {
+ /* !cont_var. */
+ tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
+ cont_var,
+ build_zero_cst (boolean_type_node));
+ if (pass_optional)
+ post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+ boolean_type_node, present_var, tmp);
+ else
+ post_cond = tmp;
+ }
+ else
+ {
+ gcc_assert (pass_optional);
+ post_cond = present_var;
+ }
+
+ tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, post_cond,
post_stmts, build_empty_stmt (input_location));
gfc_add_expr_to_block (&se->post, tmp);
-
se->expr = pointer;
}
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index e0a4c67..f6edd68 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -2832,6 +2832,17 @@ static void
gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
{
gfc_expr *arg;
+ arg = expr->value.function.actual->expr;
+ gfc_conv_is_contiguous_expr (se, arg);
+ se->expr = fold_convert (gfc_typenode_for_spec (&expr->ts), se->expr);
+}
+
+/* This function does the work for gfc_conv_intrinsic_is_contiguous,
+ plus it can be called directly. */
+
+void
+gfc_conv_is_contiguous_expr (gfc_se *se, gfc_expr *arg)
+{
gfc_ss *ss;
gfc_se argse;
tree desc, tmp, stride, extent, cond;
@@ -2839,8 +2850,6 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
tree fncall0;
gfc_array_spec *as;
- arg = expr->value.function.actual->expr;
-
if (arg->ts.type == BT_CLASS)
gfc_add_class_array_ref (arg);
@@ -2878,7 +2887,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
stride, build_int_cst (TREE_TYPE (stride), 1));
- for (i = 0; i < expr->value.function.actual->expr->rank - 1; i++)
+ for (i = 0; i < arg->rank - 1; i++)
{
tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[i]);
extent = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);
@@ -2896,7 +2905,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
cond = fold_build2_loc (input_location, TRUTH_AND_EXPR,
boolean_type_node, cond, tmp);
}
- se->expr = convert (gfc_typenode_for_spec (&expr->ts), cond);
+ se->expr = cond;
}
}
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index e0118ab..0305d33 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -535,7 +535,10 @@ int gfc_conv_procedure_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *,
void gfc_conv_subref_array_arg (gfc_se *, gfc_expr *, int, sym_intent, bool,
const gfc_symbol *fsym = NULL,
const char *proc_name = NULL,
- gfc_symbol *sym = NULL);
+ gfc_symbol *sym = NULL,
+ bool check_contiguous = false);
+
+void gfc_conv_is_contiguous_expr (gfc_se *, gfc_expr *);
/* Generate code for a scalar assignment. */
tree gfc_trans_scalar_assign (gfc_se *, gfc_se *, gfc_typespec, bool, bool,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bb500f0..a0e8b7b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-05-29 Thomas Koenig <tkoenig@gcc.gnu.org>
+
+ PR fortran/90539
+ * gfortran.dg/internal_pack_21.f90: Adjust scan patterns.
+ * gfortran.dg/internal_pack_22.f90: New test.
+ * gfortran.dg/internal_pack_23.f90: New test.
+
2019-05-29 Jan Hubicka <hubicka@ucw.cz>
* tree-ssa/alias-access-spath-1.c: new testcase.
diff --git a/gcc/testsuite/gfortran.dg/internal_pack_21.f90 b/gcc/testsuite/gfortran.dg/internal_pack_21.f90
index d0ce942..54e43ff 100644
--- a/gcc/testsuite/gfortran.dg/internal_pack_21.f90
+++ b/gcc/testsuite/gfortran.dg/internal_pack_21.f90
@@ -20,5 +20,5 @@ END MODULE M1
USE M1
CALL S2()
END
-! { dg-final { scan-tree-dump-times "optional" 4 "original" } }
+! { dg-final { scan-tree-dump-times "arg_ptr" 5 "original" } }
! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
diff --git a/gcc/testsuite/gfortran.dg/internal_pack_22.f90 b/gcc/testsuite/gfortran.dg/internal_pack_22.f90
new file mode 100644
index 0000000..4e9fe59
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/internal_pack_22.f90
@@ -0,0 +1,36 @@
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-original -O" }
+! Check that absent and present dummy arguments work with
+! packing when handing them down to an old-fashioned argument.
+
+module x
+ implicit none
+contains
+ subroutine foo (a,b)
+ real, dimension(:), intent(inout), optional :: a, b
+ if (present(a)) stop 1
+ if (.not. present(b)) stop 2
+ call bar (a, b)
+ end subroutine foo
+
+ subroutine bar (a,b)
+ real, dimension(2), intent(inout), optional :: a, b
+ real :: tmp
+ if (present(a)) stop 3
+ if (.not. present(b)) stop 4
+ tmp = b(2)
+ b(2) = b(1)
+ b(1) = tmp
+ end subroutine bar
+end module x
+
+program main
+ use x
+ implicit none
+ real, dimension(2) :: b
+ b(1) = 1.
+ b(2) = 42.
+ call foo(b=b)
+ if (b(1) /= 42. .or. b(2) /= 1.) stop 5
+end program main
+! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
diff --git a/gcc/testsuite/gfortran.dg/internal_pack_23.f90 b/gcc/testsuite/gfortran.dg/internal_pack_23.f90
new file mode 100644
index 0000000..8df82c8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/internal_pack_23.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+! PR fortran/90539 - this used to cause an ICE.
+
+module t2
+ implicit none
+contains
+ subroutine foo(a)
+ real, dimension(*) :: a
+ if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1
+ end subroutine foo
+end module t2
+
+module t1
+ use t2
+ implicit none
+contains
+ subroutine bar(a)
+ real, dimension(:) :: a
+ if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1
+ call foo(a)
+ end subroutine bar
+end module t1
+
+program main
+ use t1
+ call bar([1.0, 2.0])
+end program main