aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Burnus <tburnus@baylibre.com>2025-06-24 23:28:57 +0200
committerTobias Burnus <tburnus@baylibre.com>2025-06-24 23:46:53 +0200
commit1e35a518258e8cd970a2326bba5a4c8b10695439 (patch)
tree458bb0a67b5ffdadac1001bf180147a0c9e37cb0
parent92e1893e0155b6b3baef2a935efd5936d23a67ea (diff)
downloadgcc-1e35a518258e8cd970a2326bba5a4c8b10695439.zip
gcc-1e35a518258e8cd970a2326bba5a4c8b10695439.tar.gz
gcc-1e35a518258e8cd970a2326bba5a4c8b10695439.tar.bz2
Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach
While C/++ support the routines acc_attach{,_async} and acc_detach{,_finalize}{,_async} routines since a long time, the Fortran API routines where only added in OpenACC 3.3. Unfortunately, they cannot directly be implemented in the library as GCC will introduce a temporary array descriptor in some cases, which causes the attempted attachment to the this temporary variable instead of to the original one. Therefore, those API routines are handled in a special way in the compiler. gcc/fortran/ChangeLog: * trans-stmt.cc (gfc_trans_call_acc_attach_detach): New. (gfc_trans_call): Call it. libgomp/ChangeLog: * libgomp.texi (acc_attach, acc_detach): Update for Fortran version. * openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}): Add. * openacc_lib.h: Likewise. * testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test. * testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test.
-rw-r--r--gcc/fortran/trans-stmt.cc74
-rw-r--r--libgomp/libgomp.texi40
-rw-r--r--libgomp/openacc.f9044
-rw-r--r--libgomp/openacc_lib.h42
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f9025
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f9062
6 files changed, 265 insertions, 22 deletions
diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 487b768..f105401 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -377,6 +377,57 @@ get_intrinsic_for_code (gfc_code *code)
}
+/* Handle the OpenACC routines acc_attach{,_async} and
+ acc_detach{,_finalize}{,_async} explicitly. This is required as the
+ the corresponding device pointee is attached to the corresponding device
+ pointer, but if a temporary array descriptor is created for the call,
+ that one is used as pointer instead of the original pointer. */
+
+tree
+gfc_trans_call_acc_attach_detach (gfc_code *code)
+{
+ stmtblock_t block;
+ gfc_se ptr_addr_se, async_se;
+ tree fn;
+
+ fn = code->resolved_sym->backend_decl;
+ if (fn == NULL)
+ {
+ fn = gfc_get_symbol_decl (code->resolved_sym);
+ code->resolved_sym->backend_decl = fn;
+ }
+
+ gfc_start_block (&block);
+
+ gfc_init_se (&ptr_addr_se, NULL);
+ ptr_addr_se.descriptor_only = 1;
+ ptr_addr_se.want_pointer = 1;
+ gfc_conv_expr (&ptr_addr_se, code->ext.actual->expr);
+ gfc_add_block_to_block (&block, &ptr_addr_se.pre);
+ if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (ptr_addr_se.expr)))
+ ptr_addr_se.expr = gfc_conv_descriptor_data_get (ptr_addr_se.expr);
+ ptr_addr_se.expr = build_fold_addr_expr (ptr_addr_se.expr);
+
+ bool async = code->ext.actual->next != NULL;
+ if (async)
+ {
+ gfc_init_se (&async_se, NULL);
+ gfc_conv_expr (&async_se, code->ext.actual->next->expr);
+ fn = build_call_expr_loc (gfc_get_location (&code->loc), fn, 2,
+ ptr_addr_se.expr, async_se.expr);
+ }
+ else
+ fn = build_call_expr_loc (gfc_get_location (&code->loc),
+ fn, 1, ptr_addr_se.expr);
+ gfc_add_expr_to_block (&block, fn);
+ gfc_add_block_to_block (&block, &ptr_addr_se.post);
+ if (async)
+ gfc_add_block_to_block (&block, &async_se.post);
+
+ return gfc_finish_block (&block);
+}
+
+
/* Translate the CALL statement. Builds a call to an F95 subroutine. */
tree
@@ -392,13 +443,32 @@ gfc_trans_call (gfc_code * code, bool dependency_check,
tree tmp;
bool is_intrinsic_mvbits;
+ gcc_assert (code->resolved_sym);
+
+ /* Unfortunately, acc_attach* and acc_detach* need some special treatment for
+ attaching the the pointee to a pointer as GCC might introduce a temporary
+ array descriptor, whose data component is then used as to be attached to
+ pointer. */
+ if (flag_openacc
+ && code->resolved_sym->attr.subroutine
+ && code->resolved_sym->formal
+ && code->resolved_sym->formal->sym->ts.type == BT_ASSUMED
+ && code->resolved_sym->formal->sym->attr.dimension
+ && code->resolved_sym->formal->sym->as->type == AS_ASSUMED_RANK
+ && startswith (code->resolved_sym->name, "acc_")
+ && (!strcmp (code->resolved_sym->name + 4, "attach")
+ || !strcmp (code->resolved_sym->name + 4, "attach_async")
+ || !strcmp (code->resolved_sym->name + 4, "detach")
+ || !strcmp (code->resolved_sym->name + 4, "detach_async")
+ || !strcmp (code->resolved_sym->name + 4, "detach_finalize")
+ || !strcmp (code->resolved_sym->name + 4, "detach_finalize_async")))
+ return gfc_trans_call_acc_attach_detach (code);
+
/* A CALL starts a new block because the actual arguments may have to
be evaluated first. */
gfc_init_se (&se, NULL);
gfc_start_block (&se.pre);
- gcc_assert (code->resolved_sym);
-
ss = gfc_ss_terminator;
if (code->resolved_sym->attr.elemental)
ss = gfc_walk_elemental_function_args (ss, code->ext.actual,
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 9f53f16..5518033 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -5967,19 +5967,19 @@ address to pointing to the corresponding device data.
@item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);}
@end multitable
-@c @item @emph{Fortran}:
-@c @multitable @columnfractions .20 .80
-@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
-@c @item @tab @code{type(*), dimension(..) :: ptr_addr}
-@c @item @tab @code{integer(acc_handle_kind), value :: async_arg}
-@c @end multitable
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
+@item @tab @code{type(*), dimension(..) :: ptr_addr}
+@item @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
@item @emph{Reference}:
@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.34.
-@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section
-@c 3.2.29.
+ @uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.29.
@end table
@@ -5999,21 +5999,21 @@ address to pointing to the corresponding host data.
@item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);}
@end multitable
-@c @item @emph{Fortran}:
-@c @multitable @columnfractions .20 .80
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
-@c @item @tab @code{type(*), dimension(..) :: ptr_addr}
-@c @item @tab @code{integer(acc_handle_kind), value :: async_arg}
-@c @end multitable
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
+@item @tab @code{type(*), dimension(..) :: ptr_addr}
+@item @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
@item @emph{Reference}:
@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.35.
-@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section
-@c 3.2.29.
+@uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.29.
@end table
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index 9d51f01..3f2db45 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -798,6 +798,8 @@ module openacc
public :: acc_memcpy_to_device, acc_memcpy_to_device_async
public :: acc_memcpy_from_device, acc_memcpy_from_device_async
public :: acc_memcpy_device, acc_memcpy_device_async
+ public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async
+ public :: acc_detach_finalize, acc_detach_finalize_async
integer, parameter :: openacc_version = 201711
@@ -1068,6 +1070,48 @@ module openacc
end subroutine
end interface
+ interface
+ subroutine acc_attach (ptr_addr) bind(C)
+ type(*), dimension(..) :: ptr_addr
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
+ import :: acc_handle_kind
+ type(*), dimension(..) :: ptr_addr
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach (ptr_addr) bind(C)
+ type(*), dimension(..) :: ptr_addr
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
+ import :: acc_handle_kind
+ type(*), dimension(..) :: ptr_addr
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach_finalize (ptr_addr) bind(C)
+ type(*), dimension(..) :: ptr_addr
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C)
+ import :: acc_handle_kind
+ type(*), dimension(..) :: ptr_addr
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
interface acc_copyin_async
procedure :: acc_copyin_async_32_h
procedure :: acc_copyin_async_64_h
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index 9333c48..dbdc4d7 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -707,3 +707,45 @@
integer (acc_handle_kind) async_
end subroutine
end interface
+
+ interface
+ subroutine acc_attach (ptr_addr) bind(C)
+ type(*), dimension(..) :: ptr_addr
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
+ import :: acc_handle_kind
+ type(*), dimension(..) :: ptr_addr
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach (ptr_addr) bind(C)
+ type(*), dimension(..) :: ptr_addr
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
+ import :: acc_handle_kind
+ type(*), dimension(..) :: ptr_addr
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach_finalize (ptr_addr) bind(C)
+ type(*), dimension(..) :: ptr_addr
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C)
+ import :: acc_handle_kind
+ type(*), dimension(..) :: ptr_addr
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
new file mode 100644
index 0000000..15393b4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
@@ -0,0 +1,25 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+
+use openacc
+implicit none (type, external)
+integer,pointer :: a, b(:)
+integer,allocatable :: c, d(:)
+
+call acc_attach(a) ! ICE
+call acc_attach_async(b, 4)
+call acc_attach(c)
+
+call acc_detach(a)
+call acc_detach_async(b, 4)
+call acc_detach_finalize(c)
+call acc_detach_finalize_async(d,7)
+end
+
+! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
new file mode 100644
index 0000000..b2204ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
@@ -0,0 +1,62 @@
+! { dg-do run }
+
+use openacc
+implicit none (type, external)
+integer, target :: tgt_a, tgt_b(5)
+
+integer, pointer :: p1, p2(:)
+
+type t
+ integer,pointer :: a => null ()
+ integer,pointer :: b(:) => null ()
+ integer,allocatable :: c, d(:)
+end type t
+
+type(t), target :: var
+
+tgt_a = 51
+tgt_b = [11,22,33,44,55]
+
+var%b => tgt_b
+!$acc enter data copyin(var, tgt_a, tgt_b)
+var%a => tgt_a
+
+call acc_attach(var%a)
+call acc_attach(var%b)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+ if (var%a /= 51) stop 1
+ if (any (var%b /= [11,22,33,44,55])) stop 2
+!$acc end serial
+
+call acc_detach(var%a)
+call acc_detach(var%b)
+
+!$acc exit data delete(var, tgt_a, tgt_b)
+
+var%c = 9
+var%d = [1,2,3]
+
+p1 => var%c
+p2 => var%d
+
+!$acc enter data copyin(p1, p2)
+!$acc enter data copyin(var)
+call acc_attach(var%c)
+call acc_attach(var%d)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+ if (var%c /= 9) stop 3
+ if (any (var%d /= [1,2,3])) stop 4
+!$acc end serial
+
+call acc_detach(var%c)
+call acc_detach(var%d)
+
+!$acc exit data delete(var, p1, p2)
+
+deallocate(var%d)
+
+end