diff options
author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-12-06 19:57:32 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2022-01-11 23:39:54 +0100 |
commit | 49ad4d2c30387ec916b16ddc1e235bcb5e53b3b2 (patch) | |
tree | 981b8805239e35c9094a5f75155be37d20c1d847 /libgfortran/generated | |
parent | db8d94a0570dc1f3de10c9eb9fe6e91df804272b (diff) | |
download | gcc-49ad4d2c30387ec916b16ddc1e235bcb5e53b3b2.zip gcc-49ad4d2c30387ec916b16ddc1e235bcb5e53b3b2.tar.gz gcc-49ad4d2c30387ec916b16ddc1e235bcb5e53b3b2.tar.bz2 |
Prepare library for REAL(KIND=17).
This prepares the library side for REAL(KIND=17). It is
not yet tested, but at least compiles cleanly on POWER 9
and x86_64.
2021-10-19 Thomas Koenig <tkoenig@gcc.gnu.org>
* Makefile.am: Add _r17 and _c17 files. Build them
with -mabi=ieeelongdouble on POWER.
* Makefile.in: Regenerate.
* configure: Regenerate.
* configure.ac: New flag HAVE_REAL_17.
* kinds-override.h: (HAVE_GFC_REAL_17): New macro.
(HAVE_GFC_COMPLEX_17): New macro.
(GFC_REAL_17_HUGE): New macro.
(GFC_REAL_17_LITERAL_SUFFIX): New macro.
(GFC_REAL_17_LITERAL): New macro.
(GFC_REAL_17_DIGITS): New macro.
(GFC_REAL_17_RADIX): New macro.
* libgfortran.h (POWER_IEEE128): New macro.
(gfc_array_r17): Typedef.
(GFC_DTYPE_REAL_17): New macro.
(GFC_DTYPE_COMPLEX_17): New macro.
(__acoshieee128): Prototype.
(__acosieee128): Prototype.
(__asinhieee128): Prototype.
(__asinieee128): Prototype.
(__atan2ieee128): Prototype.
(__atanhieee128): Prototype.
(__atanieee128): Prototype.
(__coshieee128): Prototype.
(__cosieee128): Prototype.
(__erfieee128): Prototype.
(__expieee128): Prototype.
(__fabsieee128): Prototype.
(__jnieee128): Prototype.
(__log10ieee128): Prototype.
(__logieee128): Prototype.
(__powieee128): Prototype.
(__sinhieee128): Prototype.
(__sinieee128): Prototype.
(__sqrtieee128): Prototype.
(__tanhieee128): Prototype.
(__tanieee128): Prototype.
(__ynieee128): Prototype.
* m4/mtype.m4: Make a bit more readable. Add KIND=17.
* generated/_abs_c17.F90: New file.
* generated/_abs_r17.F90: New file.
* generated/_acos_r17.F90: New file.
* generated/_acosh_r17.F90: New file.
* generated/_aimag_c17.F90: New file.
* generated/_aint_r17.F90: New file.
* generated/_anint_r17.F90: New file.
* generated/_asin_r17.F90: New file.
* generated/_asinh_r17.F90: New file.
* generated/_atan2_r17.F90: New file.
* generated/_atan_r17.F90: New file.
* generated/_atanh_r17.F90: New file.
* generated/_conjg_c17.F90: New file.
* generated/_cos_c17.F90: New file.
* generated/_cos_r17.F90: New file.
* generated/_cosh_r17.F90: New file.
* generated/_dim_r17.F90: New file.
* generated/_exp_c17.F90: New file.
* generated/_exp_r17.F90: New file.
* generated/_log10_r17.F90: New file.
* generated/_log_c17.F90: New file.
* generated/_log_r17.F90: New file.
* generated/_mod_r17.F90: New file.
* generated/_sign_r17.F90: New file.
* generated/_sin_c17.F90: New file.
* generated/_sin_r17.F90: New file.
* generated/_sinh_r17.F90: New file.
* generated/_sqrt_c17.F90: New file.
* generated/_sqrt_r17.F90: New file.
* generated/_tan_r17.F90: New file.
* generated/_tanh_r17.F90: New file.
* generated/bessel_r17.c: New file.
* generated/cshift0_c17.c: New file.
* generated/cshift0_r17.c: New file.
* generated/cshift1_16_c17.c: New file.
* generated/cshift1_16_r17.c: New file.
* generated/cshift1_4_c17.c: New file.
* generated/cshift1_4_r17.c: New file.
* generated/cshift1_8_c17.c: New file.
* generated/cshift1_8_r17.c: New file.
* generated/findloc0_c17.c: New file.
* generated/findloc0_r17.c: New file.
* generated/findloc1_c17.c: New file.
* generated/findloc1_r17.c: New file.
* generated/in_pack_c17.c: New file.
* generated/in_pack_r17.c: New file.
* generated/in_unpack_c17.c: New file.
* generated/in_unpack_r17.c: New file.
* generated/matmul_c17.c: New file.
* generated/matmul_r17.c: New file.
* generated/matmulavx128_c17.c: New file.
* generated/matmulavx128_r17.c: New file.
* generated/maxloc0_16_r17.c: New file.
* generated/maxloc0_4_r17.c: New file.
* generated/maxloc0_8_r17.c: New file.
* generated/maxloc1_16_r17.c: New file.
* generated/maxloc1_4_r17.c: New file.
* generated/maxloc1_8_r17.c: New file.
* generated/maxval_r17.c: New file.
* generated/minloc0_16_r17.c: New file.
* generated/minloc0_4_r17.c: New file.
* generated/minloc0_8_r17.c: New file.
* generated/minloc1_16_r17.c: New file.
* generated/minloc1_4_r17.c: New file.
* generated/minloc1_8_r17.c: New file.
* generated/minval_r17.c: New file.
* generated/norm2_r17.c: New file.
* generated/pack_c17.c: New file.
* generated/pack_r17.c: New file.
* generated/pow_c17_i16.c: New file.
* generated/pow_c17_i4.c: New file.
* generated/pow_c17_i8.c: New file.
* generated/pow_r17_i16.c: New file.
* generated/pow_r17_i4.c: New file.
* generated/pow_r17_i8.c: New file.
* generated/product_c17.c: New file.
* generated/product_r17.c: New file.
* generated/reshape_c17.c: New file.
* generated/reshape_r17.c: New file.
* generated/spread_c17.c: New file.
* generated/spread_r17.c: New file.
* generated/sum_c17.c: New file.
* generated/sum_r17.c: New file.
* generated/unpack_c17.c: New file.
* generated/unpack_r17.c: New file.
Diffstat (limited to 'libgfortran/generated')
85 files changed, 26418 insertions, 0 deletions
diff --git a/libgfortran/generated/_abs_c17.F90 b/libgfortran/generated/_abs_c17.F90 new file mode 100644 index 0000000..3c186a2 --- /dev/null +++ b/libgfortran/generated/_abs_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CABS_17 + +elemental function _gfortran_specific__abs_c17 (parm) + complex (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__abs_c17 + + _gfortran_specific__abs_c17 = abs (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_abs_r17.F90 b/libgfortran/generated/_abs_r17.F90 new file mode 100644 index 0000000..bc8f3eb --- /dev/null +++ b/libgfortran/generated/_abs_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_FABS_17 + +elemental function _gfortran_specific__abs_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__abs_r17 + + _gfortran_specific__abs_r17 = abs (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_acos_r17.F90 b/libgfortran/generated/_acos_r17.F90 new file mode 100644 index 0000000..f9e928c --- /dev/null +++ b/libgfortran/generated/_acos_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ACOS_17 + +elemental function _gfortran_specific__acos_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__acos_r17 + + _gfortran_specific__acos_r17 = acos (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_acosh_r17.F90 b/libgfortran/generated/_acosh_r17.F90 new file mode 100644 index 0000000..4508691 --- /dev/null +++ b/libgfortran/generated/_acosh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ACOSH_17 + +elemental function _gfortran_specific__acosh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__acosh_r17 + + _gfortran_specific__acosh_r17 = acosh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_aimag_c17.F90 b/libgfortran/generated/_aimag_c17.F90 new file mode 100644 index 0000000..b5a84fa --- /dev/null +++ b/libgfortran/generated/_aimag_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) + + +elemental function _gfortran_specific__aimag_c17 (parm) + complex (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__aimag_c17 + + _gfortran_specific__aimag_c17 = aimag (parm) +end function + + +#endif diff --git a/libgfortran/generated/_aint_r17.F90 b/libgfortran/generated/_aint_r17.F90 new file mode 100644 index 0000000..4e73b24 --- /dev/null +++ b/libgfortran/generated/_aint_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_TRUNC_17 + +elemental function _gfortran_specific__aint_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__aint_r17 + + _gfortran_specific__aint_r17 = aint (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_anint_r17.F90 b/libgfortran/generated/_anint_r17.F90 new file mode 100644 index 0000000..6b60e11 --- /dev/null +++ b/libgfortran/generated/_anint_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ROUND_17 + +elemental function _gfortran_specific__anint_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__anint_r17 + + _gfortran_specific__anint_r17 = anint (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_asin_r17.F90 b/libgfortran/generated/_asin_r17.F90 new file mode 100644 index 0000000..120be04 --- /dev/null +++ b/libgfortran/generated/_asin_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ASIN_17 + +elemental function _gfortran_specific__asin_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__asin_r17 + + _gfortran_specific__asin_r17 = asin (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_asinh_r17.F90 b/libgfortran/generated/_asinh_r17.F90 new file mode 100644 index 0000000..64f18d2 --- /dev/null +++ b/libgfortran/generated/_asinh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ASINH_17 + +elemental function _gfortran_specific__asinh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__asinh_r17 + + _gfortran_specific__asinh_r17 = asinh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_atan2_r17.F90 b/libgfortran/generated/_atan2_r17.F90 new file mode 100644 index 0000000..f0fc9b8 --- /dev/null +++ b/libgfortran/generated/_atan2_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + +#ifdef HAVE_ATAN2_17 + +elemental function _gfortran_specific__atan2_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__atan2_r17 + + _gfortran_specific__atan2_r17 = atan2 (p1, p2) +end function + +#endif + +#endif diff --git a/libgfortran/generated/_atan_r17.F90 b/libgfortran/generated/_atan_r17.F90 new file mode 100644 index 0000000..a8b0bbd --- /dev/null +++ b/libgfortran/generated/_atan_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ATAN_17 + +elemental function _gfortran_specific__atan_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__atan_r17 + + _gfortran_specific__atan_r17 = atan (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_atanh_r17.F90 b/libgfortran/generated/_atanh_r17.F90 new file mode 100644 index 0000000..dc4d8a3 --- /dev/null +++ b/libgfortran/generated/_atanh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ATANH_17 + +elemental function _gfortran_specific__atanh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__atanh_r17 + + _gfortran_specific__atanh_r17 = atanh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_conjg_c17.F90 b/libgfortran/generated/_conjg_c17.F90 new file mode 100644 index 0000000..86a237c --- /dev/null +++ b/libgfortran/generated/_conjg_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) + + +elemental function _gfortran_specific__conjg_17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__conjg_17 + + _gfortran_specific__conjg_17 = conjg (parm) +end function + + +#endif diff --git a/libgfortran/generated/_cos_c17.F90 b/libgfortran/generated/_cos_c17.F90 new file mode 100644 index 0000000..8d50ac8 --- /dev/null +++ b/libgfortran/generated/_cos_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CCOS_17 + +elemental function _gfortran_specific__cos_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__cos_c17 + + _gfortran_specific__cos_c17 = cos (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_cos_r17.F90 b/libgfortran/generated/_cos_r17.F90 new file mode 100644 index 0000000..fde038e --- /dev/null +++ b/libgfortran/generated/_cos_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_COS_17 + +elemental function _gfortran_specific__cos_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__cos_r17 + + _gfortran_specific__cos_r17 = cos (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_cosh_r17.F90 b/libgfortran/generated/_cosh_r17.F90 new file mode 100644 index 0000000..c8fe7c8 --- /dev/null +++ b/libgfortran/generated/_cosh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_COSH_17 + +elemental function _gfortran_specific__cosh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__cosh_r17 + + _gfortran_specific__cosh_r17 = cosh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_dim_r17.F90 b/libgfortran/generated/_dim_r17.F90 new file mode 100644 index 0000000..d4b78c1 --- /dev/null +++ b/libgfortran/generated/_dim_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + + + +elemental function _gfortran_specific__dim_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__dim_r17 + + _gfortran_specific__dim_r17 = dim (p1, p2) +end function + + + +#endif diff --git a/libgfortran/generated/_exp_c17.F90 b/libgfortran/generated/_exp_c17.F90 new file mode 100644 index 0000000..a2672a4 --- /dev/null +++ b/libgfortran/generated/_exp_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CEXP_17 + +elemental function _gfortran_specific__exp_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__exp_c17 + + _gfortran_specific__exp_c17 = exp (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_exp_r17.F90 b/libgfortran/generated/_exp_r17.F90 new file mode 100644 index 0000000..14cac70 --- /dev/null +++ b/libgfortran/generated/_exp_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_EXP_17 + +elemental function _gfortran_specific__exp_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__exp_r17 + + _gfortran_specific__exp_r17 = exp (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_log10_r17.F90 b/libgfortran/generated/_log10_r17.F90 new file mode 100644 index 0000000..ef3481a --- /dev/null +++ b/libgfortran/generated/_log10_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_LOG10_17 + +elemental function _gfortran_specific__log10_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__log10_r17 + + _gfortran_specific__log10_r17 = log10 (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_log_c17.F90 b/libgfortran/generated/_log_c17.F90 new file mode 100644 index 0000000..65c758d --- /dev/null +++ b/libgfortran/generated/_log_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CLOG_17 + +elemental function _gfortran_specific__log_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__log_c17 + + _gfortran_specific__log_c17 = log (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_log_r17.F90 b/libgfortran/generated/_log_r17.F90 new file mode 100644 index 0000000..95cc9ef --- /dev/null +++ b/libgfortran/generated/_log_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_LOG_17 + +elemental function _gfortran_specific__log_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__log_r17 + + _gfortran_specific__log_r17 = log (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_mod_r17.F90 b/libgfortran/generated/_mod_r17.F90 new file mode 100644 index 0000000..2397395 --- /dev/null +++ b/libgfortran/generated/_mod_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + + + +elemental function _gfortran_specific__mod_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__mod_r17 + + _gfortran_specific__mod_r17 = mod (p1, p2) +end function + + + +#endif diff --git a/libgfortran/generated/_sign_r17.F90 b/libgfortran/generated/_sign_r17.F90 new file mode 100644 index 0000000..d8afb5e --- /dev/null +++ b/libgfortran/generated/_sign_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + + + +elemental function _gfortran_specific__sign_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__sign_r17 + + _gfortran_specific__sign_r17 = sign (p1, p2) +end function + + + +#endif diff --git a/libgfortran/generated/_sin_c17.F90 b/libgfortran/generated/_sin_c17.F90 new file mode 100644 index 0000000..6453ec7 --- /dev/null +++ b/libgfortran/generated/_sin_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CSIN_17 + +elemental function _gfortran_specific__sin_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__sin_c17 + + _gfortran_specific__sin_c17 = sin (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sin_r17.F90 b/libgfortran/generated/_sin_r17.F90 new file mode 100644 index 0000000..8632c4d --- /dev/null +++ b/libgfortran/generated/_sin_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_SIN_17 + +elemental function _gfortran_specific__sin_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__sin_r17 + + _gfortran_specific__sin_r17 = sin (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sinh_r17.F90 b/libgfortran/generated/_sinh_r17.F90 new file mode 100644 index 0000000..bfe0f2a --- /dev/null +++ b/libgfortran/generated/_sinh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_SINH_17 + +elemental function _gfortran_specific__sinh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__sinh_r17 + + _gfortran_specific__sinh_r17 = sinh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sqrt_c17.F90 b/libgfortran/generated/_sqrt_c17.F90 new file mode 100644 index 0000000..5878b87 --- /dev/null +++ b/libgfortran/generated/_sqrt_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CSQRT_17 + +elemental function _gfortran_specific__sqrt_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__sqrt_c17 + + _gfortran_specific__sqrt_c17 = sqrt (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sqrt_r17.F90 b/libgfortran/generated/_sqrt_r17.F90 new file mode 100644 index 0000000..01c3a04 --- /dev/null +++ b/libgfortran/generated/_sqrt_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_SQRT_17 + +elemental function _gfortran_specific__sqrt_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__sqrt_r17 + + _gfortran_specific__sqrt_r17 = sqrt (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_tan_r17.F90 b/libgfortran/generated/_tan_r17.F90 new file mode 100644 index 0000000..01f1bfa --- /dev/null +++ b/libgfortran/generated/_tan_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_TAN_17 + +elemental function _gfortran_specific__tan_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__tan_r17 + + _gfortran_specific__tan_r17 = tan (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_tanh_r17.F90 b/libgfortran/generated/_tanh_r17.F90 new file mode 100644 index 0000000..e04aae3 --- /dev/null +++ b/libgfortran/generated/_tanh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_TANH_17 + +elemental function _gfortran_specific__tanh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__tanh_r17 + + _gfortran_specific__tanh_r17 = tanh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/bessel_r17.c b/libgfortran/generated/bessel_r17.c new file mode 100644 index 0000000..cca2623 --- /dev/null +++ b/libgfortran/generated/bessel_r17.c @@ -0,0 +1,186 @@ +/* Implementation of the BESSEL_JN and BESSEL_YN transformational + function using a recurrence algorithm. + Copyright (C) 2010-2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus <burnus@net-b.de> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + + +#if defined(POWER_IEEE128) +#define MATHFUNC(funcname) __ ## funcname ## ieee128 +#else +#define MATHFUNC(funcname) funcname ## q +#endif + +#if defined (HAVE_GFC_REAL_17) + + + +#if 1 /* FIXME: figure this out later. */ +extern void bessel_jn_r17 (gfc_array_r17 * const restrict ret, int n1, + int n2, GFC_REAL_17 x); +export_proto(bessel_jn_r17); + +void +bessel_jn_r17 (gfc_array_r17 * const restrict ret, int n1, int n2, GFC_REAL_17 x) +{ + int i; + index_type stride; + + GFC_REAL_17 last1, last2, x2rev; + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (ret->base_addr == NULL) + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); + ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_17)); + ret->offset = 0; + } + + if (unlikely (n2 < n1)) + return; + + if (unlikely (compile_options.bounds_check) + && GFC_DESCRIPTOR_EXTENT(ret,0) != (n2-n1+1)) + runtime_error("Incorrect extent in return value of BESSEL_JN " + "(%ld vs. %ld)", (long int) n2-n1, + (long int) GFC_DESCRIPTOR_EXTENT(ret,0)); + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (unlikely (x == 0)) + { + ret->base_addr[0] = 1; + for (i = 1; i <= n2-n1; i++) + ret->base_addr[i*stride] = 0; + return; + } + + last1 = MATHFUNC(jn) (n2, x); + ret->base_addr[(n2-n1)*stride] = last1; + + if (n1 == n2) + return; + + last2 = MATHFUNC(jn) (n2 - 1, x); + ret->base_addr[(n2-n1-1)*stride] = last2; + + if (n1 + 1 == n2) + return; + + x2rev = GFC_REAL_17_LITERAL(2.)/x; + + for (i = n2-n1-2; i >= 0; i--) + { + ret->base_addr[i*stride] = x2rev * (i+1+n1) * last2 - last1; + last1 = last2; + last2 = ret->base_addr[i*stride]; + } +} + +#endif + +#if 1 /* FIXME: figure this out later. */ +extern void bessel_yn_r17 (gfc_array_r17 * const restrict ret, + int n1, int n2, GFC_REAL_17 x); +export_proto(bessel_yn_r17); + +void +bessel_yn_r17 (gfc_array_r17 * const restrict ret, int n1, int n2, + GFC_REAL_17 x) +{ + int i; + index_type stride; + + GFC_REAL_17 last1, last2, x2rev; + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (ret->base_addr == NULL) + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); + ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_17)); + ret->offset = 0; + } + + if (unlikely (n2 < n1)) + return; + + if (unlikely (compile_options.bounds_check) + && GFC_DESCRIPTOR_EXTENT(ret,0) != (n2-n1+1)) + runtime_error("Incorrect extent in return value of BESSEL_JN " + "(%ld vs. %ld)", (long int) n2-n1, + (long int) GFC_DESCRIPTOR_EXTENT(ret,0)); + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (unlikely (x == 0)) + { + for (i = 0; i <= n2-n1; i++) +#if defined(GFC_REAL_17_INFINITY) + ret->base_addr[i*stride] = -GFC_REAL_17_INFINITY; +#else + ret->base_addr[i*stride] = -GFC_REAL_17_HUGE; +#endif + return; + } + + last1 = MATHFUNC(yn) (n1, x); + ret->base_addr[0] = last1; + + if (n1 == n2) + return; + + last2 = MATHFUNC(yn) (n1 + 1, x); + ret->base_addr[1*stride] = last2; + + if (n1 + 1 == n2) + return; + + x2rev = GFC_REAL_17_LITERAL(2.)/x; + + for (i = 2; i <= n2 - n1; i++) + { +#if defined(GFC_REAL_17_INFINITY) + if (unlikely (last2 == -GFC_REAL_17_INFINITY)) + { + ret->base_addr[i*stride] = -GFC_REAL_17_INFINITY; + } + else +#endif + { + ret->base_addr[i*stride] = x2rev * (i-1+n1) * last2 - last1; + last1 = last2; + last2 = ret->base_addr[i*stride]; + } + } +} +#endif + +#endif + diff --git a/libgfortran/generated/cshift0_c17.c b/libgfortran/generated/cshift0_c17.c new file mode 100644 index 0000000..73de471 --- /dev/null +++ b/libgfortran/generated/cshift0_c17.c @@ -0,0 +1,242 @@ +/* Helper function for cshift functions. + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +cshift0_c17 (gfc_array_c17 *ret, const gfc_array_c17 *array, ptrdiff_t shift, + int which) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dim; + index_type len; + index_type n; + + bool do_blocked; + index_type r_ex, a_ex; + + which = which - 1; + sstride[0] = 0; + rstride[0] = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + r_ex = 1; + a_ex = 1; + + if (which > 0) + { + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } + } + else + do_blocked = false; + + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + + rstride0 = rstride[0]; + sstride0 = sstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + + /* Avoid the costly modulo for trivially in-bound shifts. */ + if (shift < 0 || shift >= len) + { + shift = len == 0 ? 0 : shift % (ptrdiff_t)len; + if (shift < 0) + shift += len; + } + + while (rptr) + { + /* Do the shift for this dimension. */ + + /* If elements are contiguous, perform the operation + in two block moves. */ + if (soffset == 1 && roffset == 1) + { + size_t len1 = shift * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - shift) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + shift, len2); + memcpy (rptr + (len - shift), sptr, len1); + } + else + { + /* Otherwise, we will have to perform the copy one element at + a time. */ + GFC_COMPLEX_17 *dest = rptr; + const GFC_COMPLEX_17 *src = &sptr[shift * soffset]; + + for (n = 0; n < len - shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + sptr -= sstride[n] * extent[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + } + } + } + + return; +} + +#endif diff --git a/libgfortran/generated/cshift0_r17.c b/libgfortran/generated/cshift0_r17.c new file mode 100644 index 0000000..670f2b7 --- /dev/null +++ b/libgfortran/generated/cshift0_r17.c @@ -0,0 +1,242 @@ +/* Helper function for cshift functions. + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +cshift0_r17 (gfc_array_r17 *ret, const gfc_array_r17 *array, ptrdiff_t shift, + int which) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dim; + index_type len; + index_type n; + + bool do_blocked; + index_type r_ex, a_ex; + + which = which - 1; + sstride[0] = 0; + rstride[0] = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + r_ex = 1; + a_ex = 1; + + if (which > 0) + { + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } + } + else + do_blocked = false; + + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + + rstride0 = rstride[0]; + sstride0 = sstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + + /* Avoid the costly modulo for trivially in-bound shifts. */ + if (shift < 0 || shift >= len) + { + shift = len == 0 ? 0 : shift % (ptrdiff_t)len; + if (shift < 0) + shift += len; + } + + while (rptr) + { + /* Do the shift for this dimension. */ + + /* If elements are contiguous, perform the operation + in two block moves. */ + if (soffset == 1 && roffset == 1) + { + size_t len1 = shift * sizeof (GFC_REAL_17); + size_t len2 = (len - shift) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + shift, len2); + memcpy (rptr + (len - shift), sptr, len1); + } + else + { + /* Otherwise, we will have to perform the copy one element at + a time. */ + GFC_REAL_17 *dest = rptr; + const GFC_REAL_17 *src = &sptr[shift * soffset]; + + for (n = 0; n < len - shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + sptr -= sstride[n] * extent[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + } + } + } + + return; +} + +#endif diff --git a/libgfortran/generated/cshift1_16_c17.c b/libgfortran/generated/cshift1_16_c17.c new file mode 100644 index 0000000..d646f6d --- /dev/null +++ b/libgfortran/generated/cshift1_16_c17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_16) + +void +cshift1_16_c17 (gfc_array_c17 * const restrict ret, + const gfc_array_c17 * const restrict array, + const gfc_array_i16 * const restrict h, + const GFC_INTEGER_16 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + const GFC_COMPLEX_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_16 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_16 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - sh) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_16_r17.c b/libgfortran/generated/cshift1_16_r17.c new file mode 100644 index 0000000..fe3fa27 --- /dev/null +++ b/libgfortran/generated/cshift1_16_r17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +void +cshift1_16_r17 (gfc_array_r17 * const restrict ret, + const gfc_array_r17 * const restrict array, + const gfc_array_i16 * const restrict h, + const GFC_INTEGER_16 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + GFC_REAL_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + const GFC_REAL_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_16 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_16 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_REAL_17); + size_t len2 = (len - sh) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_4_c17.c b/libgfortran/generated/cshift1_4_c17.c new file mode 100644 index 0000000..c01f02d --- /dev/null +++ b/libgfortran/generated/cshift1_4_c17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_4) + +void +cshift1_4_c17 (gfc_array_c17 * const restrict ret, + const gfc_array_c17 * const restrict array, + const gfc_array_i4 * const restrict h, + const GFC_INTEGER_4 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + const GFC_COMPLEX_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_4 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_4 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - sh) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_4_r17.c b/libgfortran/generated/cshift1_4_r17.c new file mode 100644 index 0000000..7262627 --- /dev/null +++ b/libgfortran/generated/cshift1_4_r17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +void +cshift1_4_r17 (gfc_array_r17 * const restrict ret, + const gfc_array_r17 * const restrict array, + const gfc_array_i4 * const restrict h, + const GFC_INTEGER_4 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + GFC_REAL_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + const GFC_REAL_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_4 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_4 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_REAL_17); + size_t len2 = (len - sh) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_8_c17.c b/libgfortran/generated/cshift1_8_c17.c new file mode 100644 index 0000000..85610a5 --- /dev/null +++ b/libgfortran/generated/cshift1_8_c17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_8) + +void +cshift1_8_c17 (gfc_array_c17 * const restrict ret, + const gfc_array_c17 * const restrict array, + const gfc_array_i8 * const restrict h, + const GFC_INTEGER_8 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + const GFC_COMPLEX_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_8 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_8 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - sh) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_8_r17.c b/libgfortran/generated/cshift1_8_r17.c new file mode 100644 index 0000000..cc58d3c --- /dev/null +++ b/libgfortran/generated/cshift1_8_r17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +void +cshift1_8_r17 (gfc_array_r17 * const restrict ret, + const gfc_array_r17 * const restrict array, + const gfc_array_i8 * const restrict h, + const GFC_INTEGER_8 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + GFC_REAL_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + const GFC_REAL_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_8 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_8 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_REAL_17); + size_t len2 = (len - sh) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/findloc0_c17.c b/libgfortran/generated/findloc0_c17.c new file mode 100644 index 0000000..2d8d257 --- /dev/null +++ b/libgfortran/generated/findloc0_c17.c @@ -0,0 +1,375 @@ + +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_COMPLEX_17) +extern void findloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4); +export_proto(findloc0_c17); + +void +findloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_COMPLEX_17 *base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void mfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + gfc_array_l1 *const restrict, GFC_LOGICAL_4); +export_proto(mfindloc0_c17); + +void +mfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + gfc_array_l1 *const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_COMPLEX_17 *base; + index_type * restrict dest; + GFC_LOGICAL_1 *mbase; + index_type rank; + index_type n; + int mask_kind; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "FINDLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + mbase = mbase + (sz - 1) * mask_kind; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + mbase -= mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + mbase += mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n]* 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void sfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sfindloc0_c17); + +void +sfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type * restrict dest; + index_type n; + + if (mask == NULL || *mask) + { + findloc0_c17 (retarray, array, value, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + internal_error (NULL, "Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} + +#endif diff --git a/libgfortran/generated/findloc0_r17.c b/libgfortran/generated/findloc0_r17.c new file mode 100644 index 0000000..cdac81d --- /dev/null +++ b/libgfortran/generated/findloc0_r17.c @@ -0,0 +1,375 @@ + +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_REAL_17) +extern void findloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4); +export_proto(findloc0_r17); + +void +findloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void mfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + gfc_array_l1 *const restrict, GFC_LOGICAL_4); +export_proto(mfindloc0_r17); + +void +mfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + gfc_array_l1 *const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + index_type * restrict dest; + GFC_LOGICAL_1 *mbase; + index_type rank; + index_type n; + int mask_kind; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "FINDLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + mbase = mbase + (sz - 1) * mask_kind; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + mbase -= mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + mbase += mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n]* 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void sfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sfindloc0_r17); + +void +sfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type * restrict dest; + index_type n; + + if (mask == NULL || *mask) + { + findloc0_r17 (retarray, array, value, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + internal_error (NULL, "Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} + +#endif diff --git a/libgfortran/generated/findloc1_c17.c b/libgfortran/generated/findloc1_c17.c new file mode 100644 index 0000000..3021eb4 --- /dev/null +++ b/libgfortran/generated/findloc1_c17.c @@ -0,0 +1,523 @@ +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_COMPLEX_17) +extern void findloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back); +export_proto(findloc1_c17); + +extern void +findloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + for (n = len; n > 0; n--, src -= delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + for (n = 1; n <= len; n++, src += delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void mfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(mfindloc1_c17); + +extern void +mfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + index_type dim; + int mask_kind; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + msrc = mbase + (len - 1) * mdelta; + for (n = len; n > 0; n--, src -= delta * 1, msrc -= mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + msrc = mbase; + for (n = 1; n <= len; n++, src += delta * 1, msrc += mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void sfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(sfindloc1_c17); + +extern void +sfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type dim; + bool continue_loop; + + if (mask == NULL || *mask) + { + findloc1_c17 (retarray, array, value, pdim, back); + return; + } + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + dest = retarray->base_addr; + continue_loop = 1; + + while (continue_loop) + { + *dest = 0; + + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} +#endif diff --git a/libgfortran/generated/findloc1_r17.c b/libgfortran/generated/findloc1_r17.c new file mode 100644 index 0000000..e32fd69 --- /dev/null +++ b/libgfortran/generated/findloc1_r17.c @@ -0,0 +1,523 @@ +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_REAL_17) +extern void findloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back); +export_proto(findloc1_r17); + +extern void +findloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + for (n = len; n > 0; n--, src -= delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + for (n = 1; n <= len; n++, src += delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void mfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(mfindloc1_r17); + +extern void +mfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + index_type dim; + int mask_kind; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + msrc = mbase + (len - 1) * mdelta; + for (n = len; n > 0; n--, src -= delta * 1, msrc -= mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + msrc = mbase; + for (n = 1; n <= len; n++, src += delta * 1, msrc += mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void sfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(sfindloc1_r17); + +extern void +sfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type dim; + bool continue_loop; + + if (mask == NULL || *mask) + { + findloc1_r17 (retarray, array, value, pdim, back); + return; + } + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + dest = retarray->base_addr; + continue_loop = 1; + + while (continue_loop) + { + *dest = 0; + + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} +#endif diff --git a/libgfortran/generated/in_pack_c17.c b/libgfortran/generated/in_pack_c17.c new file mode 100644 index 0000000..fcc6552 --- /dev/null +++ b/libgfortran/generated/in_pack_c17.c @@ -0,0 +1,116 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) + +/* Allocates a block of memory with internal_malloc if the array needs + repacking. */ + +GFC_COMPLEX_17 * +internal_pack_c17 (gfc_array_c17 * source) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type ssize; + const GFC_COMPLEX_17 *src; + GFC_COMPLEX_17 * restrict dest; + GFC_COMPLEX_17 *destptr; + int packed; + + /* TODO: Investigate how we can figure out if this is a temporary + since the stride=0 thing has been removed from the frontend. */ + + dim = GFC_DESCRIPTOR_RANK (source); + ssize = 1; + packed = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (extent[n] <= 0) + { + /* Do nothing. */ + packed = 1; + break; + } + + if (ssize != stride[n]) + packed = 0; + + ssize *= extent[n]; + } + + if (packed) + return source->base_addr; + + /* Allocate storage for the destination. */ + destptr = xmallocarray (ssize, sizeof (GFC_COMPLEX_17)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; + + + while (src) + { + /* Copy the data. */ + *(dest++) = *src; + /* Advance to the next element. */ + src += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + return destptr; +} + +#endif + diff --git a/libgfortran/generated/in_pack_r17.c b/libgfortran/generated/in_pack_r17.c new file mode 100644 index 0000000..759094c --- /dev/null +++ b/libgfortran/generated/in_pack_r17.c @@ -0,0 +1,116 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) + +/* Allocates a block of memory with internal_malloc if the array needs + repacking. */ + +GFC_REAL_17 * +internal_pack_r17 (gfc_array_r17 * source) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type ssize; + const GFC_REAL_17 *src; + GFC_REAL_17 * restrict dest; + GFC_REAL_17 *destptr; + int packed; + + /* TODO: Investigate how we can figure out if this is a temporary + since the stride=0 thing has been removed from the frontend. */ + + dim = GFC_DESCRIPTOR_RANK (source); + ssize = 1; + packed = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (extent[n] <= 0) + { + /* Do nothing. */ + packed = 1; + break; + } + + if (ssize != stride[n]) + packed = 0; + + ssize *= extent[n]; + } + + if (packed) + return source->base_addr; + + /* Allocate storage for the destination. */ + destptr = xmallocarray (ssize, sizeof (GFC_REAL_17)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; + + + while (src) + { + /* Copy the data. */ + *(dest++) = *src; + /* Advance to the next element. */ + src += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + return destptr; +} + +#endif + diff --git a/libgfortran/generated/in_unpack_c17.c b/libgfortran/generated/in_unpack_c17.c new file mode 100644 index 0000000..9d74b30 --- /dev/null +++ b/libgfortran/generated/in_unpack_c17.c @@ -0,0 +1,104 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +internal_unpack_c17 (gfc_array_c17 * d, const GFC_COMPLEX_17 * src) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type dsize; + GFC_COMPLEX_17 * restrict dest; + + dest = d->base_addr; + if (src == dest || !src) + return; + + dim = GFC_DESCRIPTOR_RANK (d); + dsize = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(d,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(d,n); + if (extent[n] <= 0) + return; + + if (dsize == stride[n]) + dsize *= extent[n]; + else + dsize = 0; + } + + if (dsize != 0) + { + memcpy (dest, src, dsize * sizeof (GFC_COMPLEX_17)); + return; + } + + stride0 = stride[0]; + + while (dest) + { + /* Copy the data. */ + *dest = *(src++); + /* Advance to the next element. */ + dest += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/in_unpack_r17.c b/libgfortran/generated/in_unpack_r17.c new file mode 100644 index 0000000..5742151 --- /dev/null +++ b/libgfortran/generated/in_unpack_r17.c @@ -0,0 +1,104 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +internal_unpack_r17 (gfc_array_r17 * d, const GFC_REAL_17 * src) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type dsize; + GFC_REAL_17 * restrict dest; + + dest = d->base_addr; + if (src == dest || !src) + return; + + dim = GFC_DESCRIPTOR_RANK (d); + dsize = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(d,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(d,n); + if (extent[n] <= 0) + return; + + if (dsize == stride[n]) + dsize *= extent[n]; + else + dsize = 0; + } + + if (dsize != 0) + { + memcpy (dest, src, dsize * sizeof (GFC_REAL_17)); + return; + } + + stride0 = stride[0]; + + while (dest) + { + /* Copy the data. */ + *dest = *(src++); + /* Advance to the next element. */ + dest += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/matmul_c17.c b/libgfortran/generated/matmul_c17.c new file mode 100644 index 0000000..a1a7d42 --- /dev/null +++ b/libgfortran/generated/matmul_c17.c @@ -0,0 +1,3013 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *, + const int *, const GFC_COMPLEX_17 *, const int *, + const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *, + int, int); + +/* The order of loops is different in the case of plain matrix + multiplication C=MATMUL(A,B), and in the frequent special case where + the argument A is the temporary result of a TRANSPOSE intrinsic: + C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by + looking at their strides. + + The equivalent Fortran pseudo-code is: + + DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) + IF (.NOT.IS_TRANSPOSED(A)) THEN + C = 0 + DO J=1,N + DO K=1,COUNT + DO I=1,M + C(I,J) = C(I,J)+A(I,K)*B(K,J) + ELSE + DO J=1,N + DO I=1,M + S = 0 + DO K=1,COUNT + S = S+A(I,K)*B(K,J) + C(I,J) = S + ENDIF +*/ + +/* If try_blas is set to a nonzero value, then the matmul function will + see if there is a way to perform the matrix multiplication by a call + to the BLAS gemm function. */ + +extern void matmul_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); +export_proto(matmul_c17); + +/* Put exhaustive list of possible architectures here here, ORed together. */ + +#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + +#ifdef HAVE_AVX +static void +matmul_c17_avx (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); +static void +matmul_c17_avx (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX */ + +#ifdef HAVE_AVX2 +static void +matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); +static void +matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX2 */ + +#ifdef HAVE_AVX512F +static void +matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); +static void +matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX512F */ + +/* AMD-specifix funtions with AVX128 and FMA3/FMA4. */ + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_c17_avx128_fma3); +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_c17_avx128_fma4); +#endif + +/* Function to fall back to if there is no special processor-specific version. */ +static void +matmul_c17_vanilla (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + + +/* Compiling main function, with selection code for the processor. */ + +/* Currently, this is i386 only. Adjust for other architectures. */ + +void matmul_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + static void (*matmul_p) (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + void (*matmul_fn) (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) + { + matmul_fn = matmul_c17_vanilla; + if (__builtin_cpu_is ("intel")) + { + /* Run down the available processors in order of preference. */ +#ifdef HAVE_AVX512F + if (__builtin_cpu_supports ("avx512f")) + { + matmul_fn = matmul_c17_avx512f; + goto store; + } + +#endif /* HAVE_AVX512F */ + +#ifdef HAVE_AVX2 + if (__builtin_cpu_supports ("avx2") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_c17_avx2; + goto store; + } + +#endif + +#ifdef HAVE_AVX + if (__builtin_cpu_supports ("avx")) + { + matmul_fn = matmul_c17_avx; + goto store; + } +#endif /* HAVE_AVX */ + } + else if (__builtin_cpu_is ("amd")) + { +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_c17_avx128_fma3; + goto store; + } +#endif +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma4")) + { + matmul_fn = matmul_c17_avx128_fma4; + goto store; + } +#endif + + } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); + } + + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); +} + +#else /* Just the vanilla function. */ + +void +matmul_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif +#endif + diff --git a/libgfortran/generated/matmul_r17.c b/libgfortran/generated/matmul_r17.c new file mode 100644 index 0000000..db7f05b --- /dev/null +++ b/libgfortran/generated/matmul_r17.c @@ -0,0 +1,3013 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_REAL_17 *, const GFC_REAL_17 *, + const int *, const GFC_REAL_17 *, const int *, + const GFC_REAL_17 *, GFC_REAL_17 *, const int *, + int, int); + +/* The order of loops is different in the case of plain matrix + multiplication C=MATMUL(A,B), and in the frequent special case where + the argument A is the temporary result of a TRANSPOSE intrinsic: + C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by + looking at their strides. + + The equivalent Fortran pseudo-code is: + + DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) + IF (.NOT.IS_TRANSPOSED(A)) THEN + C = 0 + DO J=1,N + DO K=1,COUNT + DO I=1,M + C(I,J) = C(I,J)+A(I,K)*B(K,J) + ELSE + DO J=1,N + DO I=1,M + S = 0 + DO K=1,COUNT + S = S+A(I,K)*B(K,J) + C(I,J) = S + ENDIF +*/ + +/* If try_blas is set to a nonzero value, then the matmul function will + see if there is a way to perform the matrix multiplication by a call + to the BLAS gemm function. */ + +extern void matmul_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); +export_proto(matmul_r17); + +/* Put exhaustive list of possible architectures here here, ORed together. */ + +#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + +#ifdef HAVE_AVX +static void +matmul_r17_avx (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); +static void +matmul_r17_avx (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX */ + +#ifdef HAVE_AVX2 +static void +matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); +static void +matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX2 */ + +#ifdef HAVE_AVX512F +static void +matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); +static void +matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX512F */ + +/* AMD-specifix funtions with AVX128 and FMA3/FMA4. */ + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_r17_avx128_fma3); +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_r17_avx128_fma4); +#endif + +/* Function to fall back to if there is no special processor-specific version. */ +static void +matmul_r17_vanilla (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + + +/* Compiling main function, with selection code for the processor. */ + +/* Currently, this is i386 only. Adjust for other architectures. */ + +void matmul_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + static void (*matmul_p) (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + void (*matmul_fn) (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) + { + matmul_fn = matmul_r17_vanilla; + if (__builtin_cpu_is ("intel")) + { + /* Run down the available processors in order of preference. */ +#ifdef HAVE_AVX512F + if (__builtin_cpu_supports ("avx512f")) + { + matmul_fn = matmul_r17_avx512f; + goto store; + } + +#endif /* HAVE_AVX512F */ + +#ifdef HAVE_AVX2 + if (__builtin_cpu_supports ("avx2") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_r17_avx2; + goto store; + } + +#endif + +#ifdef HAVE_AVX + if (__builtin_cpu_supports ("avx")) + { + matmul_fn = matmul_r17_avx; + goto store; + } +#endif /* HAVE_AVX */ + } + else if (__builtin_cpu_is ("amd")) + { +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_r17_avx128_fma3; + goto store; + } +#endif +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma4")) + { + matmul_fn = matmul_r17_avx128_fma4; + goto store; + } +#endif + + } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); + } + + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); +} + +#else /* Just the vanilla function. */ + +void +matmul_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif +#endif + diff --git a/libgfortran/generated/matmulavx128_c17.c b/libgfortran/generated/matmulavx128_c17.c new file mode 100644 index 0000000..79cfe5b --- /dev/null +++ b/libgfortran/generated/matmulavx128_c17.c @@ -0,0 +1,1186 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +/* These are the specific versions of matmul with -mprefer-avx128. */ + +#if defined (HAVE_GFC_COMPLEX_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *, + const int *, const GFC_COMPLEX_17 *, const int *, + const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *, + int, int); + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_c17_avx128_fma3); +void +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_c17_avx128_fma4); +void +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#endif + diff --git a/libgfortran/generated/matmulavx128_r17.c b/libgfortran/generated/matmulavx128_r17.c new file mode 100644 index 0000000..a3a0480 --- /dev/null +++ b/libgfortran/generated/matmulavx128_r17.c @@ -0,0 +1,1186 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +/* These are the specific versions of matmul with -mprefer-avx128. */ + +#if defined (HAVE_GFC_REAL_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_REAL_17 *, const GFC_REAL_17 *, + const int *, const GFC_REAL_17 *, const int *, + const GFC_REAL_17 *, GFC_REAL_17 *, const int *, + int, int); + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_r17_avx128_fma3); +void +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_r17_avx128_fma4); +void +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#endif + diff --git a/libgfortran/generated/maxloc0_16_r17.c b/libgfortran/generated/maxloc0_16_r17.c new file mode 100644 index 0000000..0bd627c --- /dev/null +++ b/libgfortran/generated/maxloc0_16_r17.c @@ -0,0 +1,408 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + + +extern void maxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(maxloc0_16_r17); + +void +maxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 maxval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base >= maxval) + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base >= maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mmaxloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mmaxloc0_16_r17); + +void +mmaxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_16 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + maxloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 maxval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base >= maxval) +#endif + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (*mbase && *base >= maxval) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (*mbase && unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + + +extern void smaxloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(smaxloc0_16_r17); + +void +smaxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_16 *dest; + + if (mask == NULL || *mask) + { + maxloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/maxloc0_4_r17.c b/libgfortran/generated/maxloc0_4_r17.c new file mode 100644 index 0000000..cff16c7 --- /dev/null +++ b/libgfortran/generated/maxloc0_4_r17.c @@ -0,0 +1,408 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + + +extern void maxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(maxloc0_4_r17); + +void +maxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 maxval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base >= maxval) + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base >= maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mmaxloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mmaxloc0_4_r17); + +void +mmaxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_4 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + maxloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 maxval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base >= maxval) +#endif + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (*mbase && *base >= maxval) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (*mbase && unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + + +extern void smaxloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(smaxloc0_4_r17); + +void +smaxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_4 *dest; + + if (mask == NULL || *mask) + { + maxloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/maxloc0_8_r17.c b/libgfortran/generated/maxloc0_8_r17.c new file mode 100644 index 0000000..910ac65 --- /dev/null +++ b/libgfortran/generated/maxloc0_8_r17.c @@ -0,0 +1,408 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + + +extern void maxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(maxloc0_8_r17); + +void +maxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 maxval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base >= maxval) + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base >= maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mmaxloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mmaxloc0_8_r17); + +void +mmaxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_8 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + maxloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 maxval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base >= maxval) +#endif + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (*mbase && *base >= maxval) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (*mbase && unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + + +extern void smaxloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(smaxloc0_8_r17); + +void +smaxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_8 *dest; + + if (mask == NULL || *mask) + { + maxloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/maxloc1_16_r17.c b/libgfortran/generated/maxloc1_16_r17.c new file mode 100644 index 0000000..31f06e2 --- /dev/null +++ b/libgfortran/generated/maxloc1_16_r17.c @@ -0,0 +1,607 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +#define HAVE_BACK_ARG 1 + + +extern void maxloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(maxloc1_16_r17); + +void +maxloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_16 result; + src = base; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src >= maxval) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } +#else + n = 0; +#endif + for (; n < len; n++, src += delta) + { + if (back ? *src >= maxval : *src > maxval) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mmaxloc1_16_r17); + +void +mmaxloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxloc1_16_r17 (retarray, array, pdim, back); +#else + maxloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_16 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_16 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_16)n + 1; + if (*src >= maxval) +#endif + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src >= maxval)) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src > maxval)) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(smaxloc1_16_r17); + +void +smaxloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxloc1_16_r17 (retarray, array, pdim, back); +#else + maxloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/maxloc1_4_r17.c b/libgfortran/generated/maxloc1_4_r17.c new file mode 100644 index 0000000..de825c5 --- /dev/null +++ b/libgfortran/generated/maxloc1_4_r17.c @@ -0,0 +1,607 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +#define HAVE_BACK_ARG 1 + + +extern void maxloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(maxloc1_4_r17); + +void +maxloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_4 result; + src = base; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src >= maxval) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } +#else + n = 0; +#endif + for (; n < len; n++, src += delta) + { + if (back ? *src >= maxval : *src > maxval) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mmaxloc1_4_r17); + +void +mmaxloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxloc1_4_r17 (retarray, array, pdim, back); +#else + maxloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_4 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_4 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_4)n + 1; + if (*src >= maxval) +#endif + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src >= maxval)) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src > maxval)) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(smaxloc1_4_r17); + +void +smaxloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxloc1_4_r17 (retarray, array, pdim, back); +#else + maxloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/maxloc1_8_r17.c b/libgfortran/generated/maxloc1_8_r17.c new file mode 100644 index 0000000..666e3a6 --- /dev/null +++ b/libgfortran/generated/maxloc1_8_r17.c @@ -0,0 +1,607 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +#define HAVE_BACK_ARG 1 + + +extern void maxloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(maxloc1_8_r17); + +void +maxloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_8 result; + src = base; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src >= maxval) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } +#else + n = 0; +#endif + for (; n < len; n++, src += delta) + { + if (back ? *src >= maxval : *src > maxval) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mmaxloc1_8_r17); + +void +mmaxloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxloc1_8_r17 (retarray, array, pdim, back); +#else + maxloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_8 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_8 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_8)n + 1; + if (*src >= maxval) +#endif + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src >= maxval)) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src > maxval)) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(smaxloc1_8_r17); + +void +smaxloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxloc1_8_r17 (retarray, array, pdim, back); +#else + maxloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/maxval_r17.c b/libgfortran/generated/maxval_r17.c new file mode 100644 index 0000000..aafdfae --- /dev/null +++ b/libgfortran/generated/maxval_r17.c @@ -0,0 +1,578 @@ +/* Implementation of the MAXVAL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void maxval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(maxval_r17); + +void +maxval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXVAL"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + +#if defined (GFC_REAL_17_INFINITY) + result = -GFC_REAL_17_INFINITY; +#else + result = -GFC_REAL_17_HUGE; +#endif + if (len <= 0) + *dest = -GFC_REAL_17_HUGE; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + if (*src >= result) + break; + } + if (unlikely (n >= len)) + result = GFC_REAL_17_QUIET_NAN; + else for (; n < len; n++, src += delta) + { +#endif + if (*src > result) + result = *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mmaxval_r17); + +void +mmaxval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxval_r17 (retarray, array, pdim, back); +#else + maxval_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXVAL intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXVAL"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXVAL"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + +#if defined (GFC_REAL_17_INFINITY) + result = -GFC_REAL_17_INFINITY; +#else + result = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + int non_empty_p = 0; +#endif + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + +#if defined (GFC_REAL_17_INFINITY) || defined (GFC_REAL_17_QUIET_NAN) + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + non_empty_p = 1; + if (*src >= result) +#endif + break; + } + } + if (unlikely (n >= len)) + { +#if defined (GFC_REAL_17_QUIET_NAN) + result = non_empty_p ? GFC_REAL_17_QUIET_NAN : -GFC_REAL_17_HUGE; +#else + result = -GFC_REAL_17_HUGE; +#endif + } + else for (; n < len; n++, src += delta, msrc += mdelta) + { +#endif + if (*msrc && *src > result) + result = *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(smaxval_r17); + +void +smaxval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxval_r17 (retarray, array, pdim, back); +#else + maxval_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXVAL intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = -GFC_REAL_17_HUGE; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minloc0_16_r17.c b/libgfortran/generated/minloc0_16_r17.c new file mode 100644 index 0000000..e6a77c4 --- /dev/null +++ b/libgfortran/generated/minloc0_16_r17.c @@ -0,0 +1,407 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + + +extern void minloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(minloc0_16_r17); + +void +minloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 minval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base <= minval) + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mminloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mminloc0_16_r17); + +void +mminloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_16 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + minloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 minval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base <= minval) +#endif + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (unlikely (*mbase && (*base <= minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*mbase && (*base < minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void sminloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sminloc0_16_r17); + +void +sminloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_16 *dest; + + if (mask == NULL || *mask) + { + minloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/minloc0_4_r17.c b/libgfortran/generated/minloc0_4_r17.c new file mode 100644 index 0000000..7718d5c --- /dev/null +++ b/libgfortran/generated/minloc0_4_r17.c @@ -0,0 +1,407 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + + +extern void minloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(minloc0_4_r17); + +void +minloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 minval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base <= minval) + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mminloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mminloc0_4_r17); + +void +mminloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_4 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + minloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 minval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base <= minval) +#endif + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (unlikely (*mbase && (*base <= minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*mbase && (*base < minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void sminloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sminloc0_4_r17); + +void +sminloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_4 *dest; + + if (mask == NULL || *mask) + { + minloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/minloc0_8_r17.c b/libgfortran/generated/minloc0_8_r17.c new file mode 100644 index 0000000..8c8f8bd --- /dev/null +++ b/libgfortran/generated/minloc0_8_r17.c @@ -0,0 +1,407 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + + +extern void minloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(minloc0_8_r17); + +void +minloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 minval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base <= minval) + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mminloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mminloc0_8_r17); + +void +mminloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_8 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + minloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 minval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base <= minval) +#endif + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (unlikely (*mbase && (*base <= minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*mbase && (*base < minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void sminloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sminloc0_8_r17); + +void +sminloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_8 *dest; + + if (mask == NULL || *mask) + { + minloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/minloc1_16_r17.c b/libgfortran/generated/minloc1_16_r17.c new file mode 100644 index 0000000..95f17e0 --- /dev/null +++ b/libgfortran/generated/minloc1_16_r17.c @@ -0,0 +1,617 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +#define HAVE_BACK_ARG 1 + + +extern void minloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(minloc1_16_r17); + +void +minloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_16 result; + src = base; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src <= minval) + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } +#else + n = 0; +#endif + if (back) + for (; n < len; n++, src += delta) + { + if (unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + else + for (; n < len; n++, src += delta) + { + if (unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_16) n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mminloc1_16_r17); + +void +mminloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minloc1_16_r17 (retarray, array, pdim, back); +#else + minloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_16 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_16 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_16)n + 1; + if (*src <= minval) +#endif + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_16) n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(sminloc1_16_r17); + +void +sminloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minloc1_16_r17 (retarray, array, pdim, back); +#else + minloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minloc1_4_r17.c b/libgfortran/generated/minloc1_4_r17.c new file mode 100644 index 0000000..4deb317 --- /dev/null +++ b/libgfortran/generated/minloc1_4_r17.c @@ -0,0 +1,617 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +#define HAVE_BACK_ARG 1 + + +extern void minloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(minloc1_4_r17); + +void +minloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_4 result; + src = base; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src <= minval) + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } +#else + n = 0; +#endif + if (back) + for (; n < len; n++, src += delta) + { + if (unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + else + for (; n < len; n++, src += delta) + { + if (unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_4) n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mminloc1_4_r17); + +void +mminloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minloc1_4_r17 (retarray, array, pdim, back); +#else + minloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_4 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_4 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_4)n + 1; + if (*src <= minval) +#endif + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_4) n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(sminloc1_4_r17); + +void +sminloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minloc1_4_r17 (retarray, array, pdim, back); +#else + minloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minloc1_8_r17.c b/libgfortran/generated/minloc1_8_r17.c new file mode 100644 index 0000000..4237bb3 --- /dev/null +++ b/libgfortran/generated/minloc1_8_r17.c @@ -0,0 +1,617 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +#define HAVE_BACK_ARG 1 + + +extern void minloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(minloc1_8_r17); + +void +minloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_8 result; + src = base; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src <= minval) + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } +#else + n = 0; +#endif + if (back) + for (; n < len; n++, src += delta) + { + if (unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + else + for (; n < len; n++, src += delta) + { + if (unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_8) n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mminloc1_8_r17); + +void +mminloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minloc1_8_r17 (retarray, array, pdim, back); +#else + minloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_8 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_8 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_8)n + 1; + if (*src <= minval) +#endif + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_8) n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(sminloc1_8_r17); + +void +sminloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minloc1_8_r17 (retarray, array, pdim, back); +#else + minloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minval_r17.c b/libgfortran/generated/minval_r17.c new file mode 100644 index 0000000..9705003 --- /dev/null +++ b/libgfortran/generated/minval_r17.c @@ -0,0 +1,578 @@ +/* Implementation of the MINVAL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void minval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(minval_r17); + +void +minval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINVAL"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + +#if defined (GFC_REAL_17_INFINITY) + result = GFC_REAL_17_INFINITY; +#else + result = GFC_REAL_17_HUGE; +#endif + if (len <= 0) + *dest = GFC_REAL_17_HUGE; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + if (*src <= result) + break; + } + if (unlikely (n >= len)) + result = GFC_REAL_17_QUIET_NAN; + else for (; n < len; n++, src += delta) + { +#endif + if (*src < result) + result = *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mminval_r17); + +void +mminval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minval_r17 (retarray, array, pdim, back); +#else + minval_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINVAL intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINVAL"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINVAL"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + +#if defined (GFC_REAL_17_INFINITY) + result = GFC_REAL_17_INFINITY; +#else + result = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + int non_empty_p = 0; +#endif + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + +#if defined (GFC_REAL_17_INFINITY) || defined (GFC_REAL_17_QUIET_NAN) + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + non_empty_p = 1; + if (*src <= result) +#endif + break; + } + } + if (unlikely (n >= len)) + { +#if defined (GFC_REAL_17_QUIET_NAN) + result = non_empty_p ? GFC_REAL_17_QUIET_NAN : GFC_REAL_17_HUGE; +#else + result = GFC_REAL_17_HUGE; +#endif + } + else for (; n < len; n++, src += delta, msrc += mdelta) + { +#endif + if (*msrc && *src < result) + result = *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(sminval_r17); + +void +sminval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minval_r17 (retarray, array, pdim, back); +#else + minval_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINVAL intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = GFC_REAL_17_HUGE; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/norm2_r17.c b/libgfortran/generated/norm2_r17.c new file mode 100644 index 0000000..d8a0fb7 --- /dev/null +++ b/libgfortran/generated/norm2_r17.c @@ -0,0 +1,219 @@ +/* Implementation of the NORM2 intrinsic + Copyright (C) 2010-2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus <burnus@net-b.de> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) && 1 /* FIXME: figure this out later. */ && 1 /* FIXME: figure this out later. */ + +#if defined(POWER_IEEE128) +#define MATHFUNC(funcname) __ ## funcname ## ieee128 +#else +#define MATHFUNC(funcname) funcname ## q +#endif + + +extern void norm2_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(norm2_r17); + +void +norm2_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in NORM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " NORM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "NORM"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + + GFC_REAL_17 scale; + result = 0; + scale = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + if (*src != 0) + { + GFC_REAL_17 absX, val; + absX = MATHFUNC(fabs) (*src); + if (scale < absX) + { + val = scale / absX; + result = 1 + result * val * val; + scale = absX; + } + else + { + val = absX / scale; + result += val * val; + } + } + } + result = scale * MATHFUNC(sqrt) (result); + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/pack_c17.c b/libgfortran/generated/pack_c17.c new file mode 100644 index 0000000..2084c53 --- /dev/null +++ b/libgfortran/generated/pack_c17.c @@ -0,0 +1,257 @@ +/* Specific implementation of the PACK intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +/* PACK is specified as follows: + + 13.14.80 PACK (ARRAY, MASK, [VECTOR]) + + Description: Pack an array into an array of rank one under the + control of a mask. + + Class: Transformational function. + + Arguments: + ARRAY may be of any type. It shall not be scalar. + MASK shall be of type LOGICAL. It shall be conformable with ARRAY. + VECTOR (optional) shall be of the same type and type parameters + as ARRAY. VECTOR shall have at least as many elements as + there are true elements in MASK. If MASK is a scalar + with the value true, VECTOR shall have at least as many + elements as there are in ARRAY. + + Result Characteristics: The result is an array of rank one with the + same type and type parameters as ARRAY. If VECTOR is present, the + result size is that of VECTOR; otherwise, the result size is the + number /t/ of true elements in MASK unless MASK is scalar with the + value true, in which case the result size is the size of ARRAY. + + Result Value: Element /i/ of the result is the element of ARRAY + that corresponds to the /i/th true element of MASK, taking elements + in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is + present and has size /n/ > /t/, element /i/ of the result has the + value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/. + + Examples: The nonzero elements of an array M with the value + | 0 0 0 | + | 9 0 0 | may be "gathered" by the function PACK. The result of + | 0 0 7 | + PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0, + VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12]. + +There are two variants of the PACK intrinsic: one, where MASK is +array valued, and the other one where MASK is scalar. */ + +void +pack_c17 (gfc_array_c17 *ret, const gfc_array_c17 *array, + const gfc_array_l1 *mask, const gfc_array_c17 *vector) +{ + /* r.* indicates the return array. */ + index_type rstride0; + GFC_COMPLEX_17 * restrict rptr; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + const GFC_COMPLEX_17 *sptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + int zero_sized; + index_type n; + index_type dim; + index_type nelem; + index_type total; + int mask_kind; + + dim = GFC_DESCRIPTOR_RANK (array); + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + zero_sized = 0; + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + if (extent[n] <= 0) + zero_sized = 1; + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (sstride[0] == 0) + sstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = mask_kind; + + if (zero_sized) + sptr = NULL; + else + sptr = array->base_addr; + + if (ret->base_addr == NULL || unlikely (compile_options.bounds_check)) + { + /* Count the elements, either for allocating memory or + for bounds checking. */ + + if (vector != NULL) + { + /* The return array will have as many + elements as there are in VECTOR. */ + total = GFC_DESCRIPTOR_EXTENT(vector,0); + if (total < 0) + { + total = 0; + vector = NULL; + } + } + else + { + /* We have to count the true elements in MASK. */ + total = count_0 (mask); + } + + if (ret->base_addr == NULL) + { + /* Setup the array descriptor. */ + GFC_DIMENSION_SET(ret->dim[0], 0, total-1, 1); + + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (total, sizeof (GFC_COMPLEX_17)); + + if (total == 0) + return; + } + else + { + /* We come here because of range checking. */ + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,0); + if (total != ret_extent) + runtime_error ("Incorrect extent in return value of PACK intrinsic;" + " is %ld, should be %ld", (long int) total, + (long int) ret_extent); + } + } + + rstride0 = GFC_DESCRIPTOR_STRIDE(ret,0); + if (rstride0 == 0) + rstride0 = 1; + sstride0 = sstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + + while (sptr && mptr) + { + /* Test this element. */ + if (*mptr) + { + /* Add it. */ + *rptr = *sptr; + rptr += rstride0; + } + /* Advance to the next element. */ + sptr += sstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + mptr += mstride[n]; + } + } + } + + /* Add any remaining elements from VECTOR. */ + if (vector) + { + n = GFC_DESCRIPTOR_EXTENT(vector,0); + nelem = ((rptr - ret->base_addr) / rstride0); + if (n > nelem) + { + sstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (sstride0 == 0) + sstride0 = 1; + + sptr = vector->base_addr + sstride0 * nelem; + n -= nelem; + while (n--) + { + *rptr = *sptr; + rptr += rstride0; + sptr += sstride0; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/pack_r17.c b/libgfortran/generated/pack_r17.c new file mode 100644 index 0000000..ce95011 --- /dev/null +++ b/libgfortran/generated/pack_r17.c @@ -0,0 +1,257 @@ +/* Specific implementation of the PACK intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +/* PACK is specified as follows: + + 13.14.80 PACK (ARRAY, MASK, [VECTOR]) + + Description: Pack an array into an array of rank one under the + control of a mask. + + Class: Transformational function. + + Arguments: + ARRAY may be of any type. It shall not be scalar. + MASK shall be of type LOGICAL. It shall be conformable with ARRAY. + VECTOR (optional) shall be of the same type and type parameters + as ARRAY. VECTOR shall have at least as many elements as + there are true elements in MASK. If MASK is a scalar + with the value true, VECTOR shall have at least as many + elements as there are in ARRAY. + + Result Characteristics: The result is an array of rank one with the + same type and type parameters as ARRAY. If VECTOR is present, the + result size is that of VECTOR; otherwise, the result size is the + number /t/ of true elements in MASK unless MASK is scalar with the + value true, in which case the result size is the size of ARRAY. + + Result Value: Element /i/ of the result is the element of ARRAY + that corresponds to the /i/th true element of MASK, taking elements + in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is + present and has size /n/ > /t/, element /i/ of the result has the + value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/. + + Examples: The nonzero elements of an array M with the value + | 0 0 0 | + | 9 0 0 | may be "gathered" by the function PACK. The result of + | 0 0 7 | + PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0, + VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12]. + +There are two variants of the PACK intrinsic: one, where MASK is +array valued, and the other one where MASK is scalar. */ + +void +pack_r17 (gfc_array_r17 *ret, const gfc_array_r17 *array, + const gfc_array_l1 *mask, const gfc_array_r17 *vector) +{ + /* r.* indicates the return array. */ + index_type rstride0; + GFC_REAL_17 * restrict rptr; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + const GFC_REAL_17 *sptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + int zero_sized; + index_type n; + index_type dim; + index_type nelem; + index_type total; + int mask_kind; + + dim = GFC_DESCRIPTOR_RANK (array); + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + zero_sized = 0; + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + if (extent[n] <= 0) + zero_sized = 1; + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (sstride[0] == 0) + sstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = mask_kind; + + if (zero_sized) + sptr = NULL; + else + sptr = array->base_addr; + + if (ret->base_addr == NULL || unlikely (compile_options.bounds_check)) + { + /* Count the elements, either for allocating memory or + for bounds checking. */ + + if (vector != NULL) + { + /* The return array will have as many + elements as there are in VECTOR. */ + total = GFC_DESCRIPTOR_EXTENT(vector,0); + if (total < 0) + { + total = 0; + vector = NULL; + } + } + else + { + /* We have to count the true elements in MASK. */ + total = count_0 (mask); + } + + if (ret->base_addr == NULL) + { + /* Setup the array descriptor. */ + GFC_DIMENSION_SET(ret->dim[0], 0, total-1, 1); + + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (total, sizeof (GFC_REAL_17)); + + if (total == 0) + return; + } + else + { + /* We come here because of range checking. */ + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,0); + if (total != ret_extent) + runtime_error ("Incorrect extent in return value of PACK intrinsic;" + " is %ld, should be %ld", (long int) total, + (long int) ret_extent); + } + } + + rstride0 = GFC_DESCRIPTOR_STRIDE(ret,0); + if (rstride0 == 0) + rstride0 = 1; + sstride0 = sstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + + while (sptr && mptr) + { + /* Test this element. */ + if (*mptr) + { + /* Add it. */ + *rptr = *sptr; + rptr += rstride0; + } + /* Advance to the next element. */ + sptr += sstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + mptr += mstride[n]; + } + } + } + + /* Add any remaining elements from VECTOR. */ + if (vector) + { + n = GFC_DESCRIPTOR_EXTENT(vector,0); + nelem = ((rptr - ret->base_addr) / rstride0); + if (n > nelem) + { + sstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (sstride0 == 0) + sstride0 = 1; + + sptr = vector->base_addr + sstride0 * nelem; + n -= nelem; + while (n--) + { + *rptr = *sptr; + rptr += rstride0; + sptr += sstride0; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/pow_c17_i16.c b/libgfortran/generated/pow_c17_i16.c new file mode 100644 index 0000000..c7dccf5 --- /dev/null +++ b/libgfortran/generated/pow_c17_i16.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_16) + +GFC_COMPLEX_17 pow_c17_i16 (GFC_COMPLEX_17 a, GFC_INTEGER_16 b); +export_proto(pow_c17_i16); + +GFC_COMPLEX_17 +pow_c17_i16 (GFC_COMPLEX_17 a, GFC_INTEGER_16 b) +{ + GFC_COMPLEX_17 pow, x; + GFC_INTEGER_16 n; + GFC_UINTEGER_16 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_c17_i4.c b/libgfortran/generated/pow_c17_i4.c new file mode 100644 index 0000000..74d092a --- /dev/null +++ b/libgfortran/generated/pow_c17_i4.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_4) + +GFC_COMPLEX_17 pow_c17_i4 (GFC_COMPLEX_17 a, GFC_INTEGER_4 b); +export_proto(pow_c17_i4); + +GFC_COMPLEX_17 +pow_c17_i4 (GFC_COMPLEX_17 a, GFC_INTEGER_4 b) +{ + GFC_COMPLEX_17 pow, x; + GFC_INTEGER_4 n; + GFC_UINTEGER_4 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_c17_i8.c b/libgfortran/generated/pow_c17_i8.c new file mode 100644 index 0000000..8dc30ee --- /dev/null +++ b/libgfortran/generated/pow_c17_i8.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_8) + +GFC_COMPLEX_17 pow_c17_i8 (GFC_COMPLEX_17 a, GFC_INTEGER_8 b); +export_proto(pow_c17_i8); + +GFC_COMPLEX_17 +pow_c17_i8 (GFC_COMPLEX_17 a, GFC_INTEGER_8 b) +{ + GFC_COMPLEX_17 pow, x; + GFC_INTEGER_8 n; + GFC_UINTEGER_8 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_r17_i16.c b/libgfortran/generated/pow_r17_i16.c new file mode 100644 index 0000000..817dd59 --- /dev/null +++ b/libgfortran/generated/pow_r17_i16.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +GFC_REAL_17 pow_r17_i16 (GFC_REAL_17 a, GFC_INTEGER_16 b); +export_proto(pow_r17_i16); + +GFC_REAL_17 +pow_r17_i16 (GFC_REAL_17 a, GFC_INTEGER_16 b) +{ + GFC_REAL_17 pow, x; + GFC_INTEGER_16 n; + GFC_UINTEGER_16 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_r17_i4.c b/libgfortran/generated/pow_r17_i4.c new file mode 100644 index 0000000..16ba682 --- /dev/null +++ b/libgfortran/generated/pow_r17_i4.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +GFC_REAL_17 pow_r17_i4 (GFC_REAL_17 a, GFC_INTEGER_4 b); +export_proto(pow_r17_i4); + +GFC_REAL_17 +pow_r17_i4 (GFC_REAL_17 a, GFC_INTEGER_4 b) +{ + GFC_REAL_17 pow, x; + GFC_INTEGER_4 n; + GFC_UINTEGER_4 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_r17_i8.c b/libgfortran/generated/pow_r17_i8.c new file mode 100644 index 0000000..d7bfce1 --- /dev/null +++ b/libgfortran/generated/pow_r17_i8.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +GFC_REAL_17 pow_r17_i8 (GFC_REAL_17 a, GFC_INTEGER_8 b); +export_proto(pow_r17_i8); + +GFC_REAL_17 +pow_r17_i8 (GFC_REAL_17 a, GFC_INTEGER_8 b) +{ + GFC_REAL_17 pow, x; + GFC_INTEGER_8 n; + GFC_UINTEGER_8 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/product_c17.c b/libgfortran/generated/product_c17.c new file mode 100644 index 0000000..dee58a1 --- /dev/null +++ b/libgfortran/generated/product_c17.c @@ -0,0 +1,536 @@ +/* Implementation of the PRODUCT intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_COMPLEX_17) + + +extern void product_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict); +export_proto(product_c17); + +void +product_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + GFC_COMPLEX_17 result; + src = base; + { + + result = 1; + if (len <= 0) + *dest = 1; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result *= *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mproduct_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mproduct_c17); + +void +mproduct_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + const GFC_COMPLEX_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + product_c17 (retarray, array, pdim, back); +#else + product_c17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in PRODUCT intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "PRODUCT"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_COMPLEX_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_COMPLEX_17 result; + src = base; + msrc = mbase; + { + + result = 1; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result *= *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sproduct_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(sproduct_c17); + +void +sproduct_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + product_c17 (retarray, array, pdim, back); +#else + product_c17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " PRODUCT intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 1; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/product_r17.c b/libgfortran/generated/product_r17.c new file mode 100644 index 0000000..1059533 --- /dev/null +++ b/libgfortran/generated/product_r17.c @@ -0,0 +1,536 @@ +/* Implementation of the PRODUCT intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void product_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(product_r17); + +void +product_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + + result = 1; + if (len <= 0) + *dest = 1; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result *= *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mproduct_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mproduct_r17); + +void +mproduct_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + product_r17 (retarray, array, pdim, back); +#else + product_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in PRODUCT intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "PRODUCT"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + + result = 1; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result *= *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sproduct_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(sproduct_r17); + +void +sproduct_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + product_r17 (retarray, array, pdim, back); +#else + product_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " PRODUCT intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 1; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/reshape_c17.c b/libgfortran/generated/reshape_c17.c new file mode 100644 index 0000000..b0d7ec8 --- /dev/null +++ b/libgfortran/generated/reshape_c17.c @@ -0,0 +1,366 @@ +/* Implementation of the RESHAPE intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) + +typedef GFC_FULL_ARRAY_DESCRIPTOR(1, index_type) shape_type; + + +extern void reshape_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, + shape_type * const restrict, + gfc_array_c17 * const restrict, + shape_type * const restrict); +export_proto(reshape_c17); + +void +reshape_c17 (gfc_array_c17 * const restrict ret, + gfc_array_c17 * const restrict source, + shape_type * const restrict shape, + gfc_array_c17 * const restrict pad, + shape_type * const restrict order) +{ + /* r.* indicates the return array. */ + index_type rcount[GFC_MAX_DIMENSIONS]; + index_type rextent[GFC_MAX_DIMENSIONS]; + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdim; + index_type rsize; + index_type rs; + index_type rex; + GFC_COMPLEX_17 *rptr; + /* s.* indicates the source array. */ + index_type scount[GFC_MAX_DIMENSIONS]; + index_type sextent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type sdim; + index_type ssize; + const GFC_COMPLEX_17 *sptr; + /* p.* indicates the pad array. */ + index_type pcount[GFC_MAX_DIMENSIONS]; + index_type pextent[GFC_MAX_DIMENSIONS]; + index_type pstride[GFC_MAX_DIMENSIONS]; + index_type pdim; + index_type psize; + const GFC_COMPLEX_17 *pptr; + + const GFC_COMPLEX_17 *src; + int sempty, pempty, shape_empty; + index_type shape_data[GFC_MAX_DIMENSIONS]; + + rdim = GFC_DESCRIPTOR_EXTENT(shape,0); + /* rdim is always > 0; this lets the compiler optimize more and + avoids a potential warning. */ + GFC_ASSERT(rdim>0); + + if (rdim != GFC_DESCRIPTOR_RANK(ret)) + runtime_error("rank of return array incorrect in RESHAPE intrinsic"); + + shape_empty = 0; + + for (index_type n = 0; n < rdim; n++) + { + shape_data[n] = shape->base_addr[n * GFC_DESCRIPTOR_STRIDE(shape,0)]; + if (shape_data[n] <= 0) + { + shape_data[n] = 0; + shape_empty = 1; + } + } + + if (ret->base_addr == NULL) + { + index_type alloc_size; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rex = shape_data[n]; + + GFC_DIMENSION_SET(ret->dim[n], 0, rex - 1, rs); + + rs *= rex; + } + ret->offset = 0; + + if (unlikely (rs < 1)) + alloc_size = 0; + else + alloc_size = rs; + + ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + ret->dtype.rank = rdim; + } + + if (shape_empty) + return; + + if (pad) + { + pdim = GFC_DESCRIPTOR_RANK (pad); + psize = 1; + pempty = 0; + for (index_type n = 0; n < pdim; n++) + { + pcount[n] = 0; + pstride[n] = GFC_DESCRIPTOR_STRIDE(pad,n); + pextent[n] = GFC_DESCRIPTOR_EXTENT(pad,n); + if (pextent[n] <= 0) + { + pempty = 1; + pextent[n] = 0; + } + + if (psize == pstride[n]) + psize *= pextent[n]; + else + psize = 0; + } + pptr = pad->base_addr; + } + else + { + pdim = 0; + psize = 1; + pempty = 1; + pptr = NULL; + } + + if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, source_extent; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rs *= shape_data[n]; + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (ret_extent != shape_data[n]) + runtime_error("Incorrect extent in return value of RESHAPE" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) shape_data[n]); + } + + source_extent = 1; + sdim = GFC_DESCRIPTOR_RANK (source); + for (index_type n = 0; n < sdim; n++) + { + index_type se; + se = GFC_DESCRIPTOR_EXTENT(source,n); + source_extent *= se > 0 ? se : 0; + } + + if (rs > source_extent && (!pad || pempty)) + runtime_error("Incorrect size in SOURCE argument to RESHAPE" + " intrinsic: is %ld, should be %ld", + (long int) source_extent, (long int) rs); + + if (order) + { + int seen[GFC_MAX_DIMENSIONS]; + index_type v; + + for (index_type n = 0; n < rdim; n++) + seen[n] = 0; + + for (index_type n = 0; n < rdim; n++) + { + v = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + + if (v < 0 || v >= rdim) + runtime_error("Value %ld out of range in ORDER argument" + " to RESHAPE intrinsic", (long int) v + 1); + + if (seen[v] != 0) + runtime_error("Duplicate value %ld in ORDER argument to" + " RESHAPE intrinsic", (long int) v + 1); + + seen[v] = 1; + } + } + } + + rsize = 1; + for (index_type n = 0; n < rdim; n++) + { + index_type dim; + if (order) + dim = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + else + dim = n; + + rcount[n] = 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + rextent[n] = GFC_DESCRIPTOR_EXTENT(ret,dim); + if (rextent[n] < 0) + rextent[n] = 0; + + if (rextent[n] != shape_data[dim]) + runtime_error ("shape and target do not conform"); + + if (rsize == rstride[n]) + rsize *= rextent[n]; + else + rsize = 0; + if (rextent[n] <= 0) + return; + } + + sdim = GFC_DESCRIPTOR_RANK (source); + + /* sdim is always > 0; this lets the compiler optimize more and + avoids a warning. */ + GFC_ASSERT(sdim>0); + + ssize = 1; + sempty = 0; + for (index_type n = 0; n < sdim; n++) + { + scount[n] = 0; + sstride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + sextent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (sextent[n] <= 0) + { + sempty = 1; + sextent[n] = 0; + } + + if (ssize == sstride[n]) + ssize *= sextent[n]; + else + ssize = 0; + } + + if (rsize != 0 && ssize != 0 && psize != 0) + { + rsize *= sizeof (GFC_COMPLEX_17); + ssize *= sizeof (GFC_COMPLEX_17); + psize *= sizeof (GFC_COMPLEX_17); + reshape_packed ((char *)ret->base_addr, rsize, (char *)source->base_addr, + ssize, pad ? (char *)pad->base_addr : NULL, psize); + return; + } + rptr = ret->base_addr; + src = sptr = source->base_addr; + rstride0 = rstride[0]; + sstride0 = sstride[0]; + + if (sempty && pempty) + abort (); + + if (sempty) + { + /* Pretend we are using the pad array the first time around, too. */ + src = pptr; + sptr = pptr; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = pstride[0]; + } + } + + while (rptr) + { + /* Select between the source and pad arrays. */ + *rptr = *src; + /* Advance to the next element. */ + rptr += rstride0; + src += sstride0; + rcount[0]++; + scount[0]++; + + /* Advance to the next destination element. */ + index_type n = 0; + while (rcount[n] == rextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + rcount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * rextent[n]; + n++; + if (n == rdim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + rcount[n]++; + rptr += rstride[n]; + } + } + /* Advance to the next source element. */ + n = 0; + while (scount[n] == sextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + scount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= sstride[n] * sextent[n]; + n++; + if (n == sdim) + { + if (sptr && pad) + { + /* Switch to the pad array. */ + sptr = NULL; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = sstride[0]; + } + } + /* We now start again from the beginning of the pad array. */ + src = pptr; + break; + } + else + { + scount[n]++; + src += sstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/reshape_r17.c b/libgfortran/generated/reshape_r17.c new file mode 100644 index 0000000..36425ca --- /dev/null +++ b/libgfortran/generated/reshape_r17.c @@ -0,0 +1,366 @@ +/* Implementation of the RESHAPE intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) + +typedef GFC_FULL_ARRAY_DESCRIPTOR(1, index_type) shape_type; + + +extern void reshape_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, + shape_type * const restrict, + gfc_array_r17 * const restrict, + shape_type * const restrict); +export_proto(reshape_r17); + +void +reshape_r17 (gfc_array_r17 * const restrict ret, + gfc_array_r17 * const restrict source, + shape_type * const restrict shape, + gfc_array_r17 * const restrict pad, + shape_type * const restrict order) +{ + /* r.* indicates the return array. */ + index_type rcount[GFC_MAX_DIMENSIONS]; + index_type rextent[GFC_MAX_DIMENSIONS]; + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdim; + index_type rsize; + index_type rs; + index_type rex; + GFC_REAL_17 *rptr; + /* s.* indicates the source array. */ + index_type scount[GFC_MAX_DIMENSIONS]; + index_type sextent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type sdim; + index_type ssize; + const GFC_REAL_17 *sptr; + /* p.* indicates the pad array. */ + index_type pcount[GFC_MAX_DIMENSIONS]; + index_type pextent[GFC_MAX_DIMENSIONS]; + index_type pstride[GFC_MAX_DIMENSIONS]; + index_type pdim; + index_type psize; + const GFC_REAL_17 *pptr; + + const GFC_REAL_17 *src; + int sempty, pempty, shape_empty; + index_type shape_data[GFC_MAX_DIMENSIONS]; + + rdim = GFC_DESCRIPTOR_EXTENT(shape,0); + /* rdim is always > 0; this lets the compiler optimize more and + avoids a potential warning. */ + GFC_ASSERT(rdim>0); + + if (rdim != GFC_DESCRIPTOR_RANK(ret)) + runtime_error("rank of return array incorrect in RESHAPE intrinsic"); + + shape_empty = 0; + + for (index_type n = 0; n < rdim; n++) + { + shape_data[n] = shape->base_addr[n * GFC_DESCRIPTOR_STRIDE(shape,0)]; + if (shape_data[n] <= 0) + { + shape_data[n] = 0; + shape_empty = 1; + } + } + + if (ret->base_addr == NULL) + { + index_type alloc_size; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rex = shape_data[n]; + + GFC_DIMENSION_SET(ret->dim[n], 0, rex - 1, rs); + + rs *= rex; + } + ret->offset = 0; + + if (unlikely (rs < 1)) + alloc_size = 0; + else + alloc_size = rs; + + ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + ret->dtype.rank = rdim; + } + + if (shape_empty) + return; + + if (pad) + { + pdim = GFC_DESCRIPTOR_RANK (pad); + psize = 1; + pempty = 0; + for (index_type n = 0; n < pdim; n++) + { + pcount[n] = 0; + pstride[n] = GFC_DESCRIPTOR_STRIDE(pad,n); + pextent[n] = GFC_DESCRIPTOR_EXTENT(pad,n); + if (pextent[n] <= 0) + { + pempty = 1; + pextent[n] = 0; + } + + if (psize == pstride[n]) + psize *= pextent[n]; + else + psize = 0; + } + pptr = pad->base_addr; + } + else + { + pdim = 0; + psize = 1; + pempty = 1; + pptr = NULL; + } + + if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, source_extent; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rs *= shape_data[n]; + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (ret_extent != shape_data[n]) + runtime_error("Incorrect extent in return value of RESHAPE" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) shape_data[n]); + } + + source_extent = 1; + sdim = GFC_DESCRIPTOR_RANK (source); + for (index_type n = 0; n < sdim; n++) + { + index_type se; + se = GFC_DESCRIPTOR_EXTENT(source,n); + source_extent *= se > 0 ? se : 0; + } + + if (rs > source_extent && (!pad || pempty)) + runtime_error("Incorrect size in SOURCE argument to RESHAPE" + " intrinsic: is %ld, should be %ld", + (long int) source_extent, (long int) rs); + + if (order) + { + int seen[GFC_MAX_DIMENSIONS]; + index_type v; + + for (index_type n = 0; n < rdim; n++) + seen[n] = 0; + + for (index_type n = 0; n < rdim; n++) + { + v = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + + if (v < 0 || v >= rdim) + runtime_error("Value %ld out of range in ORDER argument" + " to RESHAPE intrinsic", (long int) v + 1); + + if (seen[v] != 0) + runtime_error("Duplicate value %ld in ORDER argument to" + " RESHAPE intrinsic", (long int) v + 1); + + seen[v] = 1; + } + } + } + + rsize = 1; + for (index_type n = 0; n < rdim; n++) + { + index_type dim; + if (order) + dim = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + else + dim = n; + + rcount[n] = 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + rextent[n] = GFC_DESCRIPTOR_EXTENT(ret,dim); + if (rextent[n] < 0) + rextent[n] = 0; + + if (rextent[n] != shape_data[dim]) + runtime_error ("shape and target do not conform"); + + if (rsize == rstride[n]) + rsize *= rextent[n]; + else + rsize = 0; + if (rextent[n] <= 0) + return; + } + + sdim = GFC_DESCRIPTOR_RANK (source); + + /* sdim is always > 0; this lets the compiler optimize more and + avoids a warning. */ + GFC_ASSERT(sdim>0); + + ssize = 1; + sempty = 0; + for (index_type n = 0; n < sdim; n++) + { + scount[n] = 0; + sstride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + sextent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (sextent[n] <= 0) + { + sempty = 1; + sextent[n] = 0; + } + + if (ssize == sstride[n]) + ssize *= sextent[n]; + else + ssize = 0; + } + + if (rsize != 0 && ssize != 0 && psize != 0) + { + rsize *= sizeof (GFC_REAL_17); + ssize *= sizeof (GFC_REAL_17); + psize *= sizeof (GFC_REAL_17); + reshape_packed ((char *)ret->base_addr, rsize, (char *)source->base_addr, + ssize, pad ? (char *)pad->base_addr : NULL, psize); + return; + } + rptr = ret->base_addr; + src = sptr = source->base_addr; + rstride0 = rstride[0]; + sstride0 = sstride[0]; + + if (sempty && pempty) + abort (); + + if (sempty) + { + /* Pretend we are using the pad array the first time around, too. */ + src = pptr; + sptr = pptr; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = pstride[0]; + } + } + + while (rptr) + { + /* Select between the source and pad arrays. */ + *rptr = *src; + /* Advance to the next element. */ + rptr += rstride0; + src += sstride0; + rcount[0]++; + scount[0]++; + + /* Advance to the next destination element. */ + index_type n = 0; + while (rcount[n] == rextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + rcount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * rextent[n]; + n++; + if (n == rdim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + rcount[n]++; + rptr += rstride[n]; + } + } + /* Advance to the next source element. */ + n = 0; + while (scount[n] == sextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + scount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= sstride[n] * sextent[n]; + n++; + if (n == sdim) + { + if (sptr && pad) + { + /* Switch to the pad array. */ + sptr = NULL; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = sstride[0]; + } + } + /* We now start again from the beginning of the pad array. */ + src = pptr; + break; + } + else + { + scount[n]++; + src += sstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/spread_c17.c b/libgfortran/generated/spread_c17.c new file mode 100644 index 0000000..70de587 --- /dev/null +++ b/libgfortran/generated/spread_c17.c @@ -0,0 +1,266 @@ +/* Special implementation of the SPREAD intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + spread_generic.c written by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +spread_c17 (gfc_array_c17 *ret, const gfc_array_c17 *source, + const index_type along, const index_type pncopies) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdelta = 0; + index_type rrank; + index_type rs; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 * restrict dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type srank; + const GFC_COMPLEX_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + index_type ncopies; + + srank = GFC_DESCRIPTOR_RANK(source); + + rrank = srank + 1; + if (rrank > GFC_MAX_DIMENSIONS) + runtime_error ("return rank too large in spread()"); + + if (along > rrank) + runtime_error ("dim outside of rank in spread()"); + + ncopies = pncopies; + + if (ret->base_addr == NULL) + { + + size_t ub, stride; + + /* The front end has signalled that we need to populate the + return array descriptor. */ + ret->dtype.rank = rrank; + + dim = 0; + rs = 1; + for (n = 0; n < rrank; n++) + { + stride = rs; + if (n == along - 1) + { + ub = ncopies - 1; + rdelta = rs; + rs *= ncopies; + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = rs; + + ub = extent[dim] - 1; + rs *= extent[dim]; + dim++; + } + GFC_DIMENSION_SET(ret->dim[n], 0, ub, stride); + } + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (rs, sizeof(GFC_COMPLEX_17)); + if (rs <= 0) + return; + } + else + { + int zero_sized; + + zero_sized = 0; + + dim = 0; + if (GFC_DESCRIPTOR_RANK(ret) != rrank) + runtime_error ("rank mismatch in spread()"); + + if (unlikely (compile_options.bounds_check)) + { + for (n = 0; n < rrank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + + if (ret_extent != ncopies) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) ncopies); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (ret_extent != extent[dim]) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, + (long int) extent[dim]); + + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + else + { + for (n = 0; n < rrank; n++) + { + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + + if (zero_sized) + return; + + if (sstride[0] == 0) + sstride[0] = 1; + } + sstride0 = sstride[0]; + rstride0 = rstride[0]; + rptr = ret->base_addr; + sptr = source->base_addr; + + while (sptr) + { + /* Spread this element. */ + dest = rptr; + for (n = 0; n < ncopies; n++) + { + *dest = *sptr; + dest += rdelta; + } + /* Advance to the next element. */ + sptr += sstride0; + rptr += rstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + rptr -= rstride[n] * extent[n]; + n++; + if (n >= srank) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + rptr += rstride[n]; + } + } + } +} + +/* This version of spread_internal treats the special case of a scalar + source. This is much simpler than the more general case above. */ + +void +spread_scalar_c17 (gfc_array_c17 *ret, const GFC_COMPLEX_17 *source, + const index_type along, const index_type ncopies) +{ + GFC_COMPLEX_17 * restrict dest; + index_type stride; + + if (GFC_DESCRIPTOR_RANK (ret) != 1) + runtime_error ("incorrect destination rank in spread()"); + + if (along > 1) + runtime_error ("dim outside of rank in spread()"); + + if (ret->base_addr == NULL) + { + ret->base_addr = xmallocarray (ncopies, sizeof (GFC_COMPLEX_17)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } + else + { + if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) + / GFC_DESCRIPTOR_STRIDE(ret,0)) + runtime_error ("dim too large in spread()"); + } + + dest = ret->base_addr; + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + for (index_type n = 0; n < ncopies; n++) + { + *dest = *source; + dest += stride; + } +} + +#endif + diff --git a/libgfortran/generated/spread_r17.c b/libgfortran/generated/spread_r17.c new file mode 100644 index 0000000..4aab77d --- /dev/null +++ b/libgfortran/generated/spread_r17.c @@ -0,0 +1,266 @@ +/* Special implementation of the SPREAD intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + spread_generic.c written by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +spread_r17 (gfc_array_r17 *ret, const gfc_array_r17 *source, + const index_type along, const index_type pncopies) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdelta = 0; + index_type rrank; + index_type rs; + GFC_REAL_17 *rptr; + GFC_REAL_17 * restrict dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type srank; + const GFC_REAL_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + index_type ncopies; + + srank = GFC_DESCRIPTOR_RANK(source); + + rrank = srank + 1; + if (rrank > GFC_MAX_DIMENSIONS) + runtime_error ("return rank too large in spread()"); + + if (along > rrank) + runtime_error ("dim outside of rank in spread()"); + + ncopies = pncopies; + + if (ret->base_addr == NULL) + { + + size_t ub, stride; + + /* The front end has signalled that we need to populate the + return array descriptor. */ + ret->dtype.rank = rrank; + + dim = 0; + rs = 1; + for (n = 0; n < rrank; n++) + { + stride = rs; + if (n == along - 1) + { + ub = ncopies - 1; + rdelta = rs; + rs *= ncopies; + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = rs; + + ub = extent[dim] - 1; + rs *= extent[dim]; + dim++; + } + GFC_DIMENSION_SET(ret->dim[n], 0, ub, stride); + } + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (rs, sizeof(GFC_REAL_17)); + if (rs <= 0) + return; + } + else + { + int zero_sized; + + zero_sized = 0; + + dim = 0; + if (GFC_DESCRIPTOR_RANK(ret) != rrank) + runtime_error ("rank mismatch in spread()"); + + if (unlikely (compile_options.bounds_check)) + { + for (n = 0; n < rrank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + + if (ret_extent != ncopies) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) ncopies); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (ret_extent != extent[dim]) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, + (long int) extent[dim]); + + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + else + { + for (n = 0; n < rrank; n++) + { + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + + if (zero_sized) + return; + + if (sstride[0] == 0) + sstride[0] = 1; + } + sstride0 = sstride[0]; + rstride0 = rstride[0]; + rptr = ret->base_addr; + sptr = source->base_addr; + + while (sptr) + { + /* Spread this element. */ + dest = rptr; + for (n = 0; n < ncopies; n++) + { + *dest = *sptr; + dest += rdelta; + } + /* Advance to the next element. */ + sptr += sstride0; + rptr += rstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + rptr -= rstride[n] * extent[n]; + n++; + if (n >= srank) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + rptr += rstride[n]; + } + } + } +} + +/* This version of spread_internal treats the special case of a scalar + source. This is much simpler than the more general case above. */ + +void +spread_scalar_r17 (gfc_array_r17 *ret, const GFC_REAL_17 *source, + const index_type along, const index_type ncopies) +{ + GFC_REAL_17 * restrict dest; + index_type stride; + + if (GFC_DESCRIPTOR_RANK (ret) != 1) + runtime_error ("incorrect destination rank in spread()"); + + if (along > 1) + runtime_error ("dim outside of rank in spread()"); + + if (ret->base_addr == NULL) + { + ret->base_addr = xmallocarray (ncopies, sizeof (GFC_REAL_17)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } + else + { + if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) + / GFC_DESCRIPTOR_STRIDE(ret,0)) + runtime_error ("dim too large in spread()"); + } + + dest = ret->base_addr; + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + for (index_type n = 0; n < ncopies; n++) + { + *dest = *source; + dest += stride; + } +} + +#endif + diff --git a/libgfortran/generated/sum_c17.c b/libgfortran/generated/sum_c17.c new file mode 100644 index 0000000..16514d3 --- /dev/null +++ b/libgfortran/generated/sum_c17.c @@ -0,0 +1,536 @@ +/* Implementation of the SUM intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_COMPLEX_17) + + +extern void sum_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict); +export_proto(sum_c17); + +void +sum_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + GFC_COMPLEX_17 result; + src = base; + { + + result = 0; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result += *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void msum_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(msum_c17); + +void +msum_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + const GFC_COMPLEX_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + sum_c17 (retarray, array, pdim, back); +#else + sum_c17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in SUM intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "SUM"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_COMPLEX_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_COMPLEX_17 result; + src = base; + msrc = mbase; + { + + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result += *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void ssum_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(ssum_c17); + +void +ssum_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + sum_c17 (retarray, array, pdim, back); +#else + sum_c17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " SUM intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/sum_r17.c b/libgfortran/generated/sum_r17.c new file mode 100644 index 0000000..a115507 --- /dev/null +++ b/libgfortran/generated/sum_r17.c @@ -0,0 +1,536 @@ +/* Implementation of the SUM intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void sum_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(sum_r17); + +void +sum_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + + result = 0; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result += *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void msum_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(msum_r17); + +void +msum_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + sum_r17 (retarray, array, pdim, back); +#else + sum_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in SUM intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "SUM"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result += *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void ssum_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(ssum_r17); + +void +ssum_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + sum_r17 (retarray, array, pdim, back); +#else + sum_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " SUM intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/unpack_c17.c b/libgfortran/generated/unpack_c17.c new file mode 100644 index 0000000..c19d802 --- /dev/null +++ b/libgfortran/generated/unpack_c17.c @@ -0,0 +1,333 @@ +/* Specific implementation of the UNPACK intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + unpack_generic.c by Paul Brook <paul@nowt.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +unpack0_c17 (gfc_array_c17 *ret, const gfc_array_c17 *vector, + const gfc_array_l1 *mask, const GFC_COMPLEX_17 *fptr) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_COMPLEX_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_COMPLEX_17 *vptr; + /* Value for field, this is constant. */ + const GFC_COMPLEX_17 fval = *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = fval; + } + /* Advance to the next element. */ + rptr += rstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + mptr += mstride[n]; + } + } + } +} + +void +unpack1_c17 (gfc_array_c17 *ret, const gfc_array_c17 *vector, + const gfc_array_l1 *mask, const gfc_array_c17 *field) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_COMPLEX_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_COMPLEX_17 *vptr; + /* f.* indicates the field array. */ + index_type fstride[GFC_MAX_DIMENSIONS]; + index_type fstride0; + const GFC_COMPLEX_17 *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (fstride[0] == 0) + fstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + fstride0 = fstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + fptr = field->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = *fptr; + } + /* Advance to the next element. */ + rptr += rstride0; + fptr += fstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + fptr -= fstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + fptr += fstride[n]; + mptr += mstride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/unpack_r17.c b/libgfortran/generated/unpack_r17.c new file mode 100644 index 0000000..d09d3ab --- /dev/null +++ b/libgfortran/generated/unpack_r17.c @@ -0,0 +1,333 @@ +/* Specific implementation of the UNPACK intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + unpack_generic.c by Paul Brook <paul@nowt.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +unpack0_r17 (gfc_array_r17 *ret, const gfc_array_r17 *vector, + const gfc_array_l1 *mask, const GFC_REAL_17 *fptr) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_REAL_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_REAL_17 *vptr; + /* Value for field, this is constant. */ + const GFC_REAL_17 fval = *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = fval; + } + /* Advance to the next element. */ + rptr += rstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + mptr += mstride[n]; + } + } + } +} + +void +unpack1_r17 (gfc_array_r17 *ret, const gfc_array_r17 *vector, + const gfc_array_l1 *mask, const gfc_array_r17 *field) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_REAL_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_REAL_17 *vptr; + /* f.* indicates the field array. */ + index_type fstride[GFC_MAX_DIMENSIONS]; + index_type fstride0; + const GFC_REAL_17 *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (fstride[0] == 0) + fstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + fstride0 = fstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + fptr = field->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = *fptr; + } + /* Advance to the next element. */ + rptr += rstride0; + fptr += fstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + fptr -= fstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + fptr += fstride[n]; + mptr += mstride[n]; + } + } + } +} + +#endif + |