diff options
author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2008-03-21 14:37:03 +0000 |
---|---|---|
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2008-03-21 14:37:03 +0000 |
commit | 3ef2513a1d3a2497773f9aa37ac1242e88df8652 (patch) | |
tree | 04bf31b498d7e26a396dabb53adec7b17a4a8ba4 /libgfortran/m4 | |
parent | f4351641f0c9a8d7d0962da084bb9127ce41bef8 (diff) | |
download | gcc-3ef2513a1d3a2497773f9aa37ac1242e88df8652.zip gcc-3ef2513a1d3a2497773f9aa37ac1242e88df8652.tar.gz gcc-3ef2513a1d3a2497773f9aa37ac1242e88df8652.tar.bz2 |
re PR libfortran/32972 (performance of pack/unpack)
2008-03-21 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/32972
* Makefile.am: Add new variable, i_pack_c, containing
pack_i1.c, pack_i2.c, pack_i4.c, pack_i8.c, pack_i16.c,
pack_r4.c, pack_r8.c, pack_r10.c, pack_r16.c, pack_c4.c,
pack_c8.c, pack_c10.c, pack_c16.c.
Add m4/pack.m4 to m4_files.
Add i_pack_c to gfor_built_src.
Add rule to generate i_pack_c from m4/pack.m4.
* Makefile.in: Regenerated.
* libgfortran.h: Add prototypes for pack_i1, pack_i2, pack_i4,
pack_i8, pack_i16, pack_r4, pack_r8, pack_c4, pack_c8,
pack_c10, pack_c16.
* intrinsics/pack_generic.c: Add calls to specific
pack functions.
* m4/pack.m4: New file.
* generated/pack_i1.c: New file.
* generated/pack_i2.c: New file.
* generated/pack_i4.c: New file.
* generated/pack_i8.c: New file.
* generated/pack_i16.c: New file.
* generated/pack_r4.c: New file.
* generated/pack_r8.c: New file.
* generated/pack_r10.c: New file.
* generated/pack_r16.c: New file.
* generated/pack_c4.c: New file.
* generated/pack_c8.c: New file.
* generated/pack_c10.c: New file.
* generated/pack_c16.c: New file.
2008-03-21 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/32972
* internal_pack_1.f90: New test case.
* internal_pack_2.f90: New test case.
* internal_pack_3.f90: New test case.
From-SVN: r133427
Diffstat (limited to 'libgfortran/m4')
-rw-r--r-- | libgfortran/m4/pack.m4 | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/libgfortran/m4/pack.m4 b/libgfortran/m4/pack.m4
new file mode 100644
index 0000000..87409a5
--- /dev/null
+++ b/libgfortran/m4/pack.m4
@@ -0,0 +1,346 @@
+`/* Specific implementation of the PACK intrinsic
+   Copyright (C) 2002, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+   Contributed by Paul Brook <paul@nowt.org>
+
+This file is part of the GNU Fortran 95 runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public
+License along with libgfortran; see the file COPYING.  If not,
+write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+#include "libgfortran.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>'
+
+include(iparm.m4)dnl
+
+`#if defined (HAVE_'rtype_name`)
+
+/* PACK is specified as follows:
+
+   13.14.80 PACK (ARRAY, MASK, [VECTOR])
+
+   Description: Pack an array into an array of rank one under the
+   control of a mask.
+
+   Class: Transformational function.
+
+   Arguments:
+      ARRAY  may be of any type. It shall not be scalar.
+      MASK   shall be of type LOGICAL. It shall be conformable with ARRAY.
+      VECTOR (optional) shall be of the same type and type parameters
+             as ARRAY. VECTOR shall have at least as many elements as
+             there are true elements in MASK. If MASK is a scalar
+             with the value true, VECTOR shall have at least as many
+             elements as there are in ARRAY.
+
+   Result Characteristics: The result is an array of rank one with the
+   same type and type parameters as ARRAY. If VECTOR is present, the
+   result size is that of VECTOR; otherwise, the result size is the
+   number /t/ of true elements in MASK unless MASK is scalar with the
+   value true, in which case the result size is the size of ARRAY.
+
+   Result Value: Element /i/ of the result is the element of ARRAY
+   that corresponds to the /i/th true element of MASK, taking elements
+   in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is
+   present and has size /n/ > /t/, element /i/ of the result has the
+   value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/.
+
+   Examples: The nonzero elements of an array M with the value
+   | 0 0 0 |
+   | 9 0 0 | may be "gathered" by the function PACK. The result of
+   | 0 0 7 |
+   PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0,
+   VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12].
+
+There are two variants of the PACK intrinsic: one, where MASK is
+array valued, and the other one where MASK is scalar.  */
+
+/* Pack the elements of ARRAY selected by an array-valued MASK into the
+   rank-one array RET, taking elements in array element order.
+
+   RET     Result descriptor.  If its data pointer is NULL, dimension 0 is
+           set up here (lower bound 0, stride 1) and storage is obtained
+           from internal_malloc_size.  Otherwise the existing storage is
+           written with the stride of dimension 0, a stride of 0 being
+           treated as 1.
+   ARRAY   Source array descriptor.
+   MASK    Logical array descriptor.  Element sizes 1, 2, 4, 8 and, if
+           HAVE_GFC_LOGICAL_16 is defined, 16 are accepted; any other
+           size raises a runtime error.
+   VECTOR  May be NULL.  If present, its extent is used as the result
+           size and its elements from position /t/ on fill the rest of
+           the result, /t/ being the number of elements taken from ARRAY.
+
+   If RET is already allocated and bounds checking is enabled, a runtime
+   error is raised when the extent of RET differs from the result size.  */
+
+void
+pack_'rtype_code` ('rtype` *ret, const 'rtype` *array,
+               const gfc_array_l1 *mask, const 'rtype` *vector)
+{
+  /* r.* indicates the return array.  */
+  index_type rstride0;
+  'rtype_name` *rptr;
+  /* s.* indicates the source array.  */
+  index_type sstride[GFC_MAX_DIMENSIONS];
+  index_type sstride0;
+  const 'rtype_name` *sptr;
+  /* m.* indicates the mask array.  */
+  index_type mstride[GFC_MAX_DIMENSIONS];
+  index_type mstride0;
+  const GFC_LOGICAL_1 *mptr;
+
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  int zero_sized;
+  index_type n;
+  index_type dim;
+  index_type nelem;
+  index_type total;
+  int mask_kind;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+
+  mptr = mask->data;
+
+  /* Use the same loop for all logical types, by using GFC_LOGICAL_1
+     and using shifting to address size and endian issues.  */
+
+  mask_kind = GFC_DESCRIPTOR_SIZE (mask);
+
+  if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
+#ifdef HAVE_GFC_LOGICAL_16
+      || mask_kind == 16
+#endif
+      )
+    {
+      /* Do not convert a NULL pointer as we use test for NULL below.  */
+      if (mptr)
+        mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind);
+    }
+  else
+    runtime_error ("Funny sized logical array");
+
+  zero_sized = 0;
+  for (n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
+      if (extent[n] <= 0)
+        zero_sized = 1;
+      sstride[n] = array->dim[n].stride;
+      mstride[n] = mask->dim[n].stride * mask_kind;
+    }
+  if (sstride[0] == 0)
+    sstride[0] = 1;
+  if (mstride[0] == 0)
+    mstride[0] = mask_kind;
+
+  /* A zero-sized ARRAY has no elements to test or copy.  Leaving sptr
+     NULL makes the copy loop below fall through at once; otherwise
+     count[0] would step past an extent of zero and the loop would run
+     off the end of ARRAY, MASK and RET.  */
+  if (zero_sized)
+    sptr = NULL;
+  else
+    sptr = array->data;
+
+  if (ret->data == NULL || compile_options.bounds_check)
+    {
+      /* Count the elements, either for allocating memory or
+         for bounds checking.  */
+
+      if (vector != NULL)
+        {
+          /* The return array will have as many
+             elements as there are in VECTOR.  */
+          total = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
+          /* A negative extent means VECTOR is empty; treat it as
+             absent so that no negative size is used below.  */
+          if (total < 0)
+            {
+              total = 0;
+              vector = NULL;
+            }
+        }
+      else
+        {
+          /* We have to count the true elements in MASK.  */
+
+          /* TODO: We could speed up pack easily in the case of only
+             few .TRUE. entries in MASK, by keeping track of where we
+             would be in the source array during the initial traversal
+             of MASK, and caching the pointers to those elements. Then,
+             supposed the number of elements is small enough, we would
+             only have to traverse the list, and copy those elements
+             into the result array. In the case of datatypes which fit
+             in one of the integer types we could also cache the
+             value instead of a pointer to it.
+             This approach might be bad from the point of view of
+             cache behavior in the case where our cache is not big
+             enough to hold all elements that have to be copied.  */
+
+          const GFC_LOGICAL_1 *m = mptr;
+
+          total = 0;
+          if (zero_sized)
+            m = NULL;
+
+          while (m)
+            {
+              /* Test this element.  */
+              if (*m)
+                total++;
+
+              /* Advance to the next element.  */
+              m += mstride[0];
+              count[0]++;
+              n = 0;
+              while (count[n] == extent[n])
+                {
+                  /* When we get to the end of a dimension, reset it
+                     and increment the next dimension.  */
+                  count[n] = 0;
+                  /* We could precalculate this product, but this is a
+                     less frequently used path so probably not worth
+                     it.  */
+                  m -= mstride[n] * extent[n];
+                  n++;
+                  if (n >= dim)
+                    {
+                      /* Break out of the loop.  */
+                      m = NULL;
+                      break;
+                    }
+                  else
+                    {
+                      count[n]++;
+                      m += mstride[n];
+                    }
+                }
+            }
+        }
+
+      if (ret->data == NULL)
+        {
+          /* Setup the array descriptor.  */
+          ret->dim[0].lbound = 0;
+          ret->dim[0].ubound = total - 1;
+          ret->dim[0].stride = 1;
+
+          ret->offset = 0;
+          if (total == 0)
+            {
+              /* In this case, nothing remains to be done.  */
+              ret->data = internal_malloc_size (1);
+              return;
+            }
+          else
+            ret->data = internal_malloc_size (sizeof ('rtype_name`) * total);
+        }
+      else
+        {
+          /* We come here because of range checking.  */
+          index_type ret_extent;
+
+          ret_extent = ret->dim[0].ubound + 1 - ret->dim[0].lbound;
+          if (total != ret_extent)
+            runtime_error ("Incorrect extent in return value of PACK intrinsic;"
+                           " is %ld, should be %ld", (long int) total,
+                           (long int) ret_extent);
+        }
+    }
+
+  rstride0 = ret->dim[0].stride;
+  if (rstride0 == 0)
+    rstride0 = 1;
+  sstride0 = sstride[0];
+  mstride0 = mstride[0];
+  rptr = ret->data;
+
+  while (sptr && mptr)
+    {
+      /* Test this element.  */
+      if (*mptr)
+        {
+          /* Add it.  */
+          *rptr = *sptr;
+          rptr += rstride0;
+        }
+      /* Advance to the next element.  */
+      sptr += sstride0;
+      mptr += mstride0;
+      count[0]++;
+      n = 0;
+      while (count[n] == extent[n])
+        {
+          /* When we get to the end of a dimension, reset it and increment
+             the next dimension.  */
+          count[n] = 0;
+          /* We could precalculate these products, but this is a less
+             frequently used path so probably not worth it.  */
+          sptr -= sstride[n] * extent[n];
+          mptr -= mstride[n] * extent[n];
+          n++;
+          if (n >= dim)
+            {
+              /* Break out of the loop.  */
+              sptr = NULL;
+              break;
+            }
+          else
+            {
+              count[n]++;
+              sptr += sstride[n];
+              mptr += mstride[n];
+            }
+        }
+    }
+
+  /* Add any remaining elements from VECTOR.  */
+  if (vector)
+    {
+      n = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
+      nelem = ((rptr - ret->data) / rstride0);
+      if (n > nelem)
+        {
+          sstride0 = vector->dim[0].stride;
+          if (sstride0 == 0)
+            sstride0 = 1;
+
+          sptr = vector->data + sstride0 * nelem;
+          n -= nelem;
+          while (n--)
+            {
+              *rptr = *sptr;
+              rptr += rstride0;
+              sptr += sstride0;
+            }
+        }
+    }
+}
+
+#endif
+'
\ No newline at end of file |