diff options
author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2007-08-24 16:16:16 +0000 |
---|---|---|
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2007-08-24 16:16:16 +0000 |
commit | 28dc6b33c4b8bfbd36866e084e2263ef384c1bbc (patch) | |
tree | b12449583134bdcb1419a30d4a072fbb3e1c10fd /libgfortran/m4 | |
parent | c6a8035acf00eb3ef5f1a4a22469e7ebb6785af5 (diff) | |
download | gcc-28dc6b33c4b8bfbd36866e084e2263ef384c1bbc.zip gcc-28dc6b33c4b8bfbd36866e084e2263ef384c1bbc.tar.gz gcc-28dc6b33c4b8bfbd36866e084e2263ef384c1bbc.tar.bz2 |
re PR libfortran/32972 (performance of pack/unpack)
2007-08-24 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/32972
* iresolve.c: Don't convert array masks.
2007-08-24 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/32972
* libgfortran.h: Remove GFOR_POINTER_L8_TO_L4 macro.
Add GFOR_POINTER_TO_L1 macro.
* m4/iforeach.m4(`m'name`'rtype_qual`_'atype_code):
Change argument 'mask' to gfc_array_l1. Adjust prototype.
Change mask pointer to GFC_LOGICAL_1. Multiply strides
by kind of mask.
* m4/ifunction.m4: Likewise.
* intrinsics/pack_generic.c(pack_internal): Likewise.
* intrinsics/unpack_generic.c(unpack_internal): Likewise.
* m4/matmull.m4: Switch to GFC_LOGICAL_1. Multiply strides by
kind of logical arguments a and b.
* generated/matmul_l16.c: Regenerated.
* generated/matmul_l4.c: Regenerated.
* generated/matmul_l8.c: Regenerated.
* generated/maxloc0_16_i1.c: Regenerated.
* generated/maxloc0_16_i16.c: Regenerated.
* generated/maxloc0_16_i2.c: Regenerated.
* generated/maxloc0_16_i4.c: Regenerated.
* generated/maxloc0_16_i8.c: Regenerated.
* generated/maxloc0_16_r10.c: Regenerated.
* generated/maxloc0_16_r16.c: Regenerated.
* generated/maxloc0_16_r4.c: Regenerated.
* generated/maxloc0_16_r8.c: Regenerated.
* generated/maxloc0_4_i1.c: Regenerated.
* generated/maxloc0_4_i16.c: Regenerated.
* generated/maxloc0_4_i2.c: Regenerated.
* generated/maxloc0_4_i4.c: Regenerated.
* generated/maxloc0_4_i8.c: Regenerated.
* generated/maxloc0_4_r10.c: Regenerated.
* generated/maxloc0_4_r16.c: Regenerated.
* generated/maxloc0_4_r4.c: Regenerated.
* generated/maxloc0_4_r8.c: Regenerated.
* generated/maxloc0_8_i1.c: Regenerated.
* generated/maxloc0_8_i16.c: Regenerated.
* generated/maxloc0_8_i2.c: Regenerated.
* generated/maxloc0_8_i4.c: Regenerated.
* generated/maxloc0_8_i8.c: Regenerated.
* generated/maxloc0_8_r10.c: Regenerated.
* generated/maxloc0_8_r16.c: Regenerated.
* generated/maxloc0_8_r4.c: Regenerated.
* generated/maxloc0_8_r8.c: Regenerated.
* generated/maxloc1_16_i1.c: Regenerated.
* generated/maxloc1_16_i16.c: Regenerated.
* generated/maxloc1_16_i2.c: Regenerated.
* generated/maxloc1_16_i4.c: Regenerated.
* generated/maxloc1_16_i8.c: Regenerated.
* generated/maxloc1_16_r10.c: Regenerated.
* generated/maxloc1_16_r16.c: Regenerated.
* generated/maxloc1_16_r4.c: Regenerated.
* generated/maxloc1_16_r8.c: Regenerated.
* generated/maxloc1_4_i1.c: Regenerated.
* generated/maxloc1_4_i16.c: Regenerated.
* generated/maxloc1_4_i2.c: Regenerated.
* generated/maxloc1_4_i4.c: Regenerated.
* generated/maxloc1_4_i8.c: Regenerated.
* generated/maxloc1_4_r10.c: Regenerated.
* generated/maxloc1_4_r16.c: Regenerated.
* generated/maxloc1_4_r4.c: Regenerated.
* generated/maxloc1_4_r8.c: Regenerated.
* generated/maxloc1_8_i1.c: Regenerated.
* generated/maxloc1_8_i16.c: Regenerated.
* generated/maxloc1_8_i2.c: Regenerated.
* generated/maxloc1_8_i4.c: Regenerated.
* generated/maxloc1_8_i8.c: Regenerated.
* generated/maxloc1_8_r10.c: Regenerated.
* generated/maxloc1_8_r16.c: Regenerated.
* generated/maxloc1_8_r4.c: Regenerated.
* generated/maxloc1_8_r8.c: Regenerated.
* generated/maxval_i1.c: Regenerated.
* generated/maxval_i16.c: Regenerated.
* generated/maxval_i2.c: Regenerated.
* generated/maxval_i4.c: Regenerated.
* generated/maxval_i8.c: Regenerated.
* generated/maxval_r10.c: Regenerated.
* generated/maxval_r16.c: Regenerated.
* generated/maxval_r4.c: Regenerated.
* generated/maxval_r8.c: Regenerated.
* generated/minloc0_16_i1.c: Regenerated.
* generated/minloc0_16_i16.c: Regenerated.
* generated/minloc0_16_i2.c: Regenerated.
* generated/minloc0_16_i4.c: Regenerated.
* generated/minloc0_16_i8.c: Regenerated.
* generated/minloc0_16_r10.c: Regenerated.
* generated/minloc0_16_r16.c: Regenerated.
* generated/minloc0_16_r4.c: Regenerated.
* generated/minloc0_16_r8.c: Regenerated.
* generated/minloc0_4_i1.c: Regenerated.
* generated/minloc0_4_i16.c: Regenerated.
* generated/minloc0_4_i2.c: Regenerated.
* generated/minloc0_4_i4.c: Regenerated.
* generated/minloc0_4_i8.c: Regenerated.
* generated/minloc0_4_r10.c: Regenerated.
* generated/minloc0_4_r16.c: Regenerated.
* generated/minloc0_4_r4.c: Regenerated.
* generated/minloc0_4_r8.c: Regenerated.
* generated/minloc0_8_i1.c: Regenerated.
* generated/minloc0_8_i16.c: Regenerated.
* generated/minloc0_8_i2.c: Regenerated.
* generated/minloc0_8_i4.c: Regenerated.
* generated/minloc0_8_i8.c: Regenerated.
* generated/minloc0_8_r10.c: Regenerated.
* generated/minloc0_8_r16.c: Regenerated.
* generated/minloc0_8_r4.c: Regenerated.
* generated/minloc0_8_r8.c: Regenerated.
* generated/minloc1_16_i1.c: Regenerated.
* generated/minloc1_16_i16.c: Regenerated.
* generated/minloc1_16_i2.c: Regenerated.
* generated/minloc1_16_i4.c: Regenerated.
* generated/minloc1_16_i8.c: Regenerated.
* generated/minloc1_16_r10.c: Regenerated.
* generated/minloc1_16_r16.c: Regenerated.
* generated/minloc1_16_r4.c: Regenerated.
* generated/minloc1_16_r8.c: Regenerated.
* generated/minloc1_4_i1.c: Regenerated.
* generated/minloc1_4_i16.c: Regenerated.
* generated/minloc1_4_i2.c: Regenerated.
* generated/minloc1_4_i4.c: Regenerated.
* generated/minloc1_4_i8.c: Regenerated.
* generated/minloc1_4_r10.c: Regenerated.
* generated/minloc1_4_r16.c: Regenerated.
* generated/minloc1_4_r4.c: Regenerated.
* generated/minloc1_4_r8.c: Regenerated.
* generated/minloc1_8_i1.c: Regenerated.
* generated/minloc1_8_i16.c: Regenerated.
* generated/minloc1_8_i2.c: Regenerated.
* generated/minloc1_8_i4.c: Regenerated.
* generated/minloc1_8_i8.c: Regenerated.
* generated/minloc1_8_r10.c: Regenerated.
* generated/minloc1_8_r16.c: Regenerated.
* generated/minloc1_8_r4.c: Regenerated.
* generated/minloc1_8_r8.c: Regenerated.
* generated/minval_i1.c: Regenerated.
* generated/minval_i16.c: Regenerated.
* generated/minval_i2.c: Regenerated.
* generated/minval_i4.c: Regenerated.
* generated/minval_i8.c: Regenerated.
* generated/minval_r10.c: Regenerated.
* generated/minval_r16.c: Regenerated.
* generated/minval_r4.c: Regenerated.
* generated/minval_r8.c: Regenerated.
* generated/product_c10.c: Regenerated.
* generated/product_c16.c: Regenerated.
* generated/product_c4.c: Regenerated.
* generated/product_c8.c: Regenerated.
* generated/product_i1.c: Regenerated.
* generated/product_i16.c: Regenerated.
* generated/product_i2.c: Regenerated.
* generated/product_i4.c: Regenerated.
* generated/product_i8.c: Regenerated.
* generated/product_r10.c: Regenerated.
* generated/product_r16.c: Regenerated.
* generated/product_r4.c: Regenerated.
* generated/product_r8.c: Regenerated.
* generated/sum_c10.c: Regenerated.
* generated/sum_c16.c: Regenerated.
* generated/sum_c4.c: Regenerated.
* generated/sum_c8.c: Regenerated.
* generated/sum_i1.c: Regenerated.
* generated/sum_i16.c: Regenerated.
* generated/sum_i2.c: Regenerated.
* generated/sum_i4.c: Regenerated.
* generated/sum_i8.c: Regenerated.
* generated/sum_r10.c: Regenerated.
* generated/sum_r16.c: Regenerated.
* generated/sum_r4.c: Regenerated.
* generated/sum_r8.c: Regenerated.
From-SVN: r127774
Diffstat (limited to 'libgfortran/m4')
-rw-r--r-- | libgfortran/m4/iforeach.m4 | 33 | ||||
-rw-r--r-- | libgfortran/m4/ifunction.m4 | 40 | ||||
-rw-r--r-- | libgfortran/m4/matmull.m4 | 56 |
3 files changed, 75 insertions, 54 deletions
diff --git a/libgfortran/m4/iforeach.m4 b/libgfortran/m4/iforeach.m4 index af53cef..720a4c0 100644 --- a/libgfortran/m4/iforeach.m4 +++ b/libgfortran/m4/iforeach.m4 @@ -106,13 +106,13 @@ define(FINISH_FOREACH_FUNCTION, define(START_MASKED_FOREACH_FUNCTION, ` extern void `m'name`'rtype_qual`_'atype_code (rtype * const restrict, - atype * const restrict, gfc_array_l4 * const restrict); + atype * const restrict, gfc_array_l1 * const restrict); export_proto(`m'name`'rtype_qual`_'atype_code); void `m'name`'rtype_qual`_'atype_code (rtype * const restrict retarray, atype * const restrict array, - gfc_array_l4 * const restrict mask) + gfc_array_l1 * const restrict mask) { index_type count[GFC_MAX_DIMENSIONS]; index_type extent[GFC_MAX_DIMENSIONS]; @@ -121,9 +121,10 @@ void index_type dstride; rtype_name *dest; const atype_name *base; - GFC_LOGICAL_4 *mbase; + GFC_LOGICAL_1 *mbase; int rank; index_type n; + int mask_kind; rank = GFC_DESCRIPTOR_RANK (array); if (rank <= 0) @@ -147,12 +148,25 @@ void runtime_error ("dimension of return array incorrect"); } + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->data; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + dstride = retarray->dim[0].stride; dest = retarray->data; for (n = 0; n < rank; n++) { sstride[n] = array->dim[n].stride; - mstride[n] = mask->dim[n].stride; + mstride[n] = mask->dim[n].stride * mask_kind; extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound; count[n] = 0; if (extent[n] <= 0) @@ -165,17 +179,6 @@ void } base = array->data; - mbase = mask->data; - - if (GFC_DESCRIPTOR_SIZE (mask) != 4) - { - /* This allows the same loop to be used for all logical types. 
*/ - assert (GFC_DESCRIPTOR_SIZE (mask) == 8); - for (n = 0; n < rank; n++) - mstride[n] <<= 1; - mbase = (GFOR_POINTER_L8_TO_L4 (mbase)); - } - /* Initialize the return value. */ for (n = 0; n < rank; n++) diff --git a/libgfortran/m4/ifunction.m4 b/libgfortran/m4/ifunction.m4 index 225b89a..d8a661c 100644 --- a/libgfortran/m4/ifunction.m4 +++ b/libgfortran/m4/ifunction.m4 @@ -166,14 +166,14 @@ define(START_MASKED_ARRAY_FUNCTION, ` extern void `m'name`'rtype_qual`_'atype_code (rtype * const restrict, atype * const restrict, const index_type * const restrict, - gfc_array_l4 * const restrict); + gfc_array_l1 * const restrict); export_proto(`m'name`'rtype_qual`_'atype_code); void `m'name`'rtype_qual`_'atype_code (rtype * const restrict retarray, atype * const restrict array, const index_type * const restrict pdim, - gfc_array_l4 * const restrict mask) + gfc_array_l1 * const restrict mask) { index_type count[GFC_MAX_DIMENSIONS]; index_type extent[GFC_MAX_DIMENSIONS]; @@ -182,13 +182,14 @@ void index_type mstride[GFC_MAX_DIMENSIONS]; rtype_name * restrict dest; const atype_name * restrict base; - const GFC_LOGICAL_4 * restrict mbase; + const GFC_LOGICAL_1 * restrict mbase; int rank; int dim; index_type n; index_type len; index_type delta; index_type mdelta; + int mask_kind; dim = (*pdim) - 1; rank = GFC_DESCRIPTOR_RANK (array) - 1; @@ -196,13 +197,27 @@ void len = array->dim[dim].ubound + 1 - array->dim[dim].lbound; if (len <= 0) return; + + mbase = mask->data; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + delta = array->dim[dim].stride; - mdelta = mask->dim[dim].stride; + mdelta = mask->dim[dim].stride * mask_kind; for (n = 0; n < dim; n++) { sstride[n] = array->dim[n].stride; - mstride[n] = mask->dim[n].stride; + mstride[n] = 
mask->dim[n].stride * mask_kind; extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound; if (extent[n] < 0) @@ -212,7 +227,7 @@ void for (n = dim; n < rank; n++) { sstride[n] = array->dim[n + 1].stride; - mstride[n] = mask->dim[n + 1].stride; + mstride[n] = mask->dim[n + 1].stride * mask_kind; extent[n] = array->dim[n + 1].ubound + 1 - array->dim[n + 1].lbound; @@ -267,22 +282,11 @@ void dest = retarray->data; base = array->data; - mbase = mask->data; - - if (GFC_DESCRIPTOR_SIZE (mask) != 4) - { - /* This allows the same loop to be used for all logical types. */ - assert (GFC_DESCRIPTOR_SIZE (mask) == 8); - for (n = 0; n < rank; n++) - mstride[n] <<= 1; - mdelta <<= 1; - mbase = (GFOR_POINTER_L8_TO_L4 (mbase)); - } while (base) { const atype_name * restrict src; - const GFC_LOGICAL_4 * restrict msrc; + const GFC_LOGICAL_1 * restrict msrc; rtype_name result; src = base; msrc = mbase; diff --git a/libgfortran/m4/matmull.m4 b/libgfortran/m4/matmull.m4 index d4b0008..4e999fc 100644 --- a/libgfortran/m4/matmull.m4 +++ b/libgfortran/m4/matmull.m4 @@ -40,15 +40,15 @@ include(iparm.m4)dnl Either a or b can be rank 1. In this case x or y is 1. 
*/ extern void matmul_'rtype_code` ('rtype` * const restrict, - gfc_array_l4 * const restrict, gfc_array_l4 * const restrict); + gfc_array_l1 * const restrict, gfc_array_l1 * const restrict); export_proto(matmul_'rtype_code`); void matmul_'rtype_code` ('rtype` * const restrict retarray, - gfc_array_l4 * const restrict a, gfc_array_l4 * const restrict b) + gfc_array_l1 * const restrict a, gfc_array_l1 * const restrict b) { - const GFC_INTEGER_4 * restrict abase; - const GFC_INTEGER_4 * restrict bbase; + const GFC_LOGICAL_1 * restrict abase; + const GFC_LOGICAL_1 * restrict bbase; 'rtype_name` * restrict dest; index_type rxstride; index_type rystride; @@ -58,9 +58,11 @@ matmul_'rtype_code` ('rtype` * const restrict retarray, index_type ystride; index_type x; index_type y; + int a_kind; + int b_kind; - const GFC_INTEGER_4 * restrict pa; - const GFC_INTEGER_4 * restrict pb; + const GFC_LOGICAL_1 * restrict pa; + const GFC_LOGICAL_1 * restrict pb; index_type astride; index_type bstride; index_type count; @@ -100,17 +102,29 @@ matmul_'rtype_code` ('rtype` * const restrict retarray, } abase = a->data; - if (GFC_DESCRIPTOR_SIZE (a) != 4) - { - assert (GFC_DESCRIPTOR_SIZE (a) == 8); - abase = GFOR_POINTER_L8_TO_L4 (abase); - } + a_kind = GFC_DESCRIPTOR_SIZE (a); + + if (a_kind == 1 || a_kind == 2 || a_kind == 4 || a_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || a_kind == 16 +#endif + ) + abase = GFOR_POINTER_TO_L1 (abase, a_kind); + else + internal_error (NULL, "Funny sized logical array"); + bbase = b->data; - if (GFC_DESCRIPTOR_SIZE (b) != 4) - { - assert (GFC_DESCRIPTOR_SIZE (b) == 8); - bbase = GFOR_POINTER_L8_TO_L4 (bbase); - } + b_kind = GFC_DESCRIPTOR_SIZE (b); + + if (b_kind == 1 || b_kind == 2 || b_kind == 4 || b_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || b_kind == 16 +#endif + ) + bbase = GFOR_POINTER_TO_L1 (bbase, b_kind); + else + internal_error (NULL, "Funny sized logical array"); + dest = retarray->data; ' sinclude(`matmul_asm_'rtype_code`.m4')dnl @@ -130,7 
+144,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl one. */ if (GFC_DESCRIPTOR_RANK (a) == 1) { - astride = a->dim[0].stride; + astride = a->dim[0].stride * a_kind; count = a->dim[0].ubound + 1 - a->dim[0].lbound; xstride = 0; rxstride = 0; @@ -138,14 +152,14 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl } else { - astride = a->dim[1].stride; + astride = a->dim[1].stride * a_kind; count = a->dim[1].ubound + 1 - a->dim[1].lbound; xstride = a->dim[0].stride; xcount = a->dim[0].ubound + 1 - a->dim[0].lbound; } if (GFC_DESCRIPTOR_RANK (b) == 1) { - bstride = b->dim[0].stride; + bstride = b->dim[0].stride * b_kind; assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); ystride = 0; rystride = 0; @@ -153,7 +167,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl } else { - bstride = b->dim[0].stride; + bstride = b->dim[0].stride * b_kind; assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); ystride = b->dim[1].stride; ycount = b->dim[1].ubound + 1 - b->dim[1].lbound; @@ -191,4 +205,4 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl } #endif -'
\ No newline at end of file +' |