diff options
author | Richard Biener <rguenther@suse.de> | 2022-02-09 10:55:18 +0100 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2022-02-09 14:03:31 +0100 |
commit | da2bf62d9e2a25f2d6a99176144c250b51fbdee7 (patch) | |
tree | c9b7384cd0a3becee9cb8f89c0c407b79d9e76f0 /gcc | |
parent | ab0b5fbfe90168d2e470aefb19e0cf31526290bc (diff) | |
download | gcc-da2bf62d9e2a25f2d6a99176144c250b51fbdee7.zip gcc-da2bf62d9e2a25f2d6a99176144c250b51fbdee7.tar.gz gcc-da2bf62d9e2a25f2d6a99176144c250b51fbdee7.tar.bz2 |
tree-optimization/104445 - check for vector extraction support
This adds a missing check to epilogue reduction re-use, namely
that we can do hi/lo extracts from the vector when demoting it
to the epilogue vector size.
I've chosen to add a can_vec_extract helper to optabs-query.h;
in the future we might want to simplify the vectorizer's life by
handling vector-from-vector extraction via BIT_FIELD_REFs during
RTL expansion via mode punning when the vec_extract is not
directly supported.
I'm not 100% sure we can always do the punning of the
vec_extract result to a vector mode of the same size, but then
I'm also not sure how to check for that (the vectorizer doesn't
check it in the other places where it does that at the moment, but
I suppose we eventually just go through memory there).
2022-02-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/104445
PR tree-optimization/102832
* optabs-query.h (can_vec_extract): New.
* optabs-query.cc (can_vec_extract): Likewise.
* tree-vect-loop.cc (vect_find_reusable_accumulator): Check
we can extract a hi/lo part from the larger vector, rework
check iteration from larger to smaller sizes.
* gcc.dg/vect/pr104445.c: New testcase.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/optabs-query.cc | 28 | ||||
-rw-r--r-- | gcc/optabs-query.h | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr102832.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr104445.c | 16 | ||||
-rw-r--r-- | gcc/tree-vect-loop.cc | 16 |
5 files changed, 67 insertions, 6 deletions
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index 2ce8d74..713c098 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -763,3 +763,31 @@ supports_vec_scatter_store_p (machine_mode mode) return this_fn_optabs->supports_vec_scatter_store[mode] > 0; } +/* Whether we can extract part of the vector mode MODE as + (scalar or vector) mode EXTR_MODE. */ + +bool +can_vec_extract (machine_mode mode, machine_mode extr_mode) +{ + unsigned m; + if (!VECTOR_MODE_P (mode) + || !constant_multiple_p (GET_MODE_SIZE (mode), + GET_MODE_SIZE (extr_mode), &m)) + return false; + + if (convert_optab_handler (vec_extract_optab, mode, extr_mode) + != CODE_FOR_nothing) + return true; + + /* Besides a direct vec_extract we can also use an element extract from + an integer vector mode with elements of the size of the extr_mode. */ + scalar_int_mode imode; + machine_mode vmode; + if (!int_mode_for_size (GET_MODE_BITSIZE (extr_mode), 0).exists (&imode) + || !related_vector_mode (mode, imode, m).exists (&vmode) + || (convert_optab_handler (vec_extract_optab, vmode, imode) + == CODE_FOR_nothing)) + return false; + /* We assume we can pun mode to vmode and imode to extr_mode. */ + return true; +} diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 8b768c1..b9c9fd6 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -195,6 +195,7 @@ bool can_atomic_load_p (machine_mode); bool lshift_cheap_p (bool); bool supports_vec_gather_load_p (machine_mode = E_VOIDmode); bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode); +bool can_vec_extract (machine_mode, machine_mode); /* Version of find_widening_optab_handler_and_mode that operates on specific mode types. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/pr102832.c b/gcc/testsuite/gcc.dg/vect/pr102832.c new file mode 100644 index 0000000..7cb4db5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr102832.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=128" { target aarch64-*-* } } */ + +int a, b; +char c; +signed char d(int e, int f) { return e - f; } +void g() { + a = 0; + for (; a >= -17; a = d(a, 1)) + c ^= b; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr104445.c b/gcc/testsuite/gcc.dg/vect/pr104445.c new file mode 100644 index 0000000..8ec3b3b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104445.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ +/* { dg-additional-options "-mavx -mno-mmx" { target x86_64-*-* i?86-*-* } } */ + +signed char a; +signed char f (int i, int j) +{ + signed char c; + while (i != 0) + { + a ^= j; + ++c; + ++i; + } + return c; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 4860bfd..896218f 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4997,17 +4997,21 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), TYPE_VECTOR_SUBPARTS (vectype), &m)) return false; - /* Check the intermediate vector types are available. */ - while (m > 2) + /* Check the intermediate vector types and operations are available. 
*/ + tree prev_vectype = old_vectype; + poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (old_vectype); + while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype))) { - m /= 2; + intermediate_nunits = exact_div (intermediate_nunits, 2); tree intermediate_vectype = get_related_vectype_for_scalar_type - (TYPE_MODE (vectype), TREE_TYPE (vectype), - exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m)); + (TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits); if (!intermediate_vectype || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), - intermediate_vectype)) + intermediate_vectype) + || !can_vec_extract (TYPE_MODE (prev_vectype), + TYPE_MODE (intermediate_vectype))) return false; + prev_vectype = intermediate_vectype; } /* Non-SLP reductions might apply an adjustment after the reduction |