diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2018-10-18 08:18:42 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2018-10-18 01:18:42 -0700 |
commit | 294973a49751a7fc2d6a7a9f2749ce851a368c04 (patch) | |
tree | 3a5176da419a68ba632eb29ab9f43065afcc053e | |
parent | 7702ab653a903d5131bf72244ae22424ffd2e6c7 (diff) | |
download | gcc-294973a49751a7fc2d6a7a9f2749ce851a368c04.zip gcc-294973a49751a7fc2d6a7a9f2749ce851a368c04.tar.gz gcc-294973a49751a7fc2d6a7a9f2749ce851a368c04.tar.bz2 |
Simplify subreg of vec_merge of vec_duplicate
We can simplify
(subreg (vec_merge (vec_duplicate X)
(vector)
(const_int ((1 << N) | M)))
(N * sizeof (X)))
to X when the mode of X is the same as the mode of the subreg.
gcc/
PR target/87537
* simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge
of vec_duplicate.
(test_vector_ops_duplicate): Add test for a scalar subreg of a
VEC_MERGE of a VEC_DUPLICATE.
gcc/testsuite/
PR target/87537
* gcc.target/i386/pr87537-1.c: New test.
From-SVN: r265260
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/simplify-rtx.c | 29 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr87537-1.c | 12 |
4 files changed, 53 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f4e147c..41a1dd1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2018-10-18 H.J. Lu <hongjiu.lu@intel.com> + + PR target/87537 + * simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge + of vec_duplicate. + (test_vector_ops_duplicate): Add test for a scalar subreg of a + VEC_MERGE of a VEC_DUPLICATE. + 2018-10-17 Joseph Myers <joseph@codesourcery.com> * doc/cpp.texi (__STDC_VERSION__): Document C2X handling. diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 9bc5386..b0cf3bb 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -6601,6 +6601,21 @@ simplify_subreg (machine_mode outermode, rtx op, return NULL_RTX; } + /* Return X for + (subreg (vec_merge (vec_duplicate X) + (vector) + (const_int ((1 << N) | M))) + (N * sizeof (X))) + */ + unsigned int idx; + if (constant_multiple_p (byte, GET_MODE_SIZE (outermode), &idx) + && GET_CODE (op) == VEC_MERGE + && GET_CODE (XEXP (op, 0)) == VEC_DUPLICATE + && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode + && CONST_INT_P (XEXP (op, 2)) + && (UINTVAL (XEXP (op, 2)) & (HOST_WIDE_INT_1U << idx)) != 0) + return XEXP (XEXP (op, 0), 0); + /* A SUBREG resulting from a zero extension may fold to zero if it extracts higher bits that the ZERO_EXTEND's source bits. */ if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) @@ -6831,15 +6846,27 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg) simplify_binary_operation (VEC_SELECT, inner_mode, duplicate, zero_par)); - /* And again with the final element. */ unsigned HOST_WIDE_INT const_nunits; if (nunits.is_constant (&const_nunits)) { + /* And again with the final element. */ rtx last_index = gen_int_mode (const_nunits - 1, word_mode); rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index)); ASSERT_RTX_PTR_EQ (scalar_reg, simplify_binary_operation (VEC_SELECT, inner_mode, duplicate, last_par)); + + /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. 
*/ + rtx vector_reg = make_test_reg (mode); + for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++) + { + rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1)); + rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask); + poly_uint64 offset = i * GET_MODE_SIZE (inner_mode); + ASSERT_RTX_EQ (scalar_reg, + simplify_gen_subreg (inner_mode, vm, + mode, offset)); + } } /* Test a scalar subreg of a VEC_DUPLICATE. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f6dbcf7..5e1ad06 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-10-18 H.J. Lu <hongjiu.lu@intel.com> + + PR target/87537 + * gcc.target/i386/pr87537-1.c: New test. + 2018-10-17 Joseph Myers <joseph@codesourcery.com> * gcc.dg/c11-static-assert-7.c, gcc.dg/c11-static-assert-8.c, diff --git a/gcc/testsuite/gcc.target/i386/pr87537-1.c b/gcc/testsuite/gcc.target/i386/pr87537-1.c new file mode 100644 index 0000000..df849b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr87537-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ + +#include <immintrin.h> + +__m128 +foo (float *x) +{ + return _mm_broadcastss_ps(_mm_load_ss(x)); +} |