diff options
author | Jakub Jelinek <jakub@redhat.com> | 2015-01-13 20:12:47 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2015-01-13 20:12:47 +0100 |
commit | 860dadcbeac42219978ccc320ef13a5afa56138b (patch) | |
tree | 9195e2668a728ec9e560e43a016f488af5350d77 | |
parent | 6e5a37dae73dc82c735d85f5d1f835f91cd36352 (diff) | |
download | gcc-860dadcbeac42219978ccc320ef13a5afa56138b.zip gcc-860dadcbeac42219978ccc320ef13a5afa56138b.tar.gz gcc-860dadcbeac42219978ccc320ef13a5afa56138b.tar.bz2 |
re PR rtl-optimization/64286 (Redundant extend removal ignores vector element type)
PR rtl-optimization/64286
* ree.c (combine_reaching_defs): Move part of comment earlier,
remove !SCALAR_INT_MODE_P check.
(add_removable_extension): Don't add vector mode
extensions unless all uses of the source register are the same
vector extension.
* gcc.target/i386/avx2-pr64286.c: New test.
From-SVN: r219550
-rw-r--r-- | gcc/ChangeLog | 9 |
-rw-r--r-- | gcc/ree.c | 75 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx2-pr64286.c | 37 |
4 files changed, 114 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b8886d2..725d7a6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2015-01-13 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/64286 + * ree.c (combine_reaching_defs): Move part of comment earlier, + remove !SCALAR_INT_MODE_P check. + (add_removable_extension): Don't add vector mode + extensions if all uses of the source register aren't the same + vector extensions. + 2015-01-13 Renlin Li <renlin.li@arm.com> * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO): Return 2. @@ -783,6 +783,17 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state) != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn))))); if (copy_needed) { + /* Considering transformation of + (set (reg1) (expression)) + ... + (set (reg2) (any_extend (reg1))) + + into + + (set (reg2) (any_extend (expression))) + (set (reg1) (reg2)) + ... */ + /* In theory we could handle more than one reaching def, it just makes the code to update the insn stream more complex. */ if (state->defs_list.length () != 1) @@ -798,18 +809,6 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state) if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE) return false; - /* Transformation of - (set (reg1) (expression)) - (set (reg2) (any_extend (reg1))) - into - (set (reg2) (any_extend (expression))) - (set (reg1) (reg2)) - is only valid for scalar integral modes, as it relies on the low - subreg of reg1 to have the value of (expression), which is not true - e.g. for vector modes. */ - if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn))))) - return false; - machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn))); rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn))); @@ -1027,6 +1026,7 @@ add_removable_extension (const_rtx expr, rtx_insn *insn, different extension. FIXME: this obviously can be improved. 
*/ for (def = defs; def; def = def->next) if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))]) + && idx != -1U && (cand = &(*insn_list)[idx - 1]) && cand->code != code) { @@ -1038,6 +1038,57 @@ add_removable_extension (const_rtx expr, rtx_insn *insn, } return; } + /* For vector mode extensions, ensure that all uses of the + XEXP (src, 0) register are the same extension (both code + and to which mode), as unlike integral extensions lowpart + subreg of the sign/zero extended register are not equal + to the original register, so we have to change all uses or + none. */ + else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0)))) + { + if (idx == 0) + { + struct df_link *ref_chain, *ref_link; + + ref_chain = DF_REF_CHAIN (def->ref); + for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) + { + if (ref_link->ref == NULL + || DF_REF_INSN_INFO (ref_link->ref) == NULL) + { + idx = -1U; + break; + } + rtx_insn *use_insn = DF_REF_INSN (ref_link->ref); + const_rtx use_set; + if (use_insn == insn || DEBUG_INSN_P (use_insn)) + continue; + if (!(use_set = single_set (use_insn)) + || !REG_P (SET_DEST (use_set)) + || GET_MODE (SET_DEST (use_set)) != GET_MODE (dest) + || GET_CODE (SET_SRC (use_set)) != code + || !rtx_equal_p (XEXP (SET_SRC (use_set), 0), + XEXP (src, 0))) + { + idx = -1U; + break; + } + } + if (idx == -1U) + def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; + } + if (idx == -1U) + { + if (dump_file) + { + fprintf (dump_file, "Cannot eliminate extension:\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, + " because some vector uses aren't extension\n"); + } + return; + } + } /* Then add the candidate to the list and insert the reaching definitions into the definition map. 
*/ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5a09e16..3263f13 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-01-13 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/64286 + * gcc.target/i386/avx2-pr64286.c: New test. + 2015-01-13 Paolo Carlini <paolo.carlini@oracle.com> PR c++/57626 diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr64286.c b/gcc/testsuite/gcc.target/i386/avx2-pr64286.c new file mode 100644 index 0000000..2edb321 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr64286.c @@ -0,0 +1,37 @@ +/* PR rtl-optimization/64286 */ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2" } */ +/* { dg-require-effective-target avx2 } */ + +#include <string.h> +#include <stdlib.h> +#include <x86intrin.h> +#include "avx2-check.h" + +__m128i v; +__m256i w; + +__attribute__((noinline, noclone)) void +foo (__m128i *p, __m128i *q) +{ + __m128i a = _mm_loadu_si128 (p); + __m128i b = _mm_xor_si128 (a, v); + w = _mm256_cvtepu8_epi16 (a); + *q = b; +} + +static void +avx2_test (void) +{ + v = _mm_set1_epi8 (0x40); + __m128i c = _mm_set_epi8 (16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + __m128i d; + foo (&c, &d); + __m128i e = _mm_set_epi8 (0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, + 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41); + __m256i f = _mm256_set_epi16 (16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1); + if (memcmp (&w, &f, sizeof (w)) != 0 + || memcmp (&d, &e, sizeof (d)) != 0) + abort (); +} |