diff options
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                             |  5 +++++
-rw-r--r--  gcc/simplify-rtx.c                                        | 16 ++++++++++++++++
-rw-r--r--  gcc/testsuite/ChangeLog                                   |  4 ++++
-rw-r--r--  gcc/testsuite/gcc.target/i386/extract-insert-combining.c  | 34 ++++++++++++++++++++++++++++++++++
4 files changed, 59 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 60a6b22..e362f1d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-11-24 Petr Murzin <petr.murzin@intel.com>
+
+        * simplify-rtx.c (simplify_ternary_operation): Simplify
+        vec_merge (vec_duplicate (vec_select)).
+
 2014-11-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
 
         * config/aarch64/aarch64.c (AARCH64_FUSE_ADRP_LDR): Define.
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 98d4ceb..055ba78 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5233,6 +5233,22 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
                                                  op0, XEXP (op1, 0), op2);
                 }
             }
+
+          /* Replace (vec_merge (vec_duplicate (vec_select a parallel (i))) a 1 << i)
+             with a. */
+          if (GET_CODE (op0) == VEC_DUPLICATE
+              && GET_CODE (XEXP (op0, 0)) == VEC_SELECT
+              && GET_CODE (XEXP (XEXP (op0, 0), 1)) == PARALLEL
+              && mode_nunits[GET_MODE (XEXP (op0, 0))] == 1)
+            {
+              tem = XVECEXP ((XEXP (XEXP (op0, 0), 1)), 0, 0);
+              if (CONST_INT_P (tem) && CONST_INT_P (op2))
+                {
+                  if (XEXP (XEXP (op0, 0), 0) == op1
+                      && UINTVAL (op2) == HOST_WIDE_INT_1U << UINTVAL (tem))
+                    return op1;
+                }
+            }
         }
 
       if (rtx_equal_p (op0, op1)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b3d827f..07b00cf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-11-24 Petr Murzin <petr.murzin@intel.com>
+
+        * gcc.target/i386/extract-insert-combining.c: New test.
+
 2014-11-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
 
         * gcc.target/aarch64/fuse_adrp_add_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
new file mode 100644
index 0000000..f27f92c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-msse4.2 -O3" } */
+/* { dg-final { scan-assembler-times "(?:vmovd|movd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "(?:vpaddd|paddd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "(?:vpinsrd|pinsrd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+int
+main (int a, int b)
+{
+  int res;
+
+  __m128i xa, xb, xres;
+
+  xa = _mm_insert_epi32 (xa, a, 0);
+  xb = _mm_insert_epi32 (xb, b, 0);
+
+  xres = _mm_add_epi32 (xa, xb);
+
+  res = _mm_extract_epi32 (xres, 0);
+
+  xres = _mm_insert_epi32 (xres, res, 0);
+  xb = _mm_insert_epi32 (xb, b, 0);
+
+  xres = _mm_add_epi32 (xres, xb);
+
+  res = _mm_extract_epi32 (xres, 0);
+
+  return res;
+}
+