Diffstat (limited to 'gcc')
-rw-r--r--  gcc/expr.c                                   74
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr95254.c   19
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr67609.c       2
3 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/gcc/expr.c b/gcc/expr.c
index 6b75028..ca6b1c1 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -3814,6 +3814,80 @@ emit_move_insn (rtx x, rtx y)
gcc_assert (mode != BLKmode
&& (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+ /* If we have a copy that looks like one of the following patterns:
+ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
+ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (constant C))
+ where mode M1 is equal in size to M2, try to detect whether the
+ mode change involves an implicit round trip through memory.
+ If so, see if we can avoid that by removing the subregs and
+ doing the move in mode M2 instead. */
+
+ rtx x_inner = NULL_RTX;
+ rtx y_inner = NULL_RTX;
+
+ auto candidate_subreg_p = [&](rtx subreg) {
+ return (REG_P (SUBREG_REG (subreg))
+ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))),
+ GET_MODE_SIZE (GET_MODE (subreg)))
+ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg)))
+ != CODE_FOR_nothing);
+ };
+
+ auto candidate_mem_p = [&](machine_mode innermode, rtx mem) {
+ return (!targetm.can_change_mode_class (innermode, GET_MODE (mem), ALL_REGS)
+ && !push_operand (mem, GET_MODE (mem))
+ /* Not a candidate if innermode requires too much alignment.  */
+ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode)
+ || targetm.slow_unaligned_access (GET_MODE (mem),
+ MEM_ALIGN (mem))
+ || !targetm.slow_unaligned_access (innermode,
+ MEM_ALIGN (mem))));
+ };
+
+ if (SUBREG_P (x) && candidate_subreg_p (x))
+ x_inner = SUBREG_REG (x);
+
+ if (SUBREG_P (y) && candidate_subreg_p (y))
+ y_inner = SUBREG_REG (y);
+
+ if (x_inner != NULL_RTX
+ && y_inner != NULL_RTX
+ && GET_MODE (x_inner) == GET_MODE (y_inner)
+ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
+ {
+ x = x_inner;
+ y = y_inner;
+ mode = GET_MODE (x_inner);
+ }
+ else if (x_inner != NULL_RTX
+ && MEM_P (y)
+ && candidate_mem_p (GET_MODE (x_inner), y))
+ {
+ x = x_inner;
+ y = adjust_address (y, GET_MODE (x_inner), 0);
+ mode = GET_MODE (x_inner);
+ }
+ else if (y_inner != NULL_RTX
+ && MEM_P (x)
+ && candidate_mem_p (GET_MODE (y_inner), x))
+ {
+ x = adjust_address (x, GET_MODE (y_inner), 0);
+ y = y_inner;
+ mode = GET_MODE (y_inner);
+ }
+ else if (x_inner != NULL_RTX
+ && CONSTANT_P (y)
+ && !targetm.can_change_mode_class (GET_MODE (x_inner),
+ mode, ALL_REGS)
+ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
+ {
+ x = x_inner;
+ y = y_inner;
+ mode = GET_MODE (x_inner);
+ }
+
if (CONSTANT_P (y))
{
if (optimize
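
An illustrative sketch of the transformation (hypothetical pseudo register
numbers and modes, not RTL dumped from the commit): a same-size subreg copy
such as

    (set (subreg:V4HI (reg:DI 93) 0)
         (subreg:V4HI (reg:DI 92) 0))

where DImode has a mov pattern and can_change_mode_class says the
V4HI/DImode change cannot happen within a register, previously forced an
implicit spill and reload through the stack.  With this change it is
emitted as a plain move in the inner mode instead:

    (set (reg:DI 93) (reg:DI 92))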
diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
new file mode 100644
index 0000000..10bfc86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
+
+typedef short __attribute__((vector_size (8))) v4hi;
+
+typedef union U4HI { v4hi v; short a[4]; } u4hi;
+
+short b[4];
+
+void pass_v4hi (v4hi v)
+{
+ int i;
+ u4hi u;
+ u.v = v;
+ for (i = 0; i < 4; i++)
+ b[i] = u.a[i];
+}
+
+/* { dg-final { scan-assembler-not "ptrue" } } */
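
The scan-assembler-not directive asserts that no SVE predicate setup
("ptrue") appears, i.e. the union pun between v4hi and the scalar array no
longer forces a predicated SVE round trip through memory.  To run just this
test from a configured aarch64 build tree, the usual DejaGnu invocation
would be something like (standard GCC testsuite usage, not part of the
commit):

    make check-gcc RUNTESTFLAGS="aarch64.exp=pr95254.c"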
diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
index 518071b..398cdba 100644
--- a/gcc/testsuite/gcc.target/i386/pr67609.c
+++ b/gcc/testsuite/gcc.target/i386/pr67609.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target lp64 } */
-/* { dg-final { scan-assembler "movdqa" } } */
+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
#include <emmintrin.h>
__m128d reg;