author     Haochen Gui <guihaoc@gcc.gnu.org>  2023-11-17 17:17:59 +0800
committer  Haochen Gui <guihaoc@gcc.gnu.org>  2023-11-17 17:19:41 +0800
commit     10615c8a10d6b61e813254924d76be728dbd4688 (patch)
tree       82336995b983274e89d69424957b1501af7e8d88
parent     cd295a80c91040fd4d826528c8e8e07fe909ae62 (diff)
rs6000: Fix regression cases caused by 16-byte by-pieces move
The previous patch enabled the 16-byte by-pieces move.  Originally the
16-byte move was implemented via a move pattern, and expand_block_move
did an optimization on P8 LE to leverage the V2DI reversed load/store
for memory-to-memory moves.  (The element reversal performed by the
load and by the store cancels out for a pure copy, so no extra permute
is needed.)  Now the 16-byte move is implemented via the by-pieces
infrastructure and is finally split into two DI load/store pairs.  This
patch creates an insn_and_split pattern to retake the optimization.

gcc/
	PR target/111449
	* config/rs6000/vsx.md (*vsx_le_mem_to_mem_mov_ti): New.

gcc/testsuite/
	PR target/111449
	* gcc.target/powerpc/pr111449-2.c: New.
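For illustration, a minimal sketch (not part of the commit) of the case
the new pattern targets and the codegen it restores on P8 LE; the
instruction sequences in the comments are assumed typical output, not
captured from an actual build:

/* 16-byte memory-to-memory copy, as in the new testcase.  */
void copy16 (void *dst, void *src)
{
  /* The by-pieces move alone splits this into two DI load/store
     pairs, roughly:
       ld 9,0(4)     ld 10,8(4)
       std 9,0(3)    std 10,8(3)
     With the new pattern, one element-reversed vector pair suffices:
       lxvd2x 0,0,4
       stxvd2x 0,0,3
     The doubleword reversal done by lxvd2x and by stxvd2x cancels out
     for a pure copy, so no xxswapd is needed on little endian.  */
  __builtin_memcpy (dst, src, 16);
}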
 gcc/config/rs6000/vsx.md                      | 21 +++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr111449-2.c | 18 ++++++++++++
 2 files changed, 39 insertions(+), 0 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f3b4022..26fa328 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -414,6 +414,27 @@
;; VSX moves
+;; TImode memory to memory move optimization on LE with p8vector
+(define_insn_and_split "*vsx_le_mem_to_mem_mov_ti"
+  [(set (match_operand:TI 0 "indexed_or_indirect_operand" "=Z")
+        (match_operand:TI 1 "indexed_or_indirect_operand" "Z"))]
+  "!BYTES_BIG_ENDIAN
+   && TARGET_VSX
+   && !TARGET_P9_VECTOR
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V2DImode);
+  rtx src = adjust_address (operands[1], V2DImode, 0);
+  emit_insn (gen_vsx_ld_elemrev_v2di (tmp, src));
+  rtx dest = adjust_address (operands[0], V2DImode, 0);
+  emit_insn (gen_vsx_st_elemrev_v2di (dest, tmp));
+  DONE;
+}
+  [(set_attr "length" "16")])
+
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr111449-2.c b/gcc/testsuite/gcc.target/powerpc/pr111449-2.c
new file mode 100644
index 0000000..7003bdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr111449-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { has_arch_pwr8 } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+/* Ensure 16-byte by pieces move is enabled. */
+
+void move1 (void *s1, void *s2)
+{
+  __builtin_memcpy (s1, s2, 16);
+}
+
+void move2 (void *s1)
+{
+  __builtin_memcpy (s1, "0123456789012345", 16);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mp?lxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mp?stxv\M} 2 } } */
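Presumably the alternatives in these scan patterns are there so the test
also passes when a newer default CPU is in effect: with Power9 the copy
is emitted as lxv/stxv, and with Power10 possibly as prefixed
plxv/pstxv, instead of the Power8 lxvd2x/stxvd2x pair exercised by the
new pattern.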