diff options
author | Haochen Gui <guihaoc@gcc.gnu.org> | 2023-11-17 17:17:59 +0800 |
---|---|---|
committer | Haochen Gui <guihaoc@gcc.gnu.org> | 2023-11-17 17:19:41 +0800 |
commit | 10615c8a10d6b61e813254924d76be728dbd4688 (patch) | |
tree | 82336995b983274e89d69424957b1501af7e8d88 | |
parent | cd295a80c91040fd4d826528c8e8e07fe909ae62 (diff) | |
download | gcc-10615c8a10d6b61e813254924d76be728dbd4688.zip gcc-10615c8a10d6b61e813254924d76be728dbd4688.tar.gz gcc-10615c8a10d6b61e813254924d76be728dbd4688.tar.bz2 |
rs6000: Fix regression cases caused by 16-byte by-pieces move
The previous patch enabled 16-byte by-pieces moves. Originally, a 16-byte
move was implemented via a pattern, and expand_block_move performed an
optimization on P8 LE to leverage V2DI reversed load/store for
memory-to-memory moves. Now a 16-byte move is implemented via a by-pieces
move and is finally split into two DI loads/stores. This patch creates an
insn_and_split pattern to restore the optimization.
gcc/
PR target/111449
* config/rs6000/vsx.md (*vsx_le_mem_to_mem_mov_ti): New.
gcc/testsuite/
PR target/111449
* gcc.target/powerpc/pr111449-2.c: New.
-rw-r--r-- | gcc/config/rs6000/vsx.md | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/pr111449-2.c | 18 |
2 files changed, 39 insertions, 0 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f3b4022..26fa328 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -414,6 +414,27 @@ ;; VSX moves +;; TImode memory to memory move optimization on LE with p8vector +(define_insn_and_split "*vsx_le_mem_to_mem_mov_ti" + [(set (match_operand:TI 0 "indexed_or_indirect_operand" "=Z") + (match_operand:TI 1 "indexed_or_indirect_operand" "Z"))] + "!BYTES_BIG_ENDIAN + && TARGET_VSX + && !TARGET_P9_VECTOR + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V2DImode); + rtx src = adjust_address (operands[1], V2DImode, 0); + emit_insn (gen_vsx_ld_elemrev_v2di (tmp, src)); + rtx dest = adjust_address (operands[0], V2DImode, 0); + emit_insn (gen_vsx_st_elemrev_v2di (dest, tmp)); + DONE; +} + [(set_attr "length" "16")]) + ;; The patterns for LE permuted loads and stores come before the general ;; VSX moves so they match first. (define_insn_and_split "*vsx_le_perm_load_<mode>" diff --git a/gcc/testsuite/gcc.target/powerpc/pr111449-2.c b/gcc/testsuite/gcc.target/powerpc/pr111449-2.c new file mode 100644 index 0000000..7003bdc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr111449-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { has_arch_pwr8 } } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mvsx -O2" } */ + +/* Ensure 16-byte by pieces move is enabled. */ + +void move1 (void *s1, void *s2) +{ + __builtin_memcpy (s1, s2, 16); +} + +void move2 (void *s1) +{ + __builtin_memcpy (s1, "0123456789012345", 16); +} + +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mp?lxv\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M} 2 } } */ |