aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaochen Gui <guihaoc@gcc.gnu.org>2023-08-16 14:21:09 +0800
committerHaochen Gui <guihaoc@gcc.gnu.org>2023-08-16 14:23:38 +0800
commita79cf858b39e01c80537bc5d47a5e9004418c267 (patch)
treeb831f5e5d727b83ada81f701e382e30bda7ac9ce
parentfe5788862ba8d5ac4551658d842f2d038bd8d363 (diff)
downloadgcc-a79cf858b39e01c80537bc5d47a5e9004418c267.zip
gcc-a79cf858b39e01c80537bc5d47a5e9004418c267.tar.gz
gcc-a79cf858b39e01c80537bc5d47a5e9004418c267.tar.bz2
rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend
mfvsrwz has lower latency than xxextractuw or vextuw[lr]x. So it should be generated even with p9 vector enabled. Also the instruction is already zero extended. A combine pattern is needed to eliminate redundant zero extend instructions. gcc/ PR target/106769 * config/rs6000/vsx.md (expand vsx_extract_<mode>): Set it only for V8HI and V16QI. (vsx_extract_v4si): New expand for V4SI extraction. (vsx_extract_v4si_w1): New insn pattern for V4SI extraction on word 1 from BE order. (*mfvsrwz): New insn pattern for mfvsrwz. (*vsx_extract_<mode>_di_p9): Assert that it won't be generated on word 1 from BE order. (*vsx_extract_si): Remove. (*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2, 3 from BE order. gcc/testsuite/ PR target/106769 * gcc.target/powerpc/pr106769.h: New. * gcc.target/powerpc/pr106769-p8.c: New. * gcc.target/powerpc/pr106769-p9.c: New.
-rw-r--r--gcc/config/rs6000/vsx.md112
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr106769-p8.c11
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr106769-p9.c13
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr106769.h17
4 files changed, 119 insertions, 34 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 9cd7be2..e4192c0 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3753,9 +3753,9 @@
(define_expand "vsx_extract_<mode>"
[(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
(vec_select:<VEC_base>
- (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+ (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
/* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
@@ -3767,6 +3767,63 @@
}
})
+(define_expand "vsx_extract_v4si"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+ "TARGET_DIRECT_MOVE_64BIT"
+{
+ /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx. So just
+ fall through to vsx_extract_v4si_w1. */
+ if (TARGET_P9_VECTOR
+ && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+ {
+ emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+})
+
+;; Extract from word 1 (BE order).
+(define_insn "vsx_extract_v4si_w1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+ (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+ "TARGET_DIRECT_MOVE_64BIT
+ && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+{
+ if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+ if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+ if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+ return ASM_COMMENT_START " vec_extract to same register";
+}
+ [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+ (set_attr "length" "4,4,4,0")
+ (set_attr "isa" "p8v,*,p8v,*")])
+
+(define_insn "*mfvsrwz"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (vec_select:SI
+ (match_operand:V4SI 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+ (clobber (match_scratch:V4SI 3 "=v"))]
+ "TARGET_DIRECT_MOVE_64BIT
+ && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+ "mfvsrwz %0,%x1"
+ [(set_attr "type" "mfvsr")
+ (set_attr "isa" "p8v")])
+
(define_insn "vsx_extract_<mode>_p9"
[(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
(vec_select:<VEC_base>
@@ -3838,6 +3895,9 @@
(parallel [(match_dup 2)])))
(clobber (match_dup 3))])]
{
+ gcc_assert (<MODE>mode != V4SImode
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
+
operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
}
[(set_attr "isa" "p9v,*")])
@@ -3861,58 +3921,42 @@
(set (match_dup 0)
(match_dup 3))])
-(define_insn_and_split "*vsx_extract_si"
+;; Extract from word 0, 2, 3 (BE order).
+(define_insn_and_split "*vsx_extract_v4si_w023"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
(vec_select:SI
(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
(clobber (match_scratch:V4SI 3 "=v,v,v"))]
- "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
+ "TARGET_DIRECT_MOVE_64BIT"
"#"
- "&& reload_completed"
+ "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
[(const_int 0)]
{
+ gcc_assert (!TARGET_P9_VECTOR);
+
rtx dest = operands[0];
rtx src = operands[1];
rtx element = operands[2];
- rtx vec_tmp = operands[3];
- int value;
+ rtx vec_tmp;
+
+ if (GET_CODE (operands[3]) == SCRATCH)
+ vec_tmp = gen_reg_rtx (V4SImode);
+ else
+ vec_tmp = operands[3];
/* Adjust index for LE element ordering, the below minuend 3 is computed by
GET_MODE_NUNITS (V4SImode) - 1. */
if (!BYTES_BIG_ENDIAN)
element = GEN_INT (3 - INTVAL (element));
- /* If the value is in the correct position, we can avoid doing the VSPLT<x>
- instruction. */
- value = INTVAL (element);
- if (value != 1)
- emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
- else
- vec_tmp = src;
+ emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
- if (MEM_P (operands[0]))
- {
- if (can_create_pseudo_p ())
- dest = rs6000_force_indexed_or_indirect_mem (dest);
-
- if (TARGET_P8_VECTOR)
- emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
- else
- emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
- }
-
- else if (TARGET_P8_VECTOR)
- emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
- else
- emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
- gen_rtx_REG (DImode, REGNO (vec_tmp)));
+ int value = BYTES_BIG_ENDIAN ? 1 : 2;
+ emit_insn (gen_vsx_extract_v4si_w1 (dest, vec_tmp, GEN_INT (value)));
DONE;
-}
- [(set_attr "type" "mfvsr,vecperm,fpstore")
- (set_attr "length" "8")
- (set_attr "isa" "*,p8v,*")])
+})
(define_insn_and_split "*vsx_extract_<mode>_p8"
[(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
new file mode 100644
index 0000000..e7cdbc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
new file mode 100644
index 0000000..2248b52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
+/* { dg-final { scan-assembler-not {\mvextuw[rl]x\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769.h b/gcc/testsuite/gcc.target/powerpc/pr106769.h
new file mode 100644
index 0000000..1c8c8a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106769.h
@@ -0,0 +1,17 @@
+#include <altivec.h>
+
+#ifdef __BIG_ENDIAN__
+#define LANE 1
+#else
+#define LANE 2
+#endif
+
+unsigned int foo1 (vector unsigned int v)
+{
+ return vec_extract(v, LANE);
+}
+
+void foo2 (vector unsigned int v, unsigned int* p)
+{
+ *p = vec_extract(v, LANE);
+}