-rw-r--r--  gcc/ChangeLog                                        33
-rw-r--r--  gcc/config/rs6000/rs6000.c                            3
-rw-r--r--  gcc/config/rs6000/vector.md                           1
-rw-r--r--  gcc/config/rs6000/vsx.md                             78
-rw-r--r--  gcc/testsuite/ChangeLog                               5
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c   26
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c   15
7 files changed, 130 insertions, 31 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3a50100..5f7e000 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,36 @@
+2015-12-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/rs6000.c (rs6000_emit_le_vsx_move): Verify that
+ this is never called when lxvx/stxvx are available.
+ (pass_analyze_swaps::gate): Don't perform swap optimization when
+ lxvx/stxvx are available.
+ * config/rs6000/vector.md (mov<mode>): Don't call
+ rs6000_emit_le_vsx_move when lxvx/stxvx are available.
+ * config/rs6000/vsx.md (*p9_vecload_<mode>): New define_insn.
+ (*p9_vecstore_<mode>): Likewise.
+ (*vsx_le_perm_load_<mode>:VSX_LE): Disable when lxvx/stxvx are
+ available.
+ (*vsx_le_perm_load_<mode>:VSX_W): Likewise.
+ (*vsx_le_perm_load_v8hi): Likewise.
+ (*vsx_le_perm_load_v16qi): Likewise.
+ (*vsx_le_perm_store_<mode>:VSX_LE): Likewise.
+ ([related define_splits]): Likewise.
+ (*vsx_le_perm_store_<mode>:VSX_W): Likewise.
+ ([related define_splits]): Likewise.
+ (*vsx_le_perm_store_v8hi): Likewise.
+ ([related define_splits]): Likewise.
+ (*vsx_le_perm_store_v16qi): Likewise.
+ ([related define_splits]): Likewise.
+ (*vsx_lxvd2x2_le_<mode>): Likewise.
+ (*vsx_lxvd2x4_le_<mode>): Likewise.
+ (*vsx_lxvd2x8_le_V8HI): Likewise.
+ (*vsx_lxvd2x16_le_V16QI): Likewise.
+ (*vsx_stxvd2x2_le_<mode>): Likewise.
+ (*vsx_stxvd2x4_le_<mode>): Likewise.
+ (*vsx_stxvd2x8_le_V8HI): Likewise.
+ (*vsx_stxvd2x16_le_V16QI): Likewise.
+ ([define_peepholes for vector load fusion]): Likewise.
+
2015-12-28 Nathan Sidwell <nathan@acm.org>
* config/nvptx/nvptx.c (nvptx_output_call_insn): Expect hard regs.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8ffdae2..a97e47a 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -8904,6 +8904,7 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
gcc_assert (!BYTES_BIG_ENDIAN
&& VECTOR_MEM_VSX_P (mode)
+ && !TARGET_P9_VECTOR
&& !gpr_or_gpr_p (dest, source)
&& (MEM_P (source) ^ MEM_P (dest)));
@@ -37793,7 +37794,7 @@ public:
virtual bool gate (function *)
{
return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
- && rs6000_optimize_swaps);
+ && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
}
virtual unsigned int execute (function *fun)
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index d8699c8..1759390 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -113,6 +113,7 @@
}
if (!BYTES_BIG_ENDIAN
&& VECTOR_MEM_VSX_P (<MODE>mode)
+ && !TARGET_P9_VECTOR
&& !gpr_or_gpr_p (operands[0], operands[1])
&& (memory_operand (operands[0], <MODE>mode)
^ memory_operand (operands[1], <MODE>mode)))
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b95da6f..cf5ea56 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -301,6 +301,24 @@
UNSPEC_VSX_XVCVDPUXDS
])
+;; VSX (P9) moves
+
+(define_insn "*p9_vecload_<mode>"
+ [(set (match_operand:VSX_M 0 "vsx_register_operand" "=<VSa>")
+ (match_operand:VSX_M 1 "memory_operand" "Z"))]
+ "TARGET_P9_VECTOR"
+ "lxvx %x0,%y1"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "*p9_vecstore_<mode>"
+ [(set (match_operand:VSX_M 0 "memory_operand" "=Z")
+ (match_operand:VSX_M 1 "vsx_register_operand" "<VSa>"))]
+ "TARGET_P9_VECTOR"
+ "stxvx %x1,%y0"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
;; VSX moves
;; The patterns for LE permuted loads and stores come before the general
@@ -308,9 +326,9 @@
(define_insn_and_split "*vsx_le_perm_load_<mode>"
[(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
(match_operand:VSX_LE 1 "memory_operand" "Z"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
[(set (match_dup 2)
(vec_select:<MODE>
(match_dup 1)
@@ -331,9 +349,9 @@
(define_insn_and_split "*vsx_le_perm_load_<mode>"
[(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
(match_operand:VSX_W 1 "memory_operand" "Z"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
[(set (match_dup 2)
(vec_select:<MODE>
(match_dup 1)
@@ -356,9 +374,9 @@
(define_insn_and_split "*vsx_le_perm_load_v8hi"
[(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
(match_operand:V8HI 1 "memory_operand" "Z"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
[(set (match_dup 2)
(vec_select:V8HI
(match_dup 1)
@@ -385,9 +403,9 @@
(define_insn_and_split "*vsx_le_perm_load_v16qi"
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(match_operand:V16QI 1 "memory_operand" "Z"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
[(set (match_dup 2)
(vec_select:V16QI
(match_dup 1)
@@ -422,7 +440,7 @@
(define_insn "*vsx_le_perm_store_<mode>"
[(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
(match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
[(set_attr "type" "vecstore")
(set_attr "length" "12")])
@@ -430,7 +448,7 @@
(define_split
[(set (match_operand:VSX_LE 0 "memory_operand" "")
(match_operand:VSX_LE 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
[(set (match_dup 2)
(vec_select:<MODE>
(match_dup 1)
@@ -449,7 +467,7 @@
(define_split
[(set (match_operand:VSX_LE 0 "memory_operand" "")
(match_operand:VSX_LE 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
[(set (match_dup 1)
(vec_select:<MODE>
(match_dup 1)
@@ -467,7 +485,7 @@
(define_insn "*vsx_le_perm_store_<mode>"
[(set (match_operand:VSX_W 0 "memory_operand" "=Z")
(match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
[(set_attr "type" "vecstore")
(set_attr "length" "12")])
@@ -475,7 +493,7 @@
(define_split
[(set (match_operand:VSX_W 0 "memory_operand" "")
(match_operand:VSX_W 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
[(set (match_dup 2)
(vec_select:<MODE>
(match_dup 1)
@@ -496,7 +514,7 @@
(define_split
[(set (match_operand:VSX_W 0 "memory_operand" "")
(match_operand:VSX_W 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
[(set (match_dup 1)
(vec_select:<MODE>
(match_dup 1)
@@ -517,7 +535,7 @@
(define_insn "*vsx_le_perm_store_v8hi"
[(set (match_operand:V8HI 0 "memory_operand" "=Z")
(match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
[(set_attr "type" "vecstore")
(set_attr "length" "12")])
@@ -525,7 +543,7 @@
(define_split
[(set (match_operand:V8HI 0 "memory_operand" "")
(match_operand:V8HI 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
[(set (match_dup 2)
(vec_select:V8HI
(match_dup 1)
@@ -550,7 +568,7 @@
(define_split
[(set (match_operand:V8HI 0 "memory_operand" "")
(match_operand:V8HI 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
[(set (match_dup 1)
(vec_select:V8HI
(match_dup 1)
@@ -577,7 +595,7 @@
(define_insn "*vsx_le_perm_store_v16qi"
[(set (match_operand:V16QI 0 "memory_operand" "=Z")
(match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
[(set_attr "type" "vecstore")
(set_attr "length" "12")])
@@ -585,7 +603,7 @@
(define_split
[(set (match_operand:V16QI 0 "memory_operand" "")
(match_operand:V16QI 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
[(set (match_dup 2)
(vec_select:V16QI
(match_dup 1)
@@ -618,7 +636,7 @@
(define_split
[(set (match_operand:V16QI 0 "memory_operand" "")
(match_operand:V16QI 1 "vsx_register_operand" ""))]
- "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
[(set (match_dup 1)
(vec_select:V16QI
(match_dup 1)
@@ -1781,7 +1799,7 @@
(vec_select:VSX_LE
(match_operand:VSX_LE 1 "memory_operand" "Z")
(parallel [(const_int 1) (const_int 0)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
"lxvd2x %x0,%y1"
[(set_attr "type" "vecload")])
@@ -1791,7 +1809,7 @@
(match_operand:VSX_W 1 "memory_operand" "Z")
(parallel [(const_int 2) (const_int 3)
(const_int 0) (const_int 1)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
"lxvd2x %x0,%y1"
[(set_attr "type" "vecload")])
@@ -1803,7 +1821,7 @@
(const_int 6) (const_int 7)
(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
"lxvd2x %x0,%y1"
[(set_attr "type" "vecload")])
@@ -1819,7 +1837,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
"lxvd2x %x0,%y1"
[(set_attr "type" "vecload")])
@@ -1830,7 +1848,7 @@
(vec_select:VSX_LE
(match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
(parallel [(const_int 1) (const_int 0)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1840,7 +1858,7 @@
(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
(parallel [(const_int 2) (const_int 3)
(const_int 0) (const_int 1)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1852,7 +1870,7 @@
(const_int 6) (const_int 7)
(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1868,7 +1886,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -2456,7 +2474,7 @@
(set (match_operand:VSX_M2 2 "vsx_register_operand" "")
(mem:VSX_M2 (plus:P (match_dup 0)
(match_operand:P 3 "int_reg_operand" ""))))]
- "TARGET_VSX && TARGET_P8_FUSION"
+ "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
"li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
@@ -2467,7 +2485,7 @@
(set (match_operand:VSX_M2 2 "vsx_register_operand" "")
(mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
(match_dup 0))))]
- "TARGET_VSX && TARGET_P8_FUSION"
+ "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
"li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1164bcd..949eeea 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-12-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/p9-lxvx-stxvx-1.c: New.
+ * gcc.target/powerpc/p9-lxvx-stxvx-2.c: New.
+
2015-12-24 Kirill Yukhin <kirill.yukhin@intel.com>
* g++.dg/other/i386-2.C: Add -mpku.
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c b/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c
new file mode 100644
index 0000000..df25d55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+/* { dg-final { scan-assembler "lxvx" } } */
+/* { dg-final { scan-assembler "stxvx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* Verify P9 vector loads and stores are used rather than the
+ load-swap/swap-store workarounds for P8. */
+#define N 16
+
+signed char ca[N] __attribute__((aligned(16)));
+signed char cb[] __attribute__((aligned(16)))
+ = {8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7};
+signed char cc[] __attribute__((aligned(16)))
+ = {1, 1, 2, 2, 3, 3, 2, 2, 1, 1, 0, 0, -1, -1, -2, -2};
+
+__attribute__((noinline)) void foo ()
+{
+ int i;
+ for (i = 0; i < N; i++) {
+ ca[i] = cb[i] - cc[i];
+ }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c b/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c
new file mode 100644
index 0000000..853a456
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O1" } */
+/* { dg-final { scan-assembler "lxvx" } } */
+/* { dg-final { scan-assembler "stvewx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+
+/* Verify we don't perform P8 load-vector fusion on P9. */
+#include <altivec.h>
+
+void f (void *p)
+{
+ vector unsigned int u32 = vec_vsx_ld (1, (const unsigned int *)p);
+ vec_ste (u32, 1, (unsigned int *)p);
+}