-rw-r--r--  gcc/ChangeLog                                      43
-rw-r--r--  gcc/config/rs6000/altivec.h                         1
-rw-r--r--  gcc/config/rs6000/altivec.md                        2
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def                9
-rw-r--r--  gcc/config/rs6000/rs6000-c.c                      199
-rw-r--r--  gcc/config/rs6000/rs6000.c                        142
-rw-r--r--  gcc/config/rs6000/vsx.md                          208
-rw-r--r--  gcc/testsuite/ChangeLog                             5
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c      6
9 files changed, 379 insertions(+), 236 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 322124c..8ad4d7b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,46 @@
+2017-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/altivec.h (vec_xst_be): New #define.
+ * config/rs6000/altivec.md (altivec_vperm_<mode>_direct): Rename
+ and externalize from *altivec_vperm_<mode>_internal.
+ * config/rs6000/rs6000-builtin.def (XL_BE_V16QI): Remove macro
+ instantiation.
+ (XL_BE_V8HI): Likewise.
+ (XL_BE_V4SI): Likewise.
+ (XL_BE_V2DI): Likewise.
+ (XL_BE_V4SF): Likewise.
+ (XL_BE_V2DF): Likewise.
+ (XST_BE): Add BU_VSX_OVERLOAD_X macro instantiation.
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Correct
+ all array entries with these keys: VSX_BUILTIN_VEC_XL,
+ VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_VEC_XST. Add entries for key
+ VSX_BUILTIN_VEC_XST_BE.
+ * config/rs6000/rs6000.c (altivec_expand_xl_be_builtin): Remove.
+ (altivec_expand_builtin): Remove handling for VSX_BUILTIN_XL_BE_*
+ built-ins.
+ (altivec_init_builtins): Replace conditional calls to def_builtin
+ for __builtin_vsx_ld_elemrev_{v8hi,v16qi} and
+ __builtin_vsx_st_elemrev_{v8hi,v16qi} based on TARGET_P9_VECTOR
+ with unconditional calls. Remove calls to def_builtin for
+ __builtin_vsx_le_be_<mode>. Add a call to def_builtin for
+ __builtin_vec_xst_be.
+ * config/rs6000/vsx.md (vsx_ld_elemrev_v8hi): Convert define_insn
+ to define_expand, and add alternate RTL generation for P8.
+ (*vsx_ld_elemrev_v8hi_internal): New define_insn based on
+ vsx_ld_elemrev_v8hi.
+ (vsx_ld_elemrev_v16qi): Convert define_insn to define_expand, and
+ add alternate RTL generation for P8.
+ (*vsx_ld_elemrev_v16qi_internal): New define_insn based on
+ vsx_ld_elemrev_v16qi.
+ (vsx_st_elemrev_v8hi): Convert define_insn
+ to define_expand, and add alternate RTL generation for P8.
+ (*vsx_st_elemrev_v8hi_internal): New define_insn based on
+ vsx_st_elemrev_v8hi.
+ (vsx_st_elemrev_v16qi): Convert define_insn to define_expand, and
+ add alternate RTL generation for P8.
+ (*vsx_st_elemrev_v16qi_internal): New define_insn based on
+ vsx_st_elemrev_v16qi.
+
2017-11-15 H.J. Lu <hongjiu.lu@intel.com>
PR target/82990
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 646712d..068dfef 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -357,6 +357,7 @@
#define vec_xl __builtin_vec_vsx_ld
#define vec_xl_be __builtin_vec_xl_be
#define vec_xst __builtin_vec_vsx_st
+#define vec_xst_be __builtin_vec_xst_be
/* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions
instead of __builtin_vec_<xxx> */
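With vec_xst_be now exposed alongside vec_xl_be, the pair gives an
endian-stable interface.  A minimal usage sketch (function name
invented for illustration), assuming the PVIPR semantics where
vec_xl_be/vec_xst_be use big-endian element ordering regardless of
target endianness:

    #include <altivec.h>

    /* On a little-endian target, vec_xl_be loads elements in
       big-endian element order, so v[i] == p[7 - i] here; vec_xst_be
       applies the same reordering on the way out, so the round trip
       leaves p[] unchanged.  On big-endian targets both behave as
       plain vec_xl/vec_xst.  */
    vector short
    roundtrip_be (short *p)
    {
      vector short v = vec_xl_be (0, p);
      vec_xst_be (v, 0, p);
      return v;
    }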
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 651f6c9..7122f99 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2130,7 +2130,7 @@
})
;; Slightly prefer vperm, since the target does not overlap the source
-(define_insn "*altivec_vperm_<mode>_internal"
+(define_insn "altivec_vperm_<mode>_direct"
[(set (match_operand:VM 0 "register_operand" "=v,?wo")
(unspec:VM [(match_operand:VM 1 "register_operand" "v,wo")
(match_operand:VM 2 "register_operand" "v,0")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 6842c12..cfb6e55 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1774,14 +1774,6 @@ BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", MEM)
BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM)
BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM)
BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM)
-
-BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM)
-BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM)
-BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM)
-BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM)
-BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM)
-BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM)
-
BU_VSX_X (STXSDX, "stxsdx", MEM)
BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM)
BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM)
@@ -1884,6 +1876,7 @@ BU_VSX_OVERLOAD_X (ST, "st")
BU_VSX_OVERLOAD_X (XL, "xl")
BU_VSX_OVERLOAD_X (XL_BE, "xl_be")
BU_VSX_OVERLOAD_X (XST, "xst")
+BU_VSX_OVERLOAD_X (XST_BE, "xst_be")
/* 1 argument builtins pre ISA 2.04. */
BU_FP_MISC_1 (FCTID, "fctid", CONST, lrintdfdi2)
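For context, BU_VSX_OVERLOAD_X is the macro that registers an
overloaded builtin with no fixed signature; its definition in
rs6000-builtin.def is roughly of this shape (paraphrased here, so the
exact attribute flags may differ):

    #define BU_VSX_OVERLOAD_X(ENUM, NAME)                         \
      RS6000_BUILTIN_X (VSX_BUILTIN_VEC_ ## ENUM,  /* ENUM */     \
                        "__builtin_vec_" NAME,     /* NAME */     \
                        RS6000_BTM_VSX,            /* MASK */     \
                        (RS6000_BTC_OVERLOADED     /* ATTR */     \
                         | RS6000_BTC_SPECIAL),                   \
                        CODE_FOR_nothing)          /* ICODE */

So the new XST_BE line creates VSX_BUILTIN_VEC_XST_BE and the
__builtin_vec_xst_be name, whose overloads are resolved through the
rs6000-c.c table below.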
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index ef21ba3..645260a 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3111,69 +3111,94 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SUMS, ALTIVEC_BUILTIN_VSUMSWS,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF,
+
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_long_long, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V16QI, 0 },
- { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
- RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+ { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
- RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
- RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
- RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
- RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
- RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
- RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF,
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_long_long, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
- { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF,
- RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V16QI, 0 },
+ { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
@@ -3949,53 +3974,111 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF,
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DF,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V2DI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SF,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SF,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_long_long },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI,
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI,
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_long_long },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SF,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SF,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI,
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI,
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI,
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V4SI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI,
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
~RS6000_BTI_UINTSI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI,
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI,
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI,
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V8HI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI,
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
~RS6000_BTI_UINTHI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V16QI },
- { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI,
+ { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
~RS6000_BTI_UINTQI },
{ VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI,
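A sketch exercising a few of the vec_xst_be signatures registered
above (compile with -mvsx; the function name is invented):

    #include <altivec.h>

    void
    store_be_order (vector double vd, double *pd,
                    vector signed short vh, short *ph,
                    vector unsigned char vb, unsigned char *pb)
    {
      vec_xst_be (vd, 0, pd);   /* -> __builtin_vsx_st_elemrev_v2df */
      vec_xst_be (vh, 0, ph);   /* -> __builtin_vsx_st_elemrev_v8hi */
      vec_xst_be (vb, 0, pb);   /* -> __builtin_vsx_st_elemrev_v16qi */
    }

Each call type-resolves through VSX_BUILTIN_VEC_XST_BE to the
matching ST_ELEMREV builtin in the table.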
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 731613b..80499fd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -14478,58 +14478,6 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
}
static rtx
-altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
-{
- rtx pat, addr;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- machine_mode tmode = insn_data[icode].operand[0].mode;
- machine_mode mode0 = Pmode;
- machine_mode mode1 = Pmode;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
-
- if (icode == CODE_FOR_nothing)
- /* Builtin not supported on this processor. */
- return 0;
-
- /* If we got invalid arguments bail out before generating bad rtl. */
- if (arg0 == error_mark_node || arg1 == error_mark_node)
- return const0_rtx;
-
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- op1 = copy_to_mode_reg (mode1, op1);
-
- if (op0 == const0_rtx)
- addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
- else
- {
- op0 = copy_to_mode_reg (mode0, op0);
- addr = gen_rtx_MEM (blk ? BLKmode : tmode,
- gen_rtx_PLUS (Pmode, op1, op0));
- }
-
- pat = GEN_FCN (icode) (target, addr);
- if (!pat)
- return 0;
-
- emit_insn (pat);
- /* Reverse element order of elements if in LE mode */
- if (!VECTOR_ELT_ORDER_BIG)
- {
- rtx sel = swap_selector_for_mode (tmode);
- rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel),
- UNSPEC_VPERM);
- emit_insn (gen_rtx_SET (target, vperm));
- }
- return target;
-}
-
-static rtx
paired_expand_stv_builtin (enum insn_code icode, tree exp)
{
tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -15925,50 +15873,6 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
/* Fall through. */
}
- /* XL_BE We initialized them to always load in big endian order. */
- switch (fcode)
- {
- case VSX_BUILTIN_XL_BE_V2DI:
- {
- enum insn_code code = CODE_FOR_vsx_load_v2di;
- return altivec_expand_xl_be_builtin (code, exp, target, false);
- }
- break;
- case VSX_BUILTIN_XL_BE_V4SI:
- {
- enum insn_code code = CODE_FOR_vsx_load_v4si;
- return altivec_expand_xl_be_builtin (code, exp, target, false);
- }
- break;
- case VSX_BUILTIN_XL_BE_V8HI:
- {
- enum insn_code code = CODE_FOR_vsx_load_v8hi;
- return altivec_expand_xl_be_builtin (code, exp, target, false);
- }
- break;
- case VSX_BUILTIN_XL_BE_V16QI:
- {
- enum insn_code code = CODE_FOR_vsx_load_v16qi;
- return altivec_expand_xl_be_builtin (code, exp, target, false);
- }
- break;
- case VSX_BUILTIN_XL_BE_V2DF:
- {
- enum insn_code code = CODE_FOR_vsx_load_v2df;
- return altivec_expand_xl_be_builtin (code, exp, target, false);
- }
- break;
- case VSX_BUILTIN_XL_BE_V4SF:
- {
- enum insn_code code = CODE_FOR_vsx_load_v4sf;
- return altivec_expand_xl_be_builtin (code, exp, target, false);
- }
- break;
- default:
- break;
- /* Fall through. */
- }
-
*expandedp = false;
return NULL_RTX;
}
@@ -17629,6 +17533,10 @@ altivec_init_builtins (void)
VSX_BUILTIN_LD_ELEMREV_V4SF);
def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
VSX_BUILTIN_LD_ELEMREV_V4SI);
+ def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
+ VSX_BUILTIN_LD_ELEMREV_V8HI);
+ def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
+ VSX_BUILTIN_LD_ELEMREV_V16QI);
def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
VSX_BUILTIN_ST_ELEMREV_V2DF);
def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
@@ -17637,42 +17545,10 @@ altivec_init_builtins (void)
VSX_BUILTIN_ST_ELEMREV_V4SF);
def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
VSX_BUILTIN_ST_ELEMREV_V4SI);
-
- def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid,
- VSX_BUILTIN_XL_BE_V8HI);
- def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid,
- VSX_BUILTIN_XL_BE_V4SI);
- def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid,
- VSX_BUILTIN_XL_BE_V2DI);
- def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid,
- VSX_BUILTIN_XL_BE_V4SF);
- def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid,
- VSX_BUILTIN_XL_BE_V2DF);
- def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid,
- VSX_BUILTIN_XL_BE_V16QI);
-
- if (TARGET_P9_VECTOR)
- {
- def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
- VSX_BUILTIN_LD_ELEMREV_V8HI);
- def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
- VSX_BUILTIN_LD_ELEMREV_V16QI);
- def_builtin ("__builtin_vsx_st_elemrev_v8hi",
- void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
- def_builtin ("__builtin_vsx_st_elemrev_v16qi",
- void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
- }
- else
- {
- rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
- = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
- rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
- = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
- rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
- = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
- rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
- = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
- }
+ def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
+ VSX_BUILTIN_ST_ELEMREV_V8HI);
+ def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
+ VSX_BUILTIN_ST_ELEMREV_V16QI);
def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
VSX_BUILTIN_VEC_LD);
@@ -17684,6 +17560,8 @@ altivec_init_builtins (void)
VSX_BUILTIN_VEC_XL_BE);
def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
VSX_BUILTIN_VEC_XST);
+ def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
+ VSX_BUILTIN_VEC_XST_BE);
def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6ea16be..00d7656 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1122,7 +1122,7 @@
"lxvw4x %x0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "vsx_ld_elemrev_v8hi"
+(define_expand "vsx_ld_elemrev_v8hi"
[(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
(vec_select:V8HI
(match_operand:V8HI 1 "memory_operand" "Z")
@@ -1130,22 +1130,94 @@
(const_int 5) (const_int 4)
(const_int 3) (const_int 2)
(const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ rtx subreg, subreg2, perm[16], pcv;
+ /* 2 is leftmost element in register */
+ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
+ int i;
+
+ subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
+ emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
+ subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
+ subreg2, pcv));
+ DONE;
+ }
+})
+
+(define_insn "*vsx_ld_elemrev_v8hi_internal"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "memory_operand" "Z")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"lxvh8x %x0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "vsx_ld_elemrev_v16qi"
+(define_expand "vsx_ld_elemrev_v16qi"
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(vec_select:V16QI
- (match_operand:V16QI 1 "memory_operand" "Z")
- (parallel [(const_int 15) (const_int 14)
- (const_int 13) (const_int 12)
- (const_int 11) (const_int 10)
- (const_int 9) (const_int 8)
- (const_int 7) (const_int 6)
- (const_int 5) (const_int 4)
- (const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V16QI 1 "memory_operand" "Z")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ rtx subreg, subreg2, perm[16], pcv;
+ /* 3 is the leftmost element in the register.  */
+ unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
+ int i;
+
+ subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
+ emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
+ subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
+ subreg2, pcv));
+ DONE;
+ }
+})
+
+(define_insn "*vsx_ld_elemrev_v16qi_internal"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "memory_operand" "Z")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"lxvb16x %x0,%y1"
[(set_attr "type" "vecload")])
@@ -1153,8 +1225,8 @@
(define_insn "vsx_st_elemrev_v2df"
[(set (match_operand:V2DF 0 "memory_operand" "=Z")
(vec_select:V2DF
- (match_operand:V2DF 1 "vsx_register_operand" "wa")
- (parallel [(const_int 1) (const_int 0)])))]
+ (match_operand:V2DF 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1162,8 +1234,8 @@
(define_insn "vsx_st_elemrev_v2di"
[(set (match_operand:V2DI 0 "memory_operand" "=Z")
(vec_select:V2DI
- (match_operand:V2DI 1 "vsx_register_operand" "wa")
- (parallel [(const_int 1) (const_int 0)])))]
+ (match_operand:V2DI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1171,9 +1243,9 @@
(define_insn "vsx_st_elemrev_v4sf"
[(set (match_operand:V4SF 0 "memory_operand" "=Z")
(vec_select:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "wa")
- (parallel [(const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V4SF 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
"stxvw4x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1188,30 +1260,98 @@
"stxvw4x %x1,%y0"
[(set_attr "type" "vecstore")])
-(define_insn "vsx_st_elemrev_v8hi"
+(define_expand "vsx_st_elemrev_v8hi"
[(set (match_operand:V8HI 0 "memory_operand" "=Z")
(vec_select:V8HI
- (match_operand:V8HI 1 "vsx_register_operand" "wa")
- (parallel [(const_int 7) (const_int 6)
- (const_int 5) (const_int 4)
- (const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx subreg, perm[16], pcv;
+ rtx tmp = gen_reg_rtx (V8HImode);
+ /* 2 is the leftmost element in the register.  */
+ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
+ operands[1], pcv));
+ subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
+ emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "*vsx_st_elemrev_v8hi_internal"
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"stxvh8x %x1,%y0"
[(set_attr "type" "vecstore")])
-(define_insn "vsx_st_elemrev_v16qi"
+(define_expand "vsx_st_elemrev_v16qi"
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx subreg, perm[16], pcv;
+ rtx tmp = gen_reg_rtx (V16QImode);
+ /* 3 is the leftmost element in the register.  */
+ unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
+ operands[1], pcv));
+ subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
+ emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "*vsx_st_elemrev_v16qi_internal"
[(set (match_operand:V16QI 0 "memory_operand" "=Z")
(vec_select:V16QI
- (match_operand:V16QI 1 "vsx_register_operand" "wa")
- (parallel [(const_int 15) (const_int 14)
- (const_int 13) (const_int 12)
- (const_int 11) (const_int 10)
- (const_int 9) (const_int 8)
- (const_int 7) (const_int 6)
- (const_int 5) (const_int 4)
- (const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"stxvb16x %x1,%y0"
[(set_attr "type" "vecstore")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 676a2c4..eda6451 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/swaps-p8-26.c: Modify expected code
+ generation.
+
2017-11-15 Martin Sebor <msebor@redhat.com>
PR testsuite/82988
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c
index d01d86b..28ce1cd 100644
--- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c
@@ -1,11 +1,11 @@
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-times "lxvw4x" 2 } } */
-/* { dg-final { scan-assembler "stxvw4x" } } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* Verify that swap optimization does not interfere with element-reversing
+/* Verify that swap optimization does not interfere with unaligned
loads and stores. */
/* Test case to resolve PR79044. */
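The revised scans reflect that vec_xl/vec_xst now expand through the
plain VSX load/store builtins, so after the swaps pass a kernel of
this shape (hypothetical; the actual body of swaps-p8-26.c is not
shown in this diff) compiles at -mcpu=power8 to lxvd2x/stxvd2x with
no xxpermdi:

    #include <altivec.h>

    void
    sum (int *a, int *b, int *c)
    {
      vector int va = vec_xl (0, a);
      vector int vb = vec_xl (0, b);
      vec_xst (vec_add (va, vb), 0, c);
    }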