aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBill Schmidt <wschmidt@linux.vnet.ibm.com>2013-10-11 18:52:07 +0000
committerWilliam Schmidt <wschmidt@gcc.gnu.org>2013-10-11 18:52:07 +0000
commitf200869abc9e4b4dc6e48425bc3cd22fa1799a67 (patch)
treeaf7d2f54daecbe920f1c58a12f1883895fd09a75
parent39aeb94e3d9fd7379547e5517c4a12c67f41b862 (diff)
downloadgcc-f200869abc9e4b4dc6e48425bc3cd22fa1799a67.zip
gcc-f200869abc9e4b4dc6e48425bc3cd22fa1799a67.tar.gz
gcc-f200869abc9e4b4dc6e48425bc3cd22fa1799a67.tar.bz2
vector.md (vec_realign_load<mode>): Generate vperm directly to circumvent subtract from splat{31} workaround.
2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm directly to circumvent subtract from splat{31} workaround. * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New prototype. * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New. * config/rs6000/altivec.md (define_c_enum "unspec"): Add UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X. (altivec_vperm_<mode>): Convert to define_insn_and_split to separate big and little endian logic. (*altivec_vperm_<mode>_internal): New define_insn. (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to separate big and little endian logic. (*altivec_vperm_<mode>_uns_internal): New define_insn. (vec_permv16qi): Add little endian logic. From-SVN: r203457
-rw-r--r--gcc/ChangeLog17
-rw-r--r--gcc/config/rs6000/altivec.md83
-rw-r--r--gcc/config/rs6000/rs6000-protos.h1
-rw-r--r--gcc/config/rs6000/rs6000.c48
-rw-r--r--gcc/config/rs6000/vector.md11
5 files changed, 155 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a5bde2c..5a6fff6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
+ directly to circumvent subtract from splat{31} workaround.
+ * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
+ prototype.
+ * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
+ * config/rs6000/altivec.md (define_c_enum "unspec"): Add
+ UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
+ (altivec_vperm_<mode>): Convert to define_insn_and_split to
+ separate big and little endian logic.
+ (*altivec_vperm_<mode>_internal): New define_insn.
+ (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
+ separate big and little endian logic.
+ (*altivec_vperm_<mode>_uns_internal): New define_insn.
+ (vec_permv16qi): Add little endian logic.
+
2013-10-11 Marc Glisse <marc.glisse@inria.fr>
* doc/extend.texi (returns_nonnull): Remove arguments.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 47a3a689..49f9081 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -59,6 +59,8 @@
UNSPEC_VSUMSWS
UNSPEC_VPERM
UNSPEC_VPERM_UNS
+ UNSPEC_VPERM_X
+ UNSPEC_VPERM_UNS_X
UNSPEC_VRFIN
UNSPEC_VCFUX
UNSPEC_VCFSX
@@ -1279,21 +1281,91 @@
"vrfiz %0,%1"
[(set_attr "type" "vecfloat")])
-(define_insn "altivec_vperm_<mode>"
+(define_insn_and_split "altivec_vperm_<mode>"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
(match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERM_X))]
+ "TARGET_ALTIVEC"
+ "#"
+ "!reload_in_progress && !reload_completed"
+ [(set (match_dup 0) (match_dup 4))]
+{
+ if (BYTES_BIG_ENDIAN)
+ operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec (3, operands[1],
+ operands[2], operands[3]),
+ UNSPEC_VPERM);
+ else
+ {
+ /* We want to subtract from 31, but we can't vspltisb 31 since
+ it's out of range. -1 works as well because only the low-order
+ five bits of the permute control vector elements are used. */
+ rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
+ gen_rtx_CONST_INT (QImode, -1));
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp, splat);
+ rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
+ emit_move_insn (tmp, sel);
+ operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec (3, operands[2],
+ operands[1], tmp),
+ UNSPEC_VPERM);
+ }
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vperm_<mode>_internal"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "+v")]
UNSPEC_VPERM))]
"TARGET_ALTIVEC"
"vperm %0,%1,%2,%3"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vperm_<mode>_uns"
+(define_insn_and_split "altivec_vperm_<mode>_uns"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
(match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERM_UNS_X))]
+ "TARGET_ALTIVEC"
+ "#"
+ "!reload_in_progress && !reload_completed"
+ [(set (match_dup 0) (match_dup 4))]
+{
+ if (BYTES_BIG_ENDIAN)
+ operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec (3, operands[1],
+ operands[2], operands[3]),
+ UNSPEC_VPERM_UNS);
+ else
+ {
+ /* We want to subtract from 31, but we can't vspltisb 31 since
+ it's out of range. -1 works as well because only the low-order
+ five bits of the permute control vector elements are used. */
+ rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
+ gen_rtx_CONST_INT (QImode, -1));
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp, splat);
+ rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
+ emit_move_insn (tmp, sel);
+ operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec (3, operands[2],
+ operands[1], tmp),
+ UNSPEC_VPERM_UNS);
+ }
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vperm_<mode>_uns_internal"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "+v")]
UNSPEC_VPERM_UNS))]
"TARGET_ALTIVEC"
"vperm %0,%1,%2,%3"
@@ -1306,7 +1378,12 @@
(match_operand:V16QI 3 "register_operand" "")]
UNSPEC_VPERM))]
"TARGET_ALTIVEC"
- "")
+{
+ if (!BYTES_BIG_ENDIAN) {
+ altivec_expand_vec_perm_le (operands);
+ DONE;
+ }
+})
(define_expand "vec_perm_constv16qi"
[(match_operand:V16QI 0 "register_operand" "")
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 9aa9429..1fc8903 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -56,6 +56,7 @@ extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, int);
extern bool altivec_expand_vec_perm_const (rtx op[4]);
+extern void altivec_expand_vec_perm_le (rtx op[4]);
extern bool rs6000_expand_vec_perm_const (rtx op[4]);
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0cf6ed8..3f56136 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -28608,6 +28608,54 @@ altivec_expand_vec_perm_const_le (rtx operands[4])
emit_move_insn (target, unspec);
}
+/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
+ permute control vector. But here it's not a constant, so we must
+ generate a vector splat/subtract to do the adjustment. */
+
+void
+altivec_expand_vec_perm_le (rtx operands[4])
+{
+ rtx splat, unspec;
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx sel = operands[3];
+ rtx tmp = target;
+
+ /* Get everything in regs so the pattern matches. */
+ if (!REG_P (op0))
+ op0 = force_reg (V16QImode, op0);
+ if (!REG_P (op1))
+ op1 = force_reg (V16QImode, op1);
+ if (!REG_P (sel))
+ sel = force_reg (V16QImode, sel);
+ if (!REG_P (target))
+ tmp = gen_reg_rtx (V16QImode);
+
+ /* SEL = splat(31) - SEL. */
+ /* We want to subtract from 31, but we can't vspltisb 31 since
+ it's out of range. -1 works as well because only the low-order
+ five bits of the permute control vector elements are used. */
+ splat = gen_rtx_VEC_DUPLICATE (V16QImode,
+ gen_rtx_CONST_INT (QImode, -1));
+ emit_move_insn (tmp, splat);
+ sel = gen_rtx_MINUS (V16QImode, tmp, sel);
+ emit_move_insn (tmp, sel);
+
+ /* Permute with operands reversed and adjusted selector. */
+ unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp),
+ UNSPEC_VPERM);
+
+ /* Copy into target, possibly by way of a register. */
+ if (!REG_P (target))
+ {
+ emit_move_insn (tmp, unspec);
+ unspec = tmp;
+ }
+
+ emit_move_insn (target, unspec);
+}
+
/* Expand an Altivec constant permutation. Return true if we match
an efficient implementation; false to fall back to VPERM. */
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index cbb1f4f..e88d879 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -950,8 +950,15 @@
emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
operands[2], operands[3]));
else
- emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[2],
- operands[1], operands[3]));
+ {
+ /* Avoid the "subtract from splat31" workaround for vperm since
+ we have changed lvsr to lvsl instead. */
+ rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec (3, operands[2],
+ operands[1], operands[3]),
+ UNSPEC_VPERM);
+ emit_move_insn (operands[0], unspec);
+ }
DONE;
})