diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2013-10-11 18:52:07 +0000 |
---|---|---|
committer | William Schmidt <wschmidt@gcc.gnu.org> | 2013-10-11 18:52:07 +0000 |
commit | f200869abc9e4b4dc6e48425bc3cd22fa1799a67 (patch) | |
tree | af7d2f54daecbe920f1c58a12f1883895fd09a75 /gcc | |
parent | 39aeb94e3d9fd7379547e5517c4a12c67f41b862 (diff) | |
download | gcc-f200869abc9e4b4dc6e48425bc3cd22fa1799a67.zip gcc-f200869abc9e4b4dc6e48425bc3cd22fa1799a67.tar.gz gcc-f200869abc9e4b4dc6e48425bc3cd22fa1799a67.tar.bz2 |
vector.md (vec_realign_load<mode>): Generate vperm directly to circumvent subtract from splat{31} workaround.
2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
directly to circumvent subtract from splat{31} workaround.
* config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
prototype.
* config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
* config/rs6000/altivec.md (define_c_enum "unspec"): Add
UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
(altivec_vperm_<mode>): Convert to define_insn_and_split to
separate big and little endian logic.
(*altivec_vperm_<mode>_internal): New define_insn.
(altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
separate big and little endian logic.
(*altivec_vperm_<mode>_uns_internal): New define_insn.
(vec_permv16qi): Add little endian logic.
From-SVN: r203457
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 17 |
-rw-r--r-- | gcc/config/rs6000/altivec.md | 83 |
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 1 |
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 48 |
-rw-r--r-- | gcc/config/rs6000/vector.md | 11 |
5 files changed, 155 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a5bde2c..5a6fff6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm + directly to circumvent subtract from splat{31} workaround. + * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New + prototype. + * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New. + * config/rs6000/altivec.md (define_c_enum "unspec"): Add + UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X. + (altivec_vperm_<mode>): Convert to define_insn_and_split to + separate big and little endian logic. + (*altivec_vperm_<mode>_internal): New define_insn. + (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to + separate big and little endian logic. + (*altivec_vperm_<mode>_uns_internal): New define_insn. + (vec_permv16qi): Add little endian logic. + 2013-10-11 Marc Glisse <marc.glisse@inria.fr> * doc/extend.texi (returns_nonnull): Remove arguments. 
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 47a3a689..49f9081 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -59,6 +59,8 @@ UNSPEC_VSUMSWS UNSPEC_VPERM UNSPEC_VPERM_UNS + UNSPEC_VPERM_X + UNSPEC_VPERM_UNS_X UNSPEC_VRFIN UNSPEC_VCFUX UNSPEC_VCFSX @@ -1279,21 +1281,91 @@ "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn "altivec_vperm_<mode>" +(define_insn_and_split "altivec_vperm_<mode>" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] + UNSPEC_VPERM_X))] + "TARGET_ALTIVEC" + "#" + "!reload_in_progress && !reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + if (BYTES_BIG_ENDIAN) + operands[4] = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[1], + operands[2], operands[3]), + UNSPEC_VPERM); + else + { + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. 
*/ + rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, splat); + rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); + emit_move_insn (tmp, sel); + operands[4] = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[2], + operands[1], tmp), + UNSPEC_VPERM); + } +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vperm_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vperm_<mode>_uns" +(define_insn_and_split "altivec_vperm_<mode>_uns" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] + UNSPEC_VPERM_UNS_X))] + "TARGET_ALTIVEC" + "#" + "!reload_in_progress && !reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + if (BYTES_BIG_ENDIAN) + operands[4] = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[1], + operands[2], operands[3]), + UNSPEC_VPERM_UNS); + else + { + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. 
*/ + rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, splat); + rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); + emit_move_insn (tmp, sel); + operands[4] = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[2], + operands[1], tmp), + UNSPEC_VPERM_UNS); + } +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vperm_<mode>_uns_internal" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" @@ -1306,7 +1378,12 @@ (match_operand:V16QI 3 "register_operand" "")] UNSPEC_VPERM))] "TARGET_ALTIVEC" - "") +{ + if (!BYTES_BIG_ENDIAN) { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) (define_expand "vec_perm_constv16qi" [(match_operand:V16QI 0 "register_operand" "") diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9aa9429..1fc8903 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -56,6 +56,7 @@ extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, int); extern bool altivec_expand_vec_perm_const (rtx op[4]); +extern void altivec_expand_vec_perm_le (rtx op[4]); extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 0cf6ed8..3f56136 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -28608,6 +28608,54 @@ altivec_expand_vec_perm_const_le (rtx operands[4]) emit_move_insn (target, unspec); } +/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the + permute 
control vector. But here it's not a constant, so we must + generate a vector splat/subtract to do the adjustment. */ + +void +altivec_expand_vec_perm_le (rtx operands[4]) +{ + rtx splat, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + rtx tmp = target; + + /* Get everything in regs so the pattern matches. */ + if (!REG_P (op0)) + op0 = force_reg (V16QImode, op0); + if (!REG_P (op1)) + op1 = force_reg (V16QImode, op1); + if (!REG_P (sel)) + sel = force_reg (V16QImode, sel); + if (!REG_P (target)) + tmp = gen_reg_rtx (V16QImode); + + /* SEL = splat(31) - SEL. */ + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. */ + splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + emit_move_insn (tmp, splat); + sel = gen_rtx_MINUS (V16QImode, tmp, sel); + emit_move_insn (tmp, sel); + + /* Permute with operands reversed and adjusted selector. */ + unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp), + UNSPEC_VPERM); + + /* Copy into target, possibly by way of a register. */ + if (!REG_P (target)) + { + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + /* Expand an Altivec constant permutation. Return true if we match an efficient implementation; false to fall back to VPERM. */ diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index cbb1f4f..e88d879 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -950,8 +950,15 @@ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2], operands[3])); else - emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[2], - operands[1], operands[3])); + { + /* Avoid the "subtract from splat31" workaround for vperm since + we have changed lvsr to lvsl instead. 
*/ + rtx unspec = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[2], + operands[1], operands[3]), + UNSPEC_VPERM); + emit_move_insn (operands[0], unspec); + } DONE; }) |