diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2013-10-07 13:01:28 +0000 |
---|---|---|
committer | William Schmidt <wschmidt@gcc.gnu.org> | 2013-10-07 13:01:28 +0000 |
commit | 19e9b2a386e52b3045a94b7d401530e4237f87ed (patch) | |
tree | 1c2ae8a1d85f5c2263fb81ad040e43fad9a868fa /gcc | |
parent | 0cf686948bf515dcfcc47314ab75fda468fe7357 (diff) | |
download | gcc-19e9b2a386e52b3045a94b7d401530e4237f87ed.zip gcc-19e9b2a386e52b3045a94b7d401530e4237f87ed.tar.gz gcc-19e9b2a386e52b3045a94b7d401530e4237f87ed.tar.bz2 |
rs6000.c (altivec_expand_vec_perm_const_le): New.
2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000.c (altivec_expand_vec_perm_const_le): New.
(altivec_expand_vec_perm_const): Call it.
From-SVN: r203247
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 88 |
2 files changed, 93 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f025d83..aa0325f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + * config/rs6000/rs6000.c (altivec_expand_vec_perm_const_le): New. + (altivec_expand_vec_perm_const): Call it. + +2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + * config/rs6000/vector.md (mov<mode>): Emit permuted move sequences for LE VSX loads and stores at expand time. * config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_move): New diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index a2028e2..0cf6ed8 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -28526,6 +28526,88 @@ rs6000_emit_parity (rtx dst, rtx src) } } +/* Expand an Altivec constant permutation for little endian mode. + There are two issues: First, the two input operands must be + swapped so that together they form a double-wide array in LE + order. Second, the vperm instruction has surprising behavior + in LE mode: it interprets the elements of the source vectors + in BE mode ("left to right") and interprets the elements of + the destination vector in LE mode ("right to left"). To + correct for this, we must subtract each element of the permute + control vector from 31. + + For example, suppose we want to concatenate vr10 = {0, 1, 2, 3} + with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm. + We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to + serve as the permute control vector. Then, in BE mode, + + vperm 9,10,11,12 + + places the desired result in vr9. However, in LE mode the + vector contents will be + + vr10 = 00000003 00000002 00000001 00000000 + vr11 = 00000007 00000006 00000005 00000004 + + The result of the vperm using the same permute control vector is + + vr9 = 05000000 07000000 01000000 03000000 + + That is, the leftmost 4 bytes of vr10 are interpreted as the + source for the rightmost 4 bytes of vr9, and so on. + + If we change the permute control vector to + + vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4} + + and issue + + vperm 9,11,10,12 + + we get the desired + + vr9 = 00000006 00000004 00000002 00000000. */ + +void +altivec_expand_vec_perm_const_le (rtx operands[4]) +{ + unsigned int i; + rtx perm[16]; + rtx constv, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + + /* Unpack and adjust the constant selector. */ + for (i = 0; i < 16; ++i) + { + rtx e = XVECEXP (sel, 0, i); + unsigned int elt = 31 - (INTVAL (e) & 31); + perm[i] = GEN_INT (elt); + } + + /* Expand to a permute, swapping the inputs and using the + adjusted selector. */ + if (!REG_P (op0)) + op0 = force_reg (V16QImode, op0); + if (!REG_P (op1)) + op1 = force_reg (V16QImode, op1); + + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv), + UNSPEC_VPERM); + if (!REG_P (target)) + { + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + /* Expand an Altivec constant permutation. Return true if we match an efficient implementation; false to fall back to VPERM. */ @@ -28706,6 +28788,12 @@ altivec_expand_vec_perm_const (rtx operands[4]) } } + if (!BYTES_BIG_ENDIAN) + { + altivec_expand_vec_perm_const_le (operands); + return true; + } + return false; } |