aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrevor Smigiel <Trevor_Smigiel@playstation.sony.com>2008-05-27 08:19:45 +0000
committerSa Liu <saliu@gcc.gnu.org>2008-05-27 08:19:45 +0000
commit9bf850280c6291a6d81ac1bdc113b6deb8f95449 (patch)
treead00c0ed4a8bfc16ce765c4f3c22294faa45f7eb
parentebe74ac50b01a591558cab26fb88ef7ee80748ad (diff)
downloadgcc-9bf850280c6291a6d81ac1bdc113b6deb8f95449.zip
gcc-9bf850280c6291a6d81ac1bdc113b6deb8f95449.tar.gz
gcc-9bf850280c6291a6d81ac1bdc113b6deb8f95449.tar.bz2
Add TImode libgcc functions for mul and div.
From-SVN: r135973
-rw-r--r--gcc/config/spu/divmodti4.c168
-rw-r--r--gcc/config/spu/multi3.c99
-rw-r--r--gcc/config/spu/spu.c12
-rw-r--r--gcc/testsuite/gcc.target/spu/muldivti3.c46
4 files changed, 323 insertions, 2 deletions
diff --git a/gcc/config/spu/divmodti4.c b/gcc/config/spu/divmodti4.c
new file mode 100644
index 0000000..ca643cc
--- /dev/null
+++ b/gcc/config/spu/divmodti4.c
@@ -0,0 +1,168 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this file; see the file COPYING. If not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* As a special exception, if you link this library with files compiled with
+ GCC to produce an executable, this does not cause the resulting executable
+ to be covered by the GNU General Public License. The exception does not
+ however invalidate any other reasons why the executable file might be covered
+ by the GNU General Public License. */
+
+#include <spu_intrinsics.h>
+
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+typedef int TItype __attribute__ ((mode (TI)));
+TItype __divti3 (TItype u, TItype v);
+TItype __modti3 (TItype u, TItype v);
+UTItype __udivti3 (UTItype u, UTItype v);
+UTItype __umodti3 (UTItype u, UTItype v);
+UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
+
+inline static unsigned int
+count_leading_zeros (UTItype x)
+{
+ qword c = si_clz (*(qword *) & x);
+ qword cmp0 = si_cgti (c, 31);
+ qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
+ qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
+ qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
+ s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
+ s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
+ return si_to_uint (s);
+}
+
+/* Based on implementation of udivmodsi4, which is essentially
+ * an optimized version of gcc/config/udivmodsi4.c
+ clz %7,%2
+ clz %4,%1
+ il %5,1
+ fsmbi %0,0
+ sf %7,%4,%7
+ ori %3,%1,0
+ shl %5,%5,%7
+ shl %4,%2,%7
+1: or %8,%0,%5
+ rotmi %5,%5,-1
+ clgt %6,%4,%3
+ sf %7,%4,%3
+ rotmi %4,%4,-1
+ selb %0,%8,%0,%6
+ selb %3,%7,%3,%6
+3: brnz %5,1b
+ */
+
+UTItype
+__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
+{
+ qword shift =
+ si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
+ qword n0 = *(qword *) & num;
+ qword d0 = *(qword *) & den;
+ qword bit = si_andi (si_fsmbi (1), 1);
+ qword r0 = si_il (0);
+ qword m1 = si_fsmbi (0x000f);
+ qword mask, r1, n1;
+
+ d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
+ bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
+
+ do
+ {
+ r1 = si_or (r0, bit);
+
+ // n1 = n0 - d0 in TImode
+ n1 = si_bg (d0, n0);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_sfx (d0, n0, n1);
+
+ mask = si_fsm (si_cgti (n1, -1));
+ r0 = si_selb (r0, r1, mask);
+ n0 = si_selb (n0, n1, mask);
+ bit = si_rotqmbii (bit, -1);
+ d0 = si_rotqmbii (d0, -1);
+ }
+ while (si_to_uint (si_orx (bit)));
+ if (rp)
+ *rp = *(UTItype *) & n0;
+ return *(UTItype *) & r0;
+}
+
+UTItype
+__udivti3 (UTItype n, UTItype d)
+{
+ return __udivmodti4 (n, d, (UTItype *)0);
+}
+
+UTItype
+__umodti3 (UTItype n, UTItype d)
+{
+ UTItype w;
+ __udivmodti4 (n, d, &w);
+ return w;
+}
+
+TItype
+__divti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ w = __udivmodti4 (n, d, (UTItype *)0);
+ if (c)
+ w = -w;
+ return w;
+}
+
+TItype
+__modti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ __udivmodti4 (n, d, (UTItype *) &w);
+ if (c)
+ w = -w;
+ return w;
+}
diff --git a/gcc/config/spu/multi3.c b/gcc/config/spu/multi3.c
new file mode 100644
index 0000000..6998ed0
--- /dev/null
+++ b/gcc/config/spu/multi3.c
@@ -0,0 +1,99 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this file; see the file COPYING. If not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* As a special exception, if you link this library with files compiled with
+ GCC to produce an executable, this does not cause the resulting executable
+ to be covered by the GNU General Public License. The exception does not
+ however invalidate any other reasons why the executable file might be covered
+ by the GNU General Public License. */
+
+#include <spu_intrinsics.h>
+
+typedef int TItype __attribute__ ((mode (TI)));
+
+/* A straight forward vectorization and unrolling of
+ * short l[8], r[8];
+ * TItype total = 0;
+ * for (i = 0; i < 8; i++)
+ * for (j = 0; j < 8; j++)
+ * total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j)));
+ */
+TItype
+__multi3 (TItype l, TItype r)
+{
+ qword u = *(qword *) & l;
+ qword v = *(qword *) & r;
+ qword splat0 = si_shufb (v, v, si_ilh (0x0001));
+ qword splat1 = si_shufb (v, v, si_ilh (0x0203));
+ qword splat2 = si_shufb (v, v, si_ilh (0x0405));
+ qword splat3 = si_shufb (v, v, si_ilh (0x0607));
+ qword splat4 = si_shufb (v, v, si_ilh (0x0809));
+ qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
+ qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
+ qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));
+
+ qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14);
+ qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
+ qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12);
+ qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
+ qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10);
+ qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
+ qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8);
+ qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
+ qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6);
+ qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
+ qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4);
+ qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
+ qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2);
+ qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
+ qword part7l = si_mpyu (u, splat7);
+
+ qword carry, total0, total1, total2, total3, total4;
+ qword total5, total6, total7, total8, total9, total10;
+ qword total;
+
+ total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
+ total1 = si_a (part2l, part3h);
+ total2 = si_a (part3l, part4h);
+ total3 = si_a (part4l, part5h);
+ total4 = si_a (part5l, part6h);
+ total5 = si_a (part6l, part7h);
+ total6 = si_a (total0, total1);
+ total7 = si_a (total2, total3);
+ total8 = si_a (total4, total5);
+ total9 = si_a (total6, total7);
+ total10 = si_a (total8, total9);
+
+ carry = si_cg (part2l, part3h);
+ carry = si_a (carry, si_cg (part3l, part4h));
+ carry = si_a (carry, si_cg (part4l, part5h));
+ carry = si_a (carry, si_cg (part5l, part6h));
+ carry = si_a (carry, si_cg (part6l, part7h));
+ carry = si_a (carry, si_cg (total0, total1));
+ carry = si_a (carry, si_cg (total2, total3));
+ carry = si_a (carry, si_cg (total4, total5));
+ carry = si_a (carry, si_cg (total6, total7));
+ carry = si_a (carry, si_cg (total8, total9));
+ carry = si_shlqbyi (carry, 4);
+
+ total = si_cg (total10, carry);
+ total = si_shlqbyi (total, 4);
+ total = si_cgx (total10, carry, total);
+ total = si_shlqbyi (total, 4);
+ total = si_addx (total10, carry, total);
+ return *(TItype *) & total;
+}
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 692a8da..de307ab 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -4422,6 +4422,13 @@ spu_init_libfuncs (void)
set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
+
+ set_optab_libfunc (smul_optab, TImode, "__multi3");
+ set_optab_libfunc (sdiv_optab, TImode, "__divti3");
+ set_optab_libfunc (smod_optab, TImode, "__modti3");
+ set_optab_libfunc (udiv_optab, TImode, "__udivti3");
+ set_optab_libfunc (umod_optab, TImode, "__umodti3");
+ set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg. We could possibly just
@@ -4473,7 +4480,7 @@ spu_init_builtins (void)
unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
- spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
+ spu_builtin_types[SPU_BTI_QUADWORD] = intTI_type_node;
spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
@@ -5368,7 +5375,8 @@ spu_expand_builtin_1 (struct spu_builtin_description *d,
if (VECTOR_MODE_P (mode)
&& (GET_CODE (ops[i]) == CONST_INT
|| GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
+ || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT)
+ && d->parm[i] != SPU_BTI_QUADWORD)
{
if (GET_CODE (ops[i]) == CONST_INT)
ops[i] = spu_const (mode, INTVAL (ops[i]));
diff --git a/gcc/testsuite/gcc.target/spu/muldivti3.c b/gcc/testsuite/gcc.target/spu/muldivti3.c
new file mode 100644
index 0000000..0363e34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/spu/muldivti3.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+#include <stdlib.h>
+typedef unsigned int uqword __attribute__((mode(TI)));
+typedef int qword __attribute__((mode(TI)));
+
+typedef union
+{
+ uqword uq;
+ qword q;
+ unsigned long long ull[2];
+} u;
+
+int main(void)
+{
+ uqword e, f;
+ qword g, h;
+
+ e = 0x1111111111111111ULL;
+ f = 0xFULL;
+ g = 0x0000000000111100ULL;
+ h = 0x0000000000000000ULL;
+
+ u m, n, o, p, q;
+
+ m.ull[0] = f;
+ m.ull[1] = e;
+ n.ull[0] = h;
+ n.ull[1] = g;
+
+ /* __multi3 */
+ o.q = m.q * n.q;
+
+ o.q = o.q + n.q + 0x1110FF;
+ /* __udivti3, __umodti3 */
+ p.uq = o.uq / n.uq;
+ q.uq = o.uq % n.uq;
+ if (p.uq != (m.uq+1)) abort();
+ if (q.uq != 0x1110FF) abort();
+ /* __divti3, __modti3 */
+ p.q = -o.q / n.q;
+ q.q = -o.q % n.q;
+ if ((-p.q * n.q - q.q) != o.q) abort();
+
+ return 0;
+}