diff options
Diffstat (limited to 'libgcc/config/spu/multi3.c')
-rw-r--r-- | libgcc/config/spu/multi3.c | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/libgcc/config/spu/multi3.c b/libgcc/config/spu/multi3.c new file mode 100644 index 0000000..b8b0e90 --- /dev/null +++ b/libgcc/config/spu/multi3.c @@ -0,0 +1,119 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <spu_intrinsics.h> + +typedef int TItype __attribute__ ((mode (TI))); + +union qword_TItype + { + qword q; + TItype t; + }; + +inline static qword +si_from_TItype (TItype t) +{ + union qword_TItype u; + u.t = t; + return u.q; +} + +inline static TItype +si_to_TItype (qword q) +{ + union qword_TItype u; + u.q = q; + return u.t; +} + +/* A straight forward vectorization and unrolling of + * short l[8], r[8]; + * TItype total = 0; + * for (i = 0; i < 8; i++) + * for (j = 0; j < 8; j++) + * total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j))); + */ +TItype +__multi3 (TItype l, TItype r) +{ + qword u = si_from_TItype (l); + qword v = si_from_TItype (r); + qword splat0 = si_shufb (v, v, si_ilh (0x0001)); + qword splat1 = si_shufb (v, v, si_ilh (0x0203)); + qword splat2 = si_shufb (v, v, si_ilh (0x0405)); + qword splat3 = si_shufb (v, v, si_ilh (0x0607)); + qword splat4 = si_shufb (v, v, si_ilh (0x0809)); + qword splat5 = si_shufb (v, v, si_ilh (0x0a0b)); + qword splat6 = si_shufb (v, v, si_ilh (0x0c0d)); + qword splat7 = si_shufb (v, v, si_ilh (0x0e0f)); + + qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14); + qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14); + qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12); + qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12); + qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10); + qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10); + qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8); + qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8); + qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6); + qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6); + qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4); + qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4); + qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2); + qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2); + qword part7l = si_mpyu (u, splat7); + + qword carry, total0, total1, total2, total3, total4; + qword total5, total6, total7, total8, total9, total10; + qword total; + + total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l); + total1 = si_a (part2l, part3h); + total2 = si_a (part3l, part4h); + total3 = si_a (part4l, part5h); + total4 = si_a (part5l, part6h); + total5 = si_a (part6l, part7h); + total6 = si_a (total0, total1); + total7 = si_a (total2, total3); + total8 = si_a (total4, total5); + total9 = si_a (total6, total7); + total10 = si_a (total8, total9); + + carry = si_cg (part2l, part3h); + carry = si_a (carry, si_cg (part3l, part4h)); + carry = si_a (carry, si_cg (part4l, part5h)); + carry = si_a (carry, si_cg (part5l, part6h)); + carry = si_a (carry, si_cg (part6l, part7h)); + carry = si_a (carry, si_cg (total0, total1)); + carry = si_a (carry, si_cg (total2, total3)); + carry = si_a (carry, si_cg (total4, total5)); + carry = si_a (carry, si_cg (total6, total7)); + carry = si_a (carry, si_cg (total8, total9)); + carry = si_shlqbyi (carry, 4); + + total = si_cg (total10, carry); + total = si_shlqbyi (total, 4); + total = si_cgx (total10, carry, total); + total = si_shlqbyi (total, 4); + total = si_addx (total10, carry, total); + return si_to_TItype (total); +} |