aboutsummaryrefslogtreecommitdiff
path: root/libphobos/src/std/internal/math/biguintnoasm.d
diff options
context:
space:
mode:
authorIain Buclaw <ibuclaw@gcc.gnu.org>2018-10-28 19:51:47 +0000
committerIain Buclaw <ibuclaw@gcc.gnu.org>2018-10-28 19:51:47 +0000
commitb4c522fabd0df7be08882d2207df8b2765026110 (patch)
treeb5ffc312b0a441c1ba24323152aec463fdbe5e9f /libphobos/src/std/internal/math/biguintnoasm.d
parent01ce9e31a02c8039d88e90f983735104417bf034 (diff)
downloadgcc-b4c522fabd0df7be08882d2207df8b2765026110.zip
gcc-b4c522fabd0df7be08882d2207df8b2765026110.tar.gz
gcc-b4c522fabd0df7be08882d2207df8b2765026110.tar.bz2
Add D front-end, libphobos library, and D2 testsuite.
ChangeLog: * Makefile.def (target_modules): Add libphobos. (flags_to_pass): Add GDC, GDCFLAGS, GDC_FOR_TARGET and GDCFLAGS_FOR_TARGET. (dependencies): Make libphobos depend on libatomic, libbacktrace configure, and zlib configure. (language): Add language d. * Makefile.in: Rebuild. * Makefile.tpl (BUILD_EXPORTS): Add GDC and GDCFLAGS. (HOST_EXPORTS): Add GDC. (POSTSTAGE1_HOST_EXPORTS): Add GDC and GDC_FOR_BUILD. (BASE_TARGET_EXPORTS): Add GDC. (GDC_FOR_BUILD, GDC, GDCFLAGS): New variables. (GDC_FOR_TARGET, GDC_FLAGS_FOR_TARGET): New variables. (EXTRA_HOST_FLAGS): Add GDC. (STAGE1_FLAGS_TO_PASS): Add GDC. (EXTRA_TARGET_FLAGS): Add GDC and GDCFLAGS. * config-ml.in: Treat GDC and GDCFLAGS like other compiler/flag environment variables. * configure: Rebuild. * configure.ac: Add target-libphobos to target_libraries. Set and substitute GDC_FOR_BUILD and GDC_FOR_TARGET. config/ChangeLog: * multi.m4: Set GDC. gcc/ChangeLog: * Makefile.in (tm_d_file_list, tm_d_include_list): New variables. (TM_D_H, D_TARGET_DEF, D_TARGET_H, D_TARGET_OBJS): New variables. (tm_d.h, cs-tm_d.h, default-d.o): New rules. (d/d-target-hooks-def.h, s-d-target-hooks-def-h): New rules. (s-tm-texi): Also check timestamp on d-target.def. (generated_files): Add TM_D_H and d-target-hooks-def.h. (build/genhooks.o): Also depend on D_TARGET_DEF. * config.gcc (tm_d_file, d_target_objs, target_has_targetdm): New variables. * config/aarch64/aarch64-d.c: New file. * config/aarch64/aarch64-linux.h (GNU_USER_TARGET_D_CRITSEC_SIZE): Define. * config/aarch64/aarch64-protos.h (aarch64_d_target_versions): New prototype. * config/aarch64/aarch64.h (TARGET_D_CPU_VERSIONS): Define. * config/aarch64/t-aarch64 (aarch64-d.o): New rule. * config/arm/arm-d.c: New file. * config/arm/arm-protos.h (arm_d_target_versions): New prototype. * config/arm/arm.h (TARGET_D_CPU_VERSIONS): Define. * config/arm/linux-eabi.h (EXTRA_TARGET_D_OS_VERSIONS): Define. * config/arm/t-arm (arm-d.o): New rule. * config/default-d.c: New file. 
* config/glibc-d.c: New file. * config/gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/i386/i386-d.c: New file. * config/i386/i386-protos.h (ix86_d_target_versions): New prototype. * config/i386/i386.h (TARGET_D_CPU_VERSIONS): Define. * config/i386/linux-common.h (EXTRA_TARGET_D_OS_VERSIONS): Define. (GNU_USER_TARGET_D_CRITSEC_SIZE): Define. * config/i386/t-i386 (i386-d.o): New rule. * config/kfreebsd-gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/kopensolaris-gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/linux-android.h (ANDROID_TARGET_D_OS_VERSIONS): Define. * config/linux.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/mips/linux-common.h (EXTRA_TARGET_D_OS_VERSIONS): Define. * config/mips/mips-d.c: New file. * config/mips/mips-protos.h (mips_d_target_versions): New prototype. * config/mips/mips.h (TARGET_D_CPU_VERSIONS): Define. * config/mips/t-mips (mips-d.o): New rule. * config/powerpcspe/linux.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/powerpcspe/linux64.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/powerpcspe/powerpcspe-d.c: New file. * config/powerpcspe/powerpcspe-protos.h (rs6000_d_target_versions): New prototype. * config/powerpcspe/powerpcspe.c (rs6000_output_function_epilogue): Support GNU D by using 0 as the language type. * config/powerpcspe/powerpcspe.h (TARGET_D_CPU_VERSIONS): Define. * config/powerpcspe/t-powerpcspe (powerpcspe-d.o): New rule. * config/riscv/riscv-d.c: New file. * config/riscv/riscv-protos.h (riscv_d_target_versions): New prototype. * config/riscv/riscv.h (TARGET_D_CPU_VERSIONS): Define. * config/riscv/t-riscv (riscv-d.o): New rule. * config/rs6000/linux.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/rs6000/linux64.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/rs6000/rs6000-d.c: New file. * config/rs6000/rs6000-protos.h (rs6000_d_target_versions): New prototype. * config/rs6000/rs6000.c (rs6000_output_function_epilogue): Support GNU D by using 0 as the language type. 
* config/rs6000/rs6000.h (TARGET_D_CPU_VERSIONS): Define. * config/rs6000/t-rs6000 (rs6000-d.o): New rule. * config/s390/s390-d.c: New file. * config/s390/s390-protos.h (s390_d_target_versions): New prototype. * config/s390/s390.h (TARGET_D_CPU_VERSIONS): Define. * config/s390/t-s390 (s390-d.o): New rule. * config/sparc/sparc-d.c: New file. * config/sparc/sparc-protos.h (sparc_d_target_versions): New prototype. * config/sparc/sparc.h (TARGET_D_CPU_VERSIONS): Define. * config/sparc/t-sparc (sparc-d.o): New rule. * config/t-glibc (glibc-d.o): New rule. * configure: Regenerated. * configure.ac (tm_d_file): New variable. (tm_d_file_list, tm_d_include_list, d_target_objs): Add substitutes. * doc/contrib.texi (Contributors): Add self for the D frontend. * doc/frontends.texi (G++ and GCC): Mention D as a supported language. * doc/install.texi (Configuration): Mention libphobos as an option for --enable-shared. Mention d as an option for --enable-languages. (Testing): Mention check-d as a target. * doc/invoke.texi (Overall Options): Mention .d, .dd, and .di as file name suffixes. Mention d as a -x option. * doc/sourcebuild.texi (Top Level): Mention libphobos. * doc/standards.texi (Standards): Add section on D language. * doc/tm.texi: Regenerated. * doc/tm.texi.in: Add @node for D language and ABI, and @hook for TARGET_CPU_VERSIONS, TARGET_D_OS_VERSIONS, and TARGET_D_CRITSEC_SIZE. * dwarf2out.c (is_dlang): New function. (gen_compile_unit_die): Use DW_LANG_D for D. (declare_in_namespace): Return module die for D, instead of adding extra declarations into the namespace. (gen_namespace_die): Generate DW_TAG_module for D. (gen_decl_die): Handle CONST_DECLSs for D. (dwarf2out_decl): Likewise. (prune_unused_types_walk_local_classes): Handle DW_tag_interface_type. (prune_unused_types_walk): Handle DW_tag_interface_type same as other kinds of aggregates. * gcc.c (default_compilers): Add entries for .d, .dd and .di. * genhooks.c: Include d/d-target.def. 
gcc/po/ChangeLog: * EXCLUDES: Add sources from d/dmd. gcc/testsuite/ChangeLog: * gcc.misc-tests/help.exp: Add D to option descriptions check. * gdc.dg/asan/asan.exp: New file. * gdc.dg/asan/gdc272.d: New test. * gdc.dg/compilable.d: New test. * gdc.dg/dg.exp: New file. * gdc.dg/gdc254.d: New test. * gdc.dg/gdc260.d: New test. * gdc.dg/gdc270a.d: New test. * gdc.dg/gdc270b.d: New test. * gdc.dg/gdc282.d: New test. * gdc.dg/gdc283.d: New test. * gdc.dg/imports/gdc170.d: New test. * gdc.dg/imports/gdc231.d: New test. * gdc.dg/imports/gdc239.d: New test. * gdc.dg/imports/gdc241a.d: New test. * gdc.dg/imports/gdc241b.d: New test. * gdc.dg/imports/gdc251a.d: New test. * gdc.dg/imports/gdc251b.d: New test. * gdc.dg/imports/gdc253.d: New test. * gdc.dg/imports/gdc254a.d: New test. * gdc.dg/imports/gdc256.d: New test. * gdc.dg/imports/gdc27.d: New test. * gdc.dg/imports/gdcpkg256/package.d: New test. * gdc.dg/imports/runnable.d: New test. * gdc.dg/link.d: New test. * gdc.dg/lto/lto.exp: New file. * gdc.dg/lto/ltotests_0.d: New test. * gdc.dg/lto/ltotests_1.d: New test. * gdc.dg/runnable.d: New test. * gdc.dg/simd.d: New test. * gdc.test/gdc-test.exp: New file. * lib/gdc-dg.exp: New file. * lib/gdc.exp: New file. libphobos/ChangeLog: * Makefile.am: New file. * Makefile.in: New file. * acinclude.m4: New file. * aclocal.m4: New file. * config.h.in: New file. * configure: New file. * configure.ac: New file. * d_rules.am: New file. * libdruntime/Makefile.am: New file. * libdruntime/Makefile.in: New file. * libdruntime/__entrypoint.di: New file. * libdruntime/__main.di: New file. * libdruntime/gcc/attribute.d: New file. * libdruntime/gcc/backtrace.d: New file. * libdruntime/gcc/builtins.d: New file. * libdruntime/gcc/config.d.in: New file. * libdruntime/gcc/deh.d: New file. * libdruntime/gcc/libbacktrace.d.in: New file. * libdruntime/gcc/unwind/arm.d: New file. * libdruntime/gcc/unwind/arm_common.d: New file. * libdruntime/gcc/unwind/c6x.d: New file. 
* libdruntime/gcc/unwind/generic.d: New file. * libdruntime/gcc/unwind/package.d: New file. * libdruntime/gcc/unwind/pe.d: New file. * m4/autoconf.m4: New file. * m4/druntime.m4: New file. * m4/druntime/cpu.m4: New file. * m4/druntime/libraries.m4: New file. * m4/druntime/os.m4: New file. * m4/gcc_support.m4: New file. * m4/gdc.m4: New file. * m4/libtool.m4: New file. * src/Makefile.am: New file. * src/Makefile.in: New file. * src/libgphobos.spec.in: New file. * testsuite/Makefile.am: New file. * testsuite/Makefile.in: New file. * testsuite/config/default.exp: New file. * testsuite/lib/libphobos-dg.exp: New file. * testsuite/lib/libphobos.exp: New file. * testsuite/testsuite_flags.in: New file. From-SVN: r265573
Diffstat (limited to 'libphobos/src/std/internal/math/biguintnoasm.d')
-rw-r--r--libphobos/src/std/internal/math/biguintnoasm.d370
1 files changed, 370 insertions, 0 deletions
diff --git a/libphobos/src/std/internal/math/biguintnoasm.d b/libphobos/src/std/internal/math/biguintnoasm.d
new file mode 100644
index 0000000..aea1d50
--- /dev/null
+++ b/libphobos/src/std/internal/math/biguintnoasm.d
@@ -0,0 +1,370 @@
+/** Arbitrary precision arithmetic ('bignum') for processors with no asm support
+ *
+ * All functions operate on arrays of uints, stored LSB first.
+ * If there is a destination array, it will be the first parameter.
+ * Currently, all of these functions are subject to change, and are
+ * intended for internal use only.
+ * This module is intended only to assist development of high-speed routines
+ * on currently unsupported processors.
+ * The X86 asm version is about 30 times faster than the D version (DMD).
+ */
+
+/* Copyright Don Clugston 2008 - 2010.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+module std.internal.math.biguintnoasm;
+
+nothrow:
+@safe:
+
+public:
+alias BigDigit = uint; // One 32-bit digit; a Bignum is an array of BigDigits.
+
+ // Limits for when to switch between multiplication algorithms.
+enum int KARATSUBALIMIT = 10; // Minimum value (presumably an operand length in BigDigits -- confirm against callers) for which Karatsuba is worthwhile.
+enum int KARATSUBASQUARELIMIT = 12; // Minimum value (same presumed unit) for which square Karatsuba is worthwhile.
+
+
+/** Multi-digit addition or subtraction with carry/borrow propagation.
+ *
+ * For op == '+':    dest[] = src1[] + src2[] + carry.
+ * For any other op: dest[] = src1[] - src2[] - carry.
+ * The incoming carry (or borrow) is expected to be 0 or 1.
+ * Exactly src2.length digits are processed, so dest and src1 must be at
+ * least that long.
+ * Returns the carry (or borrow) out of the last processed digit, 0 or 1.
+ * When src2 is empty the incoming carry is returned unchanged.
+ */
+uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1,
+ const (uint) [] src2, uint carry) pure @nogc @safe
+{
+    enum ulong digitMax = 0xFFFF_FFFF;
+    ulong acc = carry;
+    foreach (idx; 0 .. src2.length)
+    {
+        const ulong lhs = src1[idx];
+        const ulong rhs = src2[idx];
+        static if (op == '+')
+            acc = acc + lhs + rhs;
+        else
+            acc = lhs - rhs - acc; // wraps above 32 bits when a borrow is needed
+        dest[idx] = cast(uint) acc;
+        // Anything that does not fit in one digit is a carry (or borrow)
+        // into the next digit.
+        acc = (acc > digitMax) ? 1 : 0;
+    }
+    return cast(uint) acc;
+}
+
+@safe unittest // multibyteAddSub: carry out of an addition, no write past the slice, in-place subtraction
+{
+ uint [] a = new uint[40];
+ uint [] b = new uint[40];
+ uint [] c = new uint[40];
+ for (size_t i = 0; i < a.length; ++i)
+ {
+ if (i&1) a[i]=cast(uint)(0x8000_0000 + i);
+ else a[i]=cast(uint) i;
+ b[i]= 0x8000_0003;
+ }
+ c[19]=0x3333_3333; // sentinel placed beyond the 18-digit destination slice
+ uint carry = multibyteAddSub!('+')(c[0 .. 18], b[0 .. 18], a[0 .. 18], 0);
+ assert(c[0]==0x8000_0003);
+ assert(c[1]==4); // 0x8000_0003 + 0x8000_0001 wraps to 4
+ assert(c[19]==0x3333_3333); // check for overrun
+ assert(carry == 1);
+ for (size_t i = 0; i < a.length; ++i)
+ {
+ a[i] = b[i] = c[i] = 0;
+ }
+ a[8]=0x048D159E;
+ b[8]=0x048D159E;
+ a[10]=0x1D950C84;
+ b[10]=0x1D950C84;
+ a[5] =0x44444444; // only digit where a and b differ
+ carry = multibyteAddSub!('-')(a[0 .. 12], a[0 .. 12], b[0 .. 12], 0); // a -= b, dest aliases src1
+ assert(a[11] == 0);
+ for (size_t i = 0; i < 10; ++i)
+ if (i != 5)
+ assert(a[i] == 0);
+
+ for (size_t q = 3; q < 36; ++q) // repeat the in-place subtraction for slice lengths 3 .. 35
+ {
+ for (size_t i = 0; i< a.length; ++i)
+ {
+ a[i] = b[i] = c[i] = 0;
+ }
+ a[q-2]=0x040000;
+ b[q-2]=0x040000;
+ carry = multibyteAddSub!('-')(a[0 .. q], a[0 .. q], b[0 .. q], 0);
+ assert(a[q-2]==0);
+ }
+}
+
+
+
+/** Adds a single digit to, or subtracts it from, a bignum in place.
+ *
+ * For op == '+':    dest[] += carry.
+ * For any other op: dest[] -= carry.
+ * dest must contain at least one digit.
+ * Returns the final carry or borrow out of the most significant digit
+ * (0 or 1).
+ */
+uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
+ pure @nogc @safe
+{
+    // Apply the operand to the lowest digit in 64-bit arithmetic so that an
+    // overflow or underflow shows up above bit 31.
+    static if (op == '+')
+    {
+        enum uint wrapDigit = 0;            // digit value after ++ overflowed
+        const ulong first = cast(ulong) dest[0] + carry;
+    }
+    else
+    {
+        enum uint wrapDigit = 0xFFFF_FFFF;  // digit value after -- underflowed
+        const ulong first = cast(ulong) dest[0] - carry;
+    }
+    dest[0] = cast(uint) first;
+    if (first <= 0xFFFF_FFFF)
+        return 0;
+
+    // Ripple the unit carry/borrow upwards until a digit absorbs it.
+    foreach (ref digit; dest[1 .. $])
+    {
+        static if (op == '+')
+            ++digit;
+        else
+            --digit;
+        if (digit != wrapDigit)
+            return 0;
+    }
+    return 1;
+}
+
+/** dest[] = src[] << numbits
+ *
+ * numbits must be in the range 1 .. 31.
+ * dest.length digits are produced, working from the least significant
+ * digit upwards; dest may be the same slice as src.
+ * Returns the bits shifted out of the most significant processed digit.
+ */
+uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits)
+ pure @nogc @safe
+{
+    ulong acc = 0;
+    foreach (idx, ref outDigit; dest)
+    {
+        // Low word: shifted digit plus the bits carried over from below.
+        acc += (cast(ulong) src[idx]) << numbits;
+        outDigit = cast(uint) acc;
+        // High word: bits that spill into the next digit.
+        acc >>>= 32;
+    }
+    return cast(uint) acc;
+}
+
+
+/** dest[] = src[] >> numbits
+ *
+ * numbits must be in the range 1 .. 31.
+ * dest.length digits are produced, working from the most significant
+ * processed digit downwards; zero bits are shifted into the top of
+ * dest[$-1]. dest may be a prefix of src.
+ */
+void multibyteShr(uint [] dest, const(uint) [] src, uint numbits)
+ pure @nogc @safe
+{
+    ulong acc = 0;
+    foreach_reverse (idx; 0 .. dest.length)
+    {
+        const uint digit = src[idx];
+        // The low word receives the shifted digit on top of the bits handed
+        // down from the digit above; the bits that fall off the bottom are
+        // parked at the top of the high word for the next (lower) digit.
+        acc += (digit >> numbits) + ((cast(ulong) digit) << (64 - numbits));
+        dest[idx] = cast(uint) acc;
+        acc >>>= 32;
+    }
+}
+
+@safe unittest // multibyteShr / multibyteShl on slices that overlap their source
+{
+
+ uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
+ multibyteShr(aa[0..$-2], aa, 4); // only three digits are produced
+ assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999); // top produced digit gets zeros shifted in
+ assert(aa[3] == 0xBCCC_CCCD); // digit outside dest is untouched
+
+ aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
+ multibyteShr(aa[0..$-1], aa, 4); // four digits are produced
+ assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555
+ && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC);
+
+ aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD,
+ 0xEEEE_EEEE];
+ multibyteShl(aa[1 .. 4], aa[1..$], 4); // shift three interior digits left by one hex digit
+ assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230
+ && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD); // neighbours aa[0] and aa[4] unchanged
+}
+
+/** dest[] = src[] * multiplier + carry.
+ *
+ * dest and src must have the same length (checked by an assert); they may
+ * be the same slice.
+ * Returns the carry out of the most significant digit.
+ */
+uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
+ pure @nogc @safe
+{
+    assert(dest.length == src.length);
+    ulong acc = carry;
+    foreach (idx, digit; src)
+    {
+        // 64-bit product plus the carry coming from the digit below.
+        acc += (cast(ulong) digit) * multiplier;
+        dest[idx] = cast(uint) acc;
+        acc >>= 32; // the high word carries into the next digit
+    }
+    return cast(uint) acc;
+}
+
+@safe unittest // multibyteMul: in-place multiply of three interior digits by 16
+{
+ uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
+ 0xBCCC_CCCD, 0xEEEE_EEEE];
+ multibyteMul(aa[1 .. 4], aa[1 .. 4], 16, 0); // dest and src are the same slice
+ assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561
+ && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD); // digits outside the slice are unchanged
+}
+
+/**
+ * Multiplies src[] by a single digit and adds the product to, or subtracts
+ * it from, dest[] in place.
+ * op == '+':    dest[] += src[] * multiplier + carry(0 .. FFFF_FFFF).
+ * any other op: dest[] -= src[] * multiplier + carry(0 .. FFFF_FFFF).
+ * dest and src must have the same length (checked by an assert).
+ * Returns carry out of MSB (0 .. FFFF_FFFF). For the subtracting form this
+ * is the amount that remains to be subtracted from the next higher digit.
+ */
+uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src,
+ uint multiplier, uint carry) pure @nogc @safe
+{
+ assert(dest.length == src.length);
+ ulong c = carry;
+ for (size_t i = 0; i < src.length; ++i)
+ {
+ static if (op=='+')
+ {
+ c += cast(ulong)(multiplier) * src[i] + dest[i];
+ dest[i] = cast(uint) c;
+ c >>= 32; // high word carries into the next digit
+ }
+ else
+ {
+ c += cast(ulong) multiplier * src[i];
+ ulong t = cast(ulong) dest[i] - cast(uint) c; // wraps below zero when a borrow is needed
+ dest[i] = cast(uint) t;
+ c = cast(uint)((c >> 32) - (t >> 32)); // t >> 32 is 0 or FFFF_FFFF, so truncating to 32 bits adds the borrow (0 or 1) to the high word
+ }
+ }
+ return cast(uint) c;
+}
+
+@safe unittest // multibyteMulAdd!('+'): bb[1 .. 4] += aa[1 .. 4] * 16 + 5
+{
+
+ uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
+ 0xBCCC_CCCD, 0xEEEE_EEEE];
+ uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0,
+ 0xC0C0_C0C0];
+ multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5);
+ assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0); // digits outside the slice are unchanged
+ assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5
+ && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1
+ && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 ); // expected values are written as uint sums that wrap at 32 bits
+}
+
+
+/**
+ Sets dest = dest[0 .. left.length] + left * right
+
+ One row of the product is accumulated per digit of right; the carry of
+ row i is stored in dest[left.length + i].
+ It is defined in this way to allow cache-efficient multiplication.
+ This function is equivalent to:
+ ----
+ for (size_t i = 0; i< right.length; ++i)
+ {
+ dest[left.length + i] = multibyteMulAdd!('+')(dest[i .. left.length+i],
+ left, right[i], 0);
+ }
+ ----
+ */
+void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint)
+ [] right) pure @nogc @safe
+{
+    foreach (row, multiplier; right)
+    {
+        // Row `row` is added at digit offset `row`; its carry goes into the
+        // digit just above the window it touched.
+        const size_t top = left.length + row;
+        dest[top] = multibyteMulAdd!('+')(dest[row .. top], left, multiplier, 0);
+    }
+}
+
+/** dest[] /= divisor.
+ *
+ * overflow is the initial remainder, and must be in the range 0 .. divisor-1.
+ * Digits are processed from the most significant one downwards.
+ * Returns the remainder of the division.
+ */
+uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow)
+ pure @nogc @safe
+{
+    ulong rem = overflow;
+    foreach_reverse (idx; 0 .. dest.length)
+    {
+        // Running remainder in the high word, next digit in the low word.
+        rem = (rem << 32) + dest[idx];
+        const uint quot = cast(uint)(rem / divisor);
+        // divisor * quot is a 32-bit product and may wrap; only the low 32
+        // bits of rem are meaningful afterwards, and only those survive the
+        // next shift and the final cast.
+        rem -= divisor * quot;
+        dest[idx] = quot;
+    }
+    return cast(uint) rem;
+}
+
+@safe unittest // multibyteMul followed by multibyteDivAssign with the same factor must round-trip
+{
+ uint [] aa = new uint[101];
+ for (uint i = 0; i < aa.length; ++i)
+ aa[i] = 0x8765_4321 * (i+3);
+ uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461); // aa = aa * factor + 0x33FF_7461, in place
+ uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow); // the carry out of the multiply is fed back in as the initial remainder
+ for (uint i=0; i<aa.length; ++i)
+ {
+ assert(aa[i] == 0x8765_4321 * (i+3)); // original digits restored
+ }
+ assert(r == 0x33FF_7461); // the added carry comes back as the remainder
+
+}
+// Adds the square of every digit of src into dest:
+// the two-digit value src[i]*src[i] is added at dest[2*i] and dest[2*i+1],
+// and any carry is propagated into the following pair.
+// dest must hold at least 2*src.length digits.
+void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src)
+ pure @nogc @safe
+{
+    ulong acc = 0;
+    foreach (i, digit; src)
+    {
+        const size_t lo = 2 * i;
+        const size_t hi = lo + 1;
+        // acc is 0 or 1 on entry. The sum below cannot overflow 64 bits:
+        // FFFF_FFFF*FFFF_FFFF + FFFF_FFFF + 1 = FFFF_FFFF_0000_0001.
+        acc += (cast(ulong) digit) * digit + dest[lo];
+        dest[lo] = cast(uint) acc;
+        acc >>= 32;
+        // High half of the square plus the existing upper digit; at most
+        // 1_FFFF_FFFE, so the carry left for the next pair is 0 or 1.
+        acc += dest[hi];
+        dest[hi] = cast(uint) acc;
+        acc >>= 32;
+    }
+}
+
+// Does half a square multiply. (square = diagonal + 2*triangle) The products x[i]*x[j] with i < j are summed into dest at digit position i+j; dest[0] is never written.
+void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x)
+ pure @nogc @safe
+{
+ // x[0]*x[1..$] + x[1]*x[2..$] + ... + x[$-2]*x[$-1..$]
+ dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0); // row x[0]: overwrites dest[1 .. x.length], carry goes to dest[x.length]
+ if (x.length < 4) // lengths below 4 are finished here, without the loop and unrolled tail
+ {
+ if (x.length == 3) // the one product not covered by row x[0] is x[1]*x[2]
+ {
+ ulong c = cast(ulong)(x[$-1]) * x[$-2] + dest[2*x.length-3];
+ dest[2*x.length - 3] = cast(uint) c;
+ c >>= 32;
+ dest[2*x.length - 2] = cast(uint) c;
+ }
+ return;
+ }
+ for (size_t i = 2; i < x.length - 2; ++i) // rows x[1] .. x[$-4], each multiplying x[i .. $] by x[i-1]
+ {
+ dest[i-1+ x.length] = multibyteMulAdd!('+')(
+ dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0); // the row's carry is stored in the digit just above its window
+ }
+ // Unroll the last two entries, to reduce loop overhead:
+ ulong c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5]; // row x[$-3], first product
+ dest[2*x.length-5] = cast(uint) c;
+ c >>= 32;
+ c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4]; // row x[$-3], second product
+ dest[2*x.length-4] = cast(uint) c;
+ c >>= 32;
+ c += cast(ulong)(x[$-1]) * x[$-2]; // row x[$-2]; nothing is read from dest for this digit
+ dest[2*x.length-3] = cast(uint) c;
+ c >>= 32;
+ dest[2*x.length-2] = cast(uint) c; // final carry
+}
+
+// Computes result[] = x[] * x[] from the sibling primitives: the off-diagonal
+// products are formed once, doubled with a one-bit left shift, and the
+// squares of the individual digits are then added in.
+// NOTE(review): result is presumably 2*x.length digits long -- confirm
+// against callers.
+void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure @nogc @safe
+{
+    // Off-diagonal half of the square; result[0] is not written by this step.
+    multibyteTriangleAccumulate(result, x);
+
+    // Double it in place (mul by 2); the bit shifted out becomes the top digit.
+    BigDigit[] inner = result[1 .. $ - 1];
+    const BigDigit shiftedOut = multibyteShl(inner, inner, 1);
+    result[$ - 1] = shiftedOut;
+    result[0] = 0;
+
+    // Add the squares of the individual digits.
+    multibyteAddDiagonalSquares(result, x);
+}