libstdc++: Optimize uniform_int_distribution using Lemire's algorithm

Co-authored-by: Jonathan Wakely <jwakely@redhat.com> libstdc++-v3/ChangeLog: * include/bits/uniform_int_dist.h (uniform_int_distribution::_S_nd): New member function implementing Lemire's "nearly divisionless" algorithm. (uniform_int_distribution::operator()): Use _S_nd when the range of the URBG is the full width of the result type.
author: Daniel Lemire <lemire@gmail.com> 2020-10-09 14:09:36 +0100
committer: Jonathan Wakely <jwakely@redhat.com> 2020-10-09 14:09:36 +0100
commit: 98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b (patch)
tree: 072d00b35158c00d6488a97790c217d4bb27b563
parent: 6ce2cb116af6e0965ff0dd69e7fd1925cf5dc68c (diff)
download: gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.zip
gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.tar.gz
gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.tar.bz2
1 files changed, 54 insertions, 7 deletions
diff --git a/libstdc++-v3/include/bits/uniform_int_dist.h b/libstdc++-v3/include/bits/uniform_int_dist.h
index 6e1e3d5..ecb8574 100644
--- a/libstdc++-v3/include/bits/uniform_int_dist.h
+++ b/libstdc++-v3/include/bits/uniform_int_dist.h
@@ -234,6 +234,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 			const param_type& __p);
 
       param_type _M_param;
+
+      // Lemire's nearly divisionless algorithm.
+      // Returns an unbiased random number from __g downscaled to [0,__range)
+      // using an unsigned type _Wp twice as wide as unsigned type _Up.
+      template<typename _Wp, typename _Urbg, typename _Up>
+	static _Up
+	_S_nd(_Urbg& __g, _Up __range)
+	{
+	  using __gnu_cxx::__int_traits;
+	  static_assert(!__int_traits<_Up>::__is_signed, "U must be unsigned");
+	  static_assert(!__int_traits<_Wp>::__is_signed, "W must be unsigned");
+
+	  // reference: Fast Random Integer Generation in an Interval
+	  // ACM Transactions on Modeling and Computer Simulation 29 (1), 2019
+	  // https://arxiv.org/abs/1805.10941
+	  _Wp __product = _Wp(__g()) * _Wp(__range);
+	  _Up __low = _Up(__product);
+	  if (__low < __range)
+	    {
+	      _Up __threshold = -__range % __range;
+	      while (__low < __threshold)
+		{
+		  __product = _Wp(__g()) * _Wp(__range);
+		  __low = _Up(__product);
+		}
+	    }
+	  return __product >> __gnu_cxx::__int_traits<_Up>::__digits;
+	}
     };
 
   template<typename _IntType>
@@ -256,17 +284,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  = __uctype(__param.b()) - __uctype(__param.a());
 
 	__uctype __ret;
-
 	if (__urngrange > __urange)
 	  {
 	    // downscaling
+
 	    const __uctype __uerange = __urange + 1; // __urange can be zero
-	    const __uctype __scaling = __urngrange / __uerange;
-	    const __uctype __past = __uerange * __scaling;
-	    do
-	      __ret = __uctype(__urng()) - __urngmin;
-	    while (__ret >= __past);
-	    __ret /= __scaling;
+
+	    using __gnu_cxx::__int_traits;
+#if __SIZEOF_INT128__
+	    if (__int_traits<__uctype>::__digits == 64
+		&& __urngrange == __int_traits<__uctype>::__max)
+	      {
+		__ret = _S_nd<unsigned __int128>(__urng, __uerange);
+	      }
+	    else
+#endif
+	    if (__int_traits<__uctype>::__digits == 32
+		&& __urngrange == __int_traits<__uctype>::__max)
+	      {
+		__ret = _S_nd<__UINT64_TYPE__>(__urng, __uerange);
+	      }
+	    else
+	      {
+		// fallback case (2 divisions)
+		const __uctype __scaling = __urngrange / __uerange;
+		const __uctype __past = __uerange * __scaling;
+		do
+		  __ret = __uctype(__urng()) - __urngmin;
+		while (__ret >= __past);
+		__ret /= __scaling;
+	      }
 	  }
 	else if (__urngrange < __urange)
 	  {
author	Daniel Lemire <lemire@gmail.com>	2020-10-09 14:09:36 +0100
committer	Jonathan Wakely <jwakely@redhat.com>	2020-10-09 14:09:36 +0100
commit	98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b (patch)
tree	072d00b35158c00d6488a97790c217d4bb27b563
parent	6ce2cb116af6e0965ff0dd69e7fd1925cf5dc68c (diff)
download	gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.zip gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.tar.gz gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.tar.bz2