diff options
author | Daniel Lemire <lemire@gmail.com> | 2020-10-09 14:09:36 +0100 |
---|---|---|
committer | Jonathan Wakely <jwakely@redhat.com> | 2020-10-09 14:09:36 +0100 |
commit | 98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b (patch) | |
tree | 072d00b35158c00d6488a97790c217d4bb27b563 | |
parent | 6ce2cb116af6e0965ff0dd69e7fd1925cf5dc68c (diff) | |
download | gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.zip gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.tar.gz gcc-98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b.tar.bz2 |
libstdc++: Optimize uniform_int_distribution using Lemire's algorithm
Co-authored-by: Jonathan Wakely <jwakely@redhat.com>
libstdc++-v3/ChangeLog:
* include/bits/uniform_int_dist.h (uniform_int_distribution::_S_nd):
New member function implementing Lemire's "nearly divisionless"
algorithm.
(uniform_int_distribution::operator()): Use _S_nd when the range
of the URBG is the full width of the result type.
-rw-r--r-- | libstdc++-v3/include/bits/uniform_int_dist.h | 61 |
1 files changed, 54 insertions, 7 deletions
diff --git a/libstdc++-v3/include/bits/uniform_int_dist.h b/libstdc++-v3/include/bits/uniform_int_dist.h index 6e1e3d5..ecb8574 100644 --- a/libstdc++-v3/include/bits/uniform_int_dist.h +++ b/libstdc++-v3/include/bits/uniform_int_dist.h @@ -234,6 +234,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const param_type& __p); param_type _M_param; + + // Lemire's nearly divisionless algorithm. + // Returns an unbiased random number from __g downscaled to [0,__range) + // using an unsigned type _Wp twice as wide as unsigned type _Up. + template<typename _Wp, typename _Urbg, typename _Up> + static _Up + _S_nd(_Urbg& __g, _Up __range) + { + using __gnu_cxx::__int_traits; + static_assert(!__int_traits<_Up>::__is_signed, "U must be unsigned"); + static_assert(!__int_traits<_Wp>::__is_signed, "W must be unsigned"); + + // reference: Fast Random Integer Generation in an Interval + // ACM Transactions on Modeling and Computer Simulation 29 (1), 2019 + // https://arxiv.org/abs/1805.10941 + _Wp __product = _Wp(__g()) * _Wp(__range); + _Up __low = _Up(__product); + if (__low < __range) + { + _Up __threshold = -__range % __range; + while (__low < __threshold) + { + __product = _Wp(__g()) * _Wp(__range); + __low = _Up(__product); + } + } + return __product >> __gnu_cxx::__int_traits<_Up>::__digits; + } }; template<typename _IntType> @@ -256,17 +284,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION = __uctype(__param.b()) - __uctype(__param.a()); __uctype __ret; - if (__urngrange > __urange) { // downscaling + const __uctype __uerange = __urange + 1; // __urange can be zero - const __uctype __scaling = __urngrange / __uerange; - const __uctype __past = __uerange * __scaling; - do - __ret = __uctype(__urng()) - __urngmin; - while (__ret >= __past); - __ret /= __scaling; + + using __gnu_cxx::__int_traits; +#if __SIZEOF_INT128__ + if (__int_traits<__uctype>::__digits == 64 + && __urngrange == __int_traits<__uctype>::__max) + { + __ret = _S_nd<unsigned __int128>(__urng, __uerange); + } + else +#endif + if (__int_traits<__uctype>::__digits == 32 + && __urngrange == __int_traits<__uctype>::__max) + { + __ret = _S_nd<__UINT64_TYPE__>(__urng, __uerange); + } + else + { + // fallback case (2 divisions) + const __uctype __scaling = __urngrange / __uerange; + const __uctype __past = __uerange * __scaling; + do + __ret = __uctype(__urng()) - __urngmin; + while (__ret >= __past); + __ret /= __scaling; + } } else if (__urngrange < __urange) { |