[libc++] Implement std::gcd using the binary version (#77747)

The binary version is four times faster than current implementation in my setup, and generally considered a better implementation. Code inspired by https://en.algorithmica.org/hpc/algorithms/gcd/ which itself is inspired by https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/ Fix #77648
author: serge-sans-paille <sguelton@mozilla.com> 2024-05-08 14:21:31 +0000
committer: GitHub <noreply@github.com> 2024-05-08 14:21:31 +0000
commit: 27a062e9ca7c92e89ed4084c3c3affb9fa39aabb (patch)
tree: 28da7ada9c77aa525c776a0644a47e3d7e6e950f /libcxx/include/__numeric
parent: b5afda8d760998641cf08a6d229252924b0ad146 (diff)
download: llvm-27a062e9ca7c92e89ed4084c3c3affb9fa39aabb.zip
llvm-27a062e9ca7c92e89ed4084c3c3affb9fa39aabb.tar.gz
llvm-27a062e9ca7c92e89ed4084c3c3affb9fa39aabb.tar.bz2
1 files changed, 42 insertions, 2 deletions
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 48df233..5d735a51 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -10,7 +10,9 @@
 #ifndef _LIBCPP___NUMERIC_GCD_LCM_H
 #define _LIBCPP___NUMERIC_GCD_LCM_H
 
+#include <__algorithm/min.h>
 #include <__assert>
+#include <__bit/countr.h>
 #include <__config>
 #include <__type_traits/common_type.h>
 #include <__type_traits/is_integral.h>
@@ -50,9 +52,47 @@ struct __ct_abs<_Result, _Source, false> {
 };
 
 template <class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
   static_assert((!is_signed<_Tp>::value), "");
-  return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);
+
+  // From: https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor
+  //
+  // If power of two divides both numbers, we can push it out.
+  // - gcd( 2^x * a, 2^x * b) = 2^x * gcd(a, b)
+  //
+  // If and only if exactly one number is even, we can divide that number by that power.
+  // - if a, b are odd, then gcd(2^x * a, b) = gcd(a, b)
+  //
+  // And standard gcd algorithm where instead of modulo, minus is used.
+
+  if (__a < __b) {
+    _Tp __tmp = __b;
+    __b       = __a;
+    __a       = __tmp;
+  }
+  if (__b == 0)
+    return __a;
+  __a %= __b; // Make both argument of the same size, and early result in the easy case.
+  if (__a == 0)
+    return __b;
+
+  int __az    = std::__countr_zero(__a);
+  int __bz    = std::__countr_zero(__b);
+  int __shift = std::min(__az, __bz);
+  __a >>= __az;
+  __b >>= __bz;
+  do {
+    _Tp __diff = __a - __b;
+    if (__a > __b) {
+      __a = __b;
+      __b = __diff;
+    } else {
+      __b = __b - __a;
+    }
+    if (__diff != 0)
+      __b >>= std::__countr_zero(__diff);
+  } while (__b != 0);
+  return __a << __shift;
 }
 
 template <class _Tp, class _Up>
author	serge-sans-paille <sguelton@mozilla.com>	2024-05-08 14:21:31 +0000
committer	GitHub <noreply@github.com>	2024-05-08 14:21:31 +0000
commit	27a062e9ca7c92e89ed4084c3c3affb9fa39aabb (patch)
tree	28da7ada9c77aa525c776a0644a47e3d7e6e950f /libcxx/include/__numeric
parent	b5afda8d760998641cf08a6d229252924b0ad146 (diff)
download	llvm-27a062e9ca7c92e89ed4084c3c3affb9fa39aabb.zip llvm-27a062e9ca7c92e89ed4084c3c3affb9fa39aabb.tar.gz llvm-27a062e9ca7c92e89ed4084c3c3affb9fa39aabb.tar.bz2