diff options
author | Matthias Kretz <kretz@kde.org> | 2021-02-03 15:49:30 +0000 |
---|---|---|
committer | Jonathan Wakely <jwakely@redhat.com> | 2021-02-03 15:49:30 +0000 |
commit | 81c2c32de9c1058c33fcf77ada31186b4ae1f1fe (patch) | |
tree | ade43ae42ef8baf375965866e4811c3a871d9389 | |
parent | 71f9b9bd0acc7d0749e159efb1b9b4c57197a77d (diff) | |
download | gcc-81c2c32de9c1058c33fcf77ada31186b4ae1f1fe.zip gcc-81c2c32de9c1058c33fcf77ada31186b4ae1f1fe.tar.gz gcc-81c2c32de9c1058c33fcf77ada31186b4ae1f1fe.tar.bz2 |
libstdc++: Fix mask reduction of simd_mask<double> on POWER7
POWER7 does not support __vector long long reductions, making the
generic _S_popcount implementation ill-formed. Specializing _S_popcount
for PPC allows optimization and avoids the issue.
libstdc++-v3/ChangeLog:
* include/experimental/bits/simd.h: Add __have_power10vec
conditional on _ARCH_PWR10.
* include/experimental/bits/simd_builtin.h: Forward declare
_MaskImplPpc and use it as _MaskImpl when __ALTIVEC__ is
defined.
(_MaskImplBuiltin::_S_some_of): Call _S_popcount from the
_SuperImpl for optimizations and correctness.
* include/experimental/bits/simd_ppc.h: Add _MaskImplPpc.
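(_MaskImplPpc::_S_popcount): Implement via vec_cntm for POWER10.
Otherwise, since true mask entries have all bits set (i.e. -1), negate
the sum: for sizeof(_Tp) >= sizeof(int) use -vec_sums divided by
sizeof(_Tp) / sizeof(int). For sizeof(_Tp) < sizeof(int) use
-vec_sums(vec_sum4s(...)) to sum all mask entries.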
-rw-r--r-- | libstdc++-v3/include/experimental/bits/simd.h | 5 | ||||
-rw-r--r-- | libstdc++-v3/include/experimental/bits/simd_builtin.h | 6 | ||||
-rw-r--r-- | libstdc++-v3/include/experimental/bits/simd_ppc.h | 35 |
3 files changed, 43 insertions, 3 deletions
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 149396d..becd1d6 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -477,6 +477,11 @@ constexpr inline bool __support_neon_float = false; #endif +#ifdef _ARCH_PWR10 +constexpr inline bool __have_power10vec = true; +#else +constexpr inline bool __have_power10vec = false; +#endif #ifdef __POWER9_VECTOR__ constexpr inline bool __have_power9vec = true; #else diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index efca65f..7f728a1 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -920,6 +920,7 @@ template <typename _Abi> struct _MaskImplX86; template <typename _Abi> struct _SimdImplNeon; template <typename _Abi> struct _MaskImplNeon; template <typename _Abi> struct _SimdImplPpc; +template <typename _Abi> struct _MaskImplPpc; // simd_abi::_VecBuiltin {{{ template <int _UsedBytes> @@ -959,11 +960,12 @@ template <int _UsedBytes> using _CommonImpl = _CommonImplBuiltin; #ifdef __ALTIVEC__ using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>; + using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>; #else using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>; -#endif using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>; #endif +#endif // }}} // __traits {{{ @@ -2899,7 +2901,7 @@ template <typename _Abi> _GLIBCXX_SIMD_INTRINSIC static bool _S_some_of(simd_mask<_Tp, _Abi> __k) { - const int __n_true = _S_popcount(__k); + const int __n_true = _SuperImpl::_S_popcount(__k); return __n_true > 0 && __n_true < int(_S_size<_Tp>); } diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h index b92fc19..ef52d12 100644 --- a/libstdc++-v3/include/experimental/bits/simd_ppc.h +++ 
b/libstdc++-v3/include/experimental/bits/simd_ppc.h @@ -30,6 +30,7 @@ #ifndef __ALTIVEC__ #error "simd_ppc.h may only be included when AltiVec/VMX is available" #endif +#include <altivec.h> _GLIBCXX_SIMD_BEGIN_NAMESPACE @@ -115,9 +116,41 @@ template <typename _Abi> }; // }}} +// _MaskImplPpc {{{ +template <typename _Abi> + struct _MaskImplPpc : _MaskImplBuiltin<_Abi> + { + using _Base = _MaskImplBuiltin<_Abi>; + + // _S_popcount {{{ + template <typename _Tp> + _GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k) + { + const auto __kv = __as_vector(__k); + if constexpr (__have_power10vec) + { + return vec_cntm(__to_intrin(__kv), 1); + } + else if constexpr (sizeof(_Tp) >= sizeof(int)) + { + using _Intrin = __intrinsic_type16_t<int>; + const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3]; + return __sum / (sizeof(_Tp) / sizeof(int)); + } + else + { + const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>()); + return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3]; + } + } + + // }}} + }; + +// }}} _GLIBCXX_SIMD_END_NAMESPACE #endif // __cplusplus >= 201703L #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_ -// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80 +// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100 |