author    Vladimir N. Makarov <vmakarov@redhat.com>   2025-02-25 15:01:15 -0500
committer Vladimir N. Makarov <vmakarov@redhat.com>   2025-02-25 15:04:50 -0500
commit    2341f675edadd6370147d2bc55ca7761a7ecfaa1
tree      4b96fc958ea2f747355e8a314a69cd103d80a397
parent    0bb431d0a77cf8dc790b9c61539b3eb6ab1710f0
[PR115458][LRA]: Run split sub-pass more times
In the test case for this PR, LRA has to provide an unusually large number of
hard regs for the insn reloads: some reload pseudos each require 8 aligned
regs.  As a last resort, LRA tries to split live ranges of hard regs for the
insn reload pseudos; this is a very rare case.  An inheritance pseudo
involving a reload pseudo of the insn can then be spilled by the assignment
sub-pass that runs right after the splitting, so we now need to run the split
sub-pass again for the inheritance pseudo.
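The shape of the retry logic can be shown outside GCC.  The following is a
minimal, self-contained C sketch of the bounded retry loop this patch adds to
lra (): the split sub-pass is re-run after a failed assignment, and only once
more than LRA_MAX_FAILED_SPLITS failures have accumulated is it asked to
diagnose and give up.  Here split_attempt () is a hypothetical stand-in for
lra_split_hard_reg_for () and its success schedule is invented just to drive
the loop; only the counter, the cap, and the loop structure mirror the patch.

/* Standalone model of the bounded retry loop added to lra ().
   split_attempt () stands in for lra_split_hard_reg_for (); it
   pretends to succeed on its third call (an invented schedule).  */
#include <stdbool.h>
#include <stdio.h>

#define LRA_MAX_FAILED_SPLITS 10   /* Same cap as in lra-int.h.  */

/* Return true if a hard reg live range was "split".  When FAIL_P is
   true and nothing can be split, the real function reports an error
   (and nullifies an offending asm) before returning false.  */
static bool
split_attempt (bool fail_p, int call_no)
{
  bool split_p = call_no >= 3;
  printf ("attempt %d: %s%s\n", call_no,
          split_p ? "split a hard reg live range" : "nothing split yet",
          !split_p && fail_p ? ", giving up" : "");
  return split_p;
}

int
main (void)
{
  int split_fails_num = 0;

  for (int call_no = 1;; call_no++)
    {
      /* As in the patched lra.cc: only after LRA_MAX_FAILED_SPLITS
         failed rounds is the sub-pass told this is the final try.  */
      bool last_failed_split_p = split_fails_num > LRA_MAX_FAILED_SPLITS;
      if (!split_attempt (last_failed_split_p, call_no))
        {
          if (last_failed_split_p)
            break;              /* Too many failures: give up.  */
          split_fails_num++;    /* Otherwise another round is allowed.  */
        }
      else
        break;                  /* Split done; assignment would be rerun.  */
    }
  return 0;
}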
gcc/ChangeLog:
PR target/115458
* lra-int.h (LRA_MAX_FAILED_SPLITS): Define and check its value.
(lra_split_hard_reg_for): Change prototype.
* lra.cc (lra): Try to split hard reg range several times after a
failure.
* lra-assigns.cc (lra_split_hard_reg_for): Add an arg, a flag of
giving up. Report asm error and nullify the asm insn depending on
the arg value.
gcc/testsuite/ChangeLog:
PR target/115458
* g++.target/riscv/pr115458.C: New.
 gcc/lra-assigns.cc                        |  50
 gcc/lra-int.h                             |  14
 gcc/lra.cc                                |  14
 gcc/testsuite/g++.target/riscv/pr115458.C | 357
 4 files changed, 410 insertions(+), 25 deletions(-)
diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index f9e3dfc..480925a 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1763,12 +1763,13 @@ find_reload_regno_insns (int regno, rtx_insn * &start, rtx_insn * &finish)
   return true;
 }
 
-/* Process reload pseudos which did not get a hard reg, split a hard
-   reg live range in live range of a reload pseudo, and then return
-   TRUE.  If we did not split a hard reg live range, report an error,
-   and return FALSE.  */
+/* Process reload pseudos which did not get a hard reg, split a hard reg live
+   range in live range of a reload pseudo, and then return TRUE.  Otherwise,
+   return FALSE.  When FAIL_P is TRUE and if we did not split a hard reg live
+   range for failed reload pseudos, report an error and modify related asm
+   insns.  */
 bool
-lra_split_hard_reg_for (void)
+lra_split_hard_reg_for (bool fail_p)
 {
   int i, regno;
   rtx_insn *insn, *first, *last;
@@ -1843,23 +1844,30 @@ lra_split_hard_reg_for (void)
       regno = u;
       bitmap_ior_into (&failed_reload_insns,
                        &lra_reg_info[regno].insn_bitmap);
-      lra_setup_reg_renumber
-        (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
-    }
-  EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
-    {
-      insn = lra_insn_recog_data[u]->insn;
-      if (asm_noperands (PATTERN (insn)) >= 0)
-        {
-          asm_p = true;
-          lra_asm_insn_error (insn);
-        }
-      else if (!asm_p)
-        {
-          error ("unable to find a register to spill");
-          fatal_insn ("this is the insn:", insn);
-        }
+      if (fail_p)
+        lra_setup_reg_renumber
+          (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
     }
+  if (fail_p)
+    EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
+      {
+        insn = lra_insn_recog_data[u]->insn;
+        if (asm_noperands (PATTERN (insn)) >= 0)
+          {
+            asm_p = true;
+            lra_asm_insn_error (insn);
+            if (JUMP_P (insn))
+              ira_nullify_asm_goto (insn);
+            else
+              PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
+            lra_invalidate_insn_data (insn);
+          }
+        else if (!asm_p)
+          {
+            error ("unable to find a register to spill");
+            fatal_insn ("this is the insn:", insn);
+          }
+      }
   bitmap_clear (&failed_reload_pseudos);
   bitmap_clear (&failed_reload_insns);
   return false;
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 1f89e06..ad42f48 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -252,6 +252,18 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
    for preventing LRA cycling in a bug case.  */
 #define LRA_MAX_ASSIGNMENT_ITERATION_NUMBER 30
 
+/* Maximum allowed number of tries to split hard reg live ranges after failure
+   in assignment of reload pseudos.  Theoretical bound for the value is the
+   number of the insn reload pseudos plus the number of inheritance pseudos
+   generated from the reload pseudos.  This bound can be achieved when all the
+   reload pseudos and the inheritance pseudos require hard reg splitting for
+   their assignment.  This is extremely unlikely event.  */
+#define LRA_MAX_FAILED_SPLITS 10
+
+#if LRA_MAX_FAILED_SPLITS >= LRA_MAX_ASSIGNMENT_ITERATION_NUMBER
+#error wrong LRA_MAX_FAILED_SPLITS value
+#endif
+
 /* The maximal number of inheritance/split passes in LRA.  It should
    be more 1 in order to perform caller saves transformations and much
    less MAX_CONSTRAINT_ITERATION_NUMBER to prevent LRA to do as many
@@ -392,7 +404,7 @@ extern int lra_assignment_iter;
 extern int lra_assignment_iter_after_spill;
 extern void lra_setup_reg_renumber (int, int, bool);
 extern bool lra_assign (bool &);
-extern bool lra_split_hard_reg_for (void);
+extern bool lra_split_hard_reg_for (bool fail_p);
 
 /* lra-coalesce.cc: */
 
diff --git a/gcc/lra.cc b/gcc/lra.cc
@@ -2480,6 +2480,7 @@ lra (FILE *f, int verbose)
       lra_clear_live_ranges ();
       bool fails_p;
       lra_hard_reg_split_p = false;
+      int split_fails_num = 0;
       do
        {
          /* We need live ranges for lra_assign -- so build them.
@@ -2493,7 +2494,7 @@ lra (FILE *f, int verbose)
             coalescing.  If inheritance pseudos were spilled, the
             memory-memory moves involving them will be removed by pass
             undoing inheritance.  */
-         if (lra_simple_p)
+         if (lra_simple_p || lra_hard_reg_split_p)
            lra_assign (fails_p);
          else
            {
@@ -2522,8 +2523,15 @@ lra (FILE *f, int verbose)
              if (live_p)
                lra_clear_live_ranges ();
              live_p = false;
-             if (! lra_split_hard_reg_for ())
-               break;
+             /* See a comment for LRA_MAX_FAILED_SPLITS definition.  */
+             bool last_failed_split_p
+               = split_fails_num > LRA_MAX_FAILED_SPLITS;
+             if (! lra_split_hard_reg_for (last_failed_split_p))
+               {
+                 if (last_failed_split_p)
+                   break;
+                 split_fails_num++;
+               }
              lra_hard_reg_split_p = true;
            }
        }
diff --git a/gcc/testsuite/g++.target/riscv/pr115458.C b/gcc/testsuite/g++.target/riscv/pr115458.C
new file mode 100644
index 0000000..2c8d907
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/pr115458.C
@@ -0,0 +1,357 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv1p0 -mabi=lp64d -misa-spec=20191213 -mtls-dialect=trad -march=rv64imafdc_v1p0_zmmul_zca_zcd_zve32f_zve32x_zve64d_zve64f_zve64x_zvl128b_zvl32b_zvl64b -O2 -std=c++17 -fno-exceptions -w" } */
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+void Abort(...);
+template <bool> struct EnableIfT;
+template <> struct EnableIfT<true> {
+  using type = void;
+};
+template <bool Condition> using EnableIf = typename EnableIfT<Condition>::type;
+template <typename> using MakeUnsigned = unsigned char;
+template <typename> using MakeSigned = signed char;
+template <int __v> struct integral_constant {
+  static constexpr int value = __v;
+};
+template <bool __v> using __bool_constant = integral_constant<__v>;
+template <bool, typename _Tp> using __enable_if_t = _Tp;
+char *TargetName();
+template <typename _Tp> struct __uniq_ptr_impl {
+  template <typename _Up> struct _Ptr {
+    using type = _Up *;
+  };
+  using pointer = typename _Ptr<_Tp>::type;
+};
+template <typename _Tp, typename = _Tp> class unique_ptr;
+template <typename _Tp, typename _Dp> struct unique_ptr<_Tp[], _Dp> {
+  template <typename _Up, typename _Del>
+  unique_ptr(_Up, __enable_if_t<__bool_constant<false>::value, _Del>);
+  typename __uniq_ptr_impl<_Tp>::pointer get();
+  operator bool();
+};
+using AllocPtr = void *;
+using FreePtr = void();
+template <typename T> T AllocateAlignedItems(int, AllocPtr, void *);
+struct AlignedFreer {
+  AlignedFreer(FreePtr, void *);
+};
+template <typename T> using AlignedFreeUniquePtr = unique_ptr<T>;
+AllocPtr AllocateAligned_alloc;
+template <typename T>
+AlignedFreeUniquePtr<T[]> AllocateAligned(int items, void *opaque) {
+  FreePtr free;
+  return AlignedFreeUniquePtr<T[]>(
+      AllocateAlignedItems<T>(items, AllocateAligned_alloc, opaque),
+      AlignedFreer(free, opaque));
+}
+template <typename T> AlignedFreeUniquePtr<T[]> AllocateAligned(int items) {
+  return AllocateAligned<T>(items, nullptr);
+}
+template <typename> void MakeTypeInfo();
+void AssertArrayEqual(void *, void *, char *, int);
+#pragma riscv intrinsic "vector"
+template <typename Lane, int, int kPow2> struct Simd {
+  using T = Lane;
+  constexpr int Pow2() { return kPow2; }
+  template <typename> static constexpr int RebindPow2() { return kPow2; }
+  template <typename NewT> using Rebind = Simd<NewT, 0, RebindPow2<NewT>()>;
+};
+template <typename T, int kPow2> struct ClampNAndPow2 {
+  using type = Simd<T, 6, kPow2>;
+};
+template <typename T, int kPow2> struct ScalableTagChecker {
+  using type = typename ClampNAndPow2<T, kPow2>::type;
+};
+template <typename T, int kPow2>
+using ScalableTag = typename ScalableTagChecker<T, kPow2>::type;
+template <class D> using TFromD = typename D::T;
+template <class T, class D> using Rebind = typename D::Rebind<T>;
+template <class D> using RebindToSigned = Rebind<MakeSigned<D>, D>;
+template <class D> using RebindToUnsigned = Rebind<MakeUnsigned<D>, D>;
+template <class> struct DFromV_t;
+template <class V> using DFromV = typename DFromV_t<V>::type;
+template <> struct DFromV_t<vint8mf8_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, -3>;
+};
+template <> struct DFromV_t<vint8mf4_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, -2>;
+};
+template <> struct DFromV_t<vint8mf2_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, -1>;
+};
+template <> struct DFromV_t<vint8m1_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, 0>;
+};
+template <> struct DFromV_t<vint8m2_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, 1>;
+};
+template <> struct DFromV_t<vint8m4_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, 2>;
+};
+template <> struct DFromV_t<vint8m8_t> {
+  using Lane = int8_t;
+  using type = ScalableTag<Lane, 3>;
+};
+template <int N> int Lanes(Simd<int8_t, N, -3>);
+template <int N> int Lanes(Simd<int8_t, N, -2>);
+template <int N> int Lanes(Simd<int8_t, N, -1>);
+template <int N> int Lanes(Simd<int8_t, N, 0>);
+template <int N> int Lanes(Simd<int8_t, N, 1>);
+template <int N> int Lanes(Simd<int8_t, N, 2>);
+template <int N> int Lanes(Simd<int8_t, N, 3>);
+template <int N> vuint8mf8_t Set(Simd<uint8_t, N, -3>, uint8_t);
+template <int N> vuint8mf4_t Set(Simd<uint8_t, N, -2>, uint8_t);
+template <int N> vuint8mf2_t Set(Simd<uint8_t, N, -1>, uint8_t);
+template <int N> vuint8m1_t Set(Simd<uint8_t, N, 0>, uint8_t);
+template <int N> vuint8m2_t Set(Simd<uint8_t, N, 1>, uint8_t);
+template <int N> vuint8m4_t Set(Simd<uint8_t, N, 2>, uint8_t);
+template <int N> vuint8m8_t Set(Simd<uint8_t, N, 3>, uint8_t arg) {
+  return __riscv_vmv_v_x_u8m8(arg, 0);
+}
+template <int N> vint8mf8_t Set(Simd<int8_t, N, -3>, int8_t);
+template <int N> vint8mf4_t Set(Simd<int8_t, N, -2>, int8_t);
+template <int N> vint8mf2_t Set(Simd<int8_t, N, -1>, int8_t);
+template <int N> vint8m1_t Set(Simd<int8_t, N, 0>, int8_t);
+template <int N> vint8m2_t Set(Simd<int8_t, N, 1>, int8_t);
+template <int N> vint8m4_t Set(Simd<int8_t, N, 2>, int8_t);
+template <int N> vint8m8_t Set(Simd<int8_t, N, 3>, int8_t);
+template <class D> using VFromD = decltype(Set(D(), TFromD<D>()));
+template <class D> VFromD<D> Zero(D d) {
+  RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, Set(du, 0));
+}
+template <typename T, int N>
+vuint8mf8_t BitCastToByte(Simd<T, N, -3>, vuint8mf8_t);
+template <typename T, int N>
+vuint8mf4_t BitCastToByte(Simd<T, N, -2>, vuint8mf4_t);
+template <typename T, int N>
+vuint8mf2_t BitCastToByte(Simd<T, N, -1>, vuint8mf2_t);
+template <typename T, int N>
+vuint8m1_t BitCastToByte(Simd<T, N, 0>, vuint8m1_t);
+template <typename T, int N>
+vuint8m2_t BitCastToByte(Simd<T, N, 1>, vuint8m2_t);
+template <typename T, int N>
+vuint8m4_t BitCastToByte(Simd<T, N, 2>, vuint8m4_t);
+template <typename T, int N>
+vuint8m8_t BitCastToByte(Simd<T, N, 3>, vuint8m8_t v) {
+  return v;
+}
+template <typename T, int N>
+vuint8mf8_t BitCastToByte(Simd<T, N, -3>, vint8mf8_t);
+template <int N> vint8mf8_t BitCastFromByte(Simd<int8_t, N, -3>, vuint8mf8_t);
+template <typename T, int N>
+vuint8mf4_t BitCastToByte(Simd<T, N, -2>, vint8mf4_t);
+template <int N> vint8mf4_t BitCastFromByte(Simd<int8_t, N, -2>, vuint8mf4_t);
+template <typename T, int N>
+vuint8mf2_t BitCastToByte(Simd<T, N, -1>, vint8mf2_t);
+template <int N> vint8mf2_t BitCastFromByte(Simd<int8_t, N, -1>, vuint8mf2_t);
+template <typename T, int N> vuint8m1_t BitCastToByte(Simd<T, N, 0>, vint8m1_t);
+template <int N> vint8m1_t BitCastFromByte(Simd<int8_t, N, 0>, vuint8m1_t);
+template <typename T, int N> vuint8m2_t BitCastToByte(Simd<T, N, 1>, vint8m2_t);
+template <int N> vint8m2_t BitCastFromByte(Simd<int8_t, N, 1>, vuint8m2_t);
+template <typename T, int N> vuint8m4_t BitCastToByte(Simd<T, N, 2>, vint8m4_t);
+template <int N> vint8m4_t BitCastFromByte(Simd<int8_t, N, 2>, vuint8m4_t);
+template <typename T, int N> vuint8m8_t BitCastToByte(Simd<T, N, 3>, vint8m8_t);
+template <int N> vint8m8_t BitCastFromByte(Simd<int8_t, N, 3>, vuint8m8_t v) {
+  return __riscv_vreinterpret_v_u8m8_i8m8(v);
+}
+template <class D, class FromV> VFromD<D> BitCast(D d, FromV v) {
+  return BitCastFromByte(d, BitCastToByte(d, v));
+}
+vint8mf8_t And(vint8mf8_t, vint8mf8_t);
+vint8mf4_t And(vint8mf4_t, vint8mf4_t);
+vint8mf2_t And(vint8mf2_t, vint8mf2_t);
+vint8m1_t And(vint8m1_t, vint8m1_t);
+vint8m2_t And(vint8m2_t, vint8m2_t);
+vint8m4_t And(vint8m4_t, vint8m4_t);
+vint8m8_t And(vint8m8_t, vint8m8_t);
+vint8mf8_t Xor(vint8mf8_t, vint8mf8_t);
+vint8mf4_t Xor(vint8mf4_t, vint8mf4_t);
+vint8mf2_t Xor(vint8mf2_t, vint8mf2_t);
+vint8m1_t Xor(vint8m1_t, vint8m1_t);
+vint8m2_t Xor(vint8m2_t, vint8m2_t);
+vint8m4_t Xor(vint8m4_t, vint8m4_t);
+vint8m8_t Xor(vint8m8_t, vint8m8_t);
+template <class V> V AndNot(V);
+template <class V> V Xor3(V);
+template <class V> V Neg(V);
+template <int> vuint8mf8_t ShiftLeft(vuint8mf8_t);
+template <int> vuint8mf4_t ShiftLeft(vuint8mf4_t);
+template <int> vuint8mf2_t ShiftLeft(vuint8mf2_t);
+template <int> vuint8m1_t ShiftLeft(vuint8m1_t);
+template <int> vuint8m2_t ShiftLeft(vuint8m2_t);
+template <int> vuint8m4_t ShiftLeft(vuint8m4_t);
+template <int> vuint8m8_t ShiftLeft(vuint8m8_t);
+vint8mf8_t MaskedSubOr(vint8mf8_t, vbool64_t, vint8mf8_t, vint8mf8_t);
+vint8mf4_t MaskedSubOr(vint8mf4_t, vbool32_t, vint8mf4_t, vint8mf4_t);
+vint8mf2_t MaskedSubOr(vint8mf2_t, vbool16_t, vint8mf2_t, vint8mf2_t);
+vint8m1_t MaskedSubOr(vint8m1_t, vbool8_t, vint8m1_t, vint8m1_t);
+vint8m2_t MaskedSubOr(vint8m2_t, vbool4_t, vint8m2_t, vint8m2_t);
+vint8m4_t MaskedSubOr(vint8m4_t, vbool2_t, vint8m4_t, vint8m4_t);
+vint8m8_t MaskedSubOr(vint8m8_t no, vbool1_t m, vint8m8_t a, vint8m8_t b) {
+  return __riscv_vsub_vv_i8m8_mu(m, no, a, b, 0);
+}
+vbool64_t Lt(vint8mf8_t, vint8mf8_t);
+vbool32_t Lt(vint8mf4_t, vint8mf4_t);
+vbool16_t Lt(vint8mf2_t, vint8mf2_t);
+vbool8_t Lt(vint8m1_t, vint8m1_t);
+vbool4_t Lt(vint8m2_t, vint8m2_t);
+vbool2_t Lt(vint8m4_t, vint8m4_t);
+vbool1_t Lt(vint8m8_t a, vint8m8_t b) {
+  return __riscv_vmslt_vv_i8m8_b1(a, b, 0);
+}
+template <class V> V BroadcastSignBit(V);
+template <class V> V IfNegativeThenElse(V);
+template <int N> void Store(vint8mf8_t, Simd<int8_t, N, -3>, int8_t *);
+template <int N> void Store(vint8mf4_t, Simd<int8_t, N, -2>, int8_t *);
+template <int N> void Store(vint8mf2_t, Simd<int8_t, N, -1>, int8_t *);
+template <int N> void Store(vint8m1_t, Simd<int8_t, N, 0>, int8_t *);
+template <int N> void Store(vint8m2_t, Simd<int8_t, N, 1>, int8_t *);
+template <int N> void Store(vint8m4_t, Simd<int8_t, N, 2>, int8_t *);
+template <int N> void Store(vint8m8_t, Simd<int8_t, N, 3>, int8_t *);
+template <class D, class V, EnableIf<D().Pow2() <= 2> * = nullptr>
+V InterleaveUpperBlocks(D, V, V) {}
+template <class D, class V, EnableIf<(D().Pow2() > 2)> * = nullptr>
+V InterleaveUpperBlocks(D, V, V);
+template <typename T, int N, int kPow2>
+constexpr bool IsGE128(Simd<T, N, kPow2>) {
+  return kPow2 >= 0;
+}
+template <class D, class V, EnableIf<IsGE128(D())> * = nullptr>
+V InterleaveLower(D, V, V);
+template <class D, class V, EnableIf<!IsGE128(D())> * = nullptr>
+V InterleaveLower(D, V, V);
+template <class D, class V, EnableIf<IsGE128(D())> * = nullptr>
+V InterleaveUpper(D d, V a, V b) {
+  return InterleaveUpperBlocks(d, a, b);
+}
+template <class D, class V, EnableIf<!IsGE128(D())> * = nullptr>
+V InterleaveUpper(D, V, V);
+template <class D, typename T2> VFromD<D> Iota(D, T2);
+template <class D> using Vec = decltype(Zero(D()));
+template <class D> Vec<D> SignBit(D);
+template <class V> V IfNegativeThenElseZero(V);
+template <class V> V IfNegativeThenZeroElse(V);
+template <class V> V BitwiseIfThenElse(V, V, V);
+template <class V> inline V IfNegativeThenNegOrUndefIfZero(V mask, V v) {
+  auto zero = Zero(DFromV<V>());
+  return MaskedSubOr(v, Lt(mask, zero), zero, v);
+}
+template <class D> Vec<D> PositiveIota(D);
+int AssertVecEqual_line;
+template <class D, typename T = TFromD<D>>
+inline void AssertVecEqual(D d, Vec<D> expected, Vec<D> actual, char *) {
+  int N = Lanes(d);
+  auto expected_lanes = AllocateAligned<T>(N),
+       actual_lanes = AllocateAligned<T>(N);
+  if (expected_lanes && actual_lanes)
+    Abort("", "");
+  Store(expected, d, expected_lanes.get());
+  Store(actual, d, actual_lanes.get());
+  MakeTypeInfo<T>();
+  char *target_name = TargetName();
+  AssertArrayEqual(expected_lanes.get(), actual_lanes.get(), target_name,
+                   AssertVecEqual_line);
+}
+template <typename> constexpr int MinPow2() { return sizeof(int) ? -3 : 0; }
+template <typename T, int kPow2, int kMaxPow2, int, class Test>
+struct ForeachPow2 {
+  static void Do(int min_lanes) {
+    ScalableTag<T, kPow2> d;
+    Lanes(d);
+    Test()(T(), d);
+    ForeachPow2<T, kPow2 + 1, kMaxPow2, kPow2 + 1 <= kMaxPow2, Test>::Do(
+        min_lanes);
+  }
+};
+template <typename T, int kPow2, int kMaxPow2, class Test>
+struct ForeachPow2<T, kPow2, kMaxPow2, false, Test> {
+  static void Do(int);
+};
+template <typename T, int kAddMin, int kSubMax, class Test>
+using ForeachPow2Trim =
+    ForeachPow2<T, MinPow2<T>(), 3, kAddMin <= kSubMax, Test>;
+template <class Test, int kPow2> struct ForExtendableVectors {
+  template <typename T> void operator()(T) {
+    ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
+  }
+};
+template <class Test> struct ForPartialVectors {
+  template <typename T> void operator()(T t) {
+    ForExtendableVectors<Test, 0>()(t);
+  }
+};
+template <class Func> void ForSignedTypes(Func func) { func(int8_t()); }
+struct TestIfNegative {
+  template <class T, class D> void operator()(T, D d) {
+    auto vp = Iota(d, 1), vsignbit = SignBit(d);
+    RebindToSigned<decltype(d)> di;
+    RebindToUnsigned<decltype(d)> du;
+    BitCast(d, ShiftLeft<sizeof(TFromD<decltype(d)>)>(Iota(du, 1)));
+    auto m1 = Xor3(BitCast(d, Set(du, {})));
+    auto x1 = Xor(vp, BitCast(d, Set(d, {})));
+    auto x2 = Xor(vp, BitCast(d, Set(d, {})));
+    Xor(m1, vsignbit);
+    auto m1_s = BitCast(d, BroadcastSignBit(BitCast(di, m1)));
+    auto expected_2 = BitwiseIfThenElse(m1_s, x2, x1);
+    AssertVecEqual(d, expected_2, IfNegativeThenElse(x2), "");
+    auto expected_3 = And(m1_s, x1);
+    auto expected_4 = AndNot(x2);
+    AssertVecEqual(d, expected_3, IfNegativeThenElseZero(x1), "");
+    AssertVecEqual(d, expected_3, IfNegativeThenZeroElse(x1), "");
+    AssertVecEqual(d, expected_4, IfNegativeThenZeroElse(x2), "");
+    AssertVecEqual(d, expected_4, IfNegativeThenElseZero(x2), "");
+  }
+};
+void TestAllIfNegative() {
+  ForSignedTypes(ForPartialVectors<TestIfNegative>());
+}
+template <class D>
+void TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(D d, Vec<D> v1) {
+  Vec<D> v2, v3 = InterleaveLower(d, v1, v1), v5 = InterleaveLower(d, v1, v2);
+  if (Lanes(d) < 2)
+    return;
+  Vec<D> v4 = InterleaveUpper(d, v1, v1);
+  Vec<D> v6 = InterleaveUpper(d, v1, v2);
+  Vec<D> v7 = InterleaveLower(d, v2, v1);
+  Vec<D> v8 = InterleaveUpper(d, v2, v1);
+  AssertVecEqual(d, v3, IfNegativeThenNegOrUndefIfZero(v3, v3), "");
+  AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v4, v4), "");
+  AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v8, v8), "");
+  AssertVecEqual(d, v6, IfNegativeThenNegOrUndefIfZero(v4, v6), "");
+  AssertVecEqual(d, v7, IfNegativeThenNegOrUndefIfZero(v3, v7), "");
+  AssertVecEqual(d, v8, IfNegativeThenNegOrUndefIfZero(v4, v8), "");
+  Vec<D> zero = Zero(d);
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v3, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v4, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v5, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v6, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v7, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v8, zero), "");
+}
+struct TestIfNegativeThenNegOrUndefIfZero {
+  template <typename T, class D> void operator()(T, D d) {
+    auto v1 = PositiveIota(d), v2 = Neg(v1), zero = Zero(d), vmin = Set(d, 0),
+         vmax = Set(d, 0);
+    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v1, v2), "");
+    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v2, v1), "");
+    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(v2, v2), "");
+    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(zero, zero), "");
+    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v1, zero), "");
+    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v2, zero), "");
+    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmin, v2), "");
+    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmax, v1), "");
+    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(vmax, v2), "");
+    TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(d, v1);
+  }
+};
+void TestAllIfNegativeThenNegOrUndefIfZero() {
+  ForSignedTypes(ForPartialVectors<TestIfNegativeThenNegOrUndefIfZero>());
+}