author     Vladimir N. Makarov <vmakarov@redhat.com>   2025-02-25 15:01:15 -0500
committer  Vladimir N. Makarov <vmakarov@redhat.com>   2025-02-25 15:04:50 -0500
commit     2341f675edadd6370147d2bc55ca7761a7ecfaa1
tree       4b96fc958ea2f747355e8a314a69cd103d80a397
parent     0bb431d0a77cf8dc790b9c61539b3eb6ab1710f0
[PR115458][LRA]: Run split sub-pass more times

In this PR's case, LRA needs to provide too many hard regs for insn
reloads, where some reload pseudos require 8 aligned regs for themselves.
As a last attempt, LRA tries to split live ranges of hard regs for the
insn reload pseudos.  This is a very rare case.  An inheritance pseudo
involving a reload pseudo of the insn can be spilled in the assignment
sub-pass run right after splitting, and we then need to run the split
sub-pass again for the inheritance pseudo.

gcc/ChangeLog:

	PR target/115458
	* lra-int.h (LRA_MAX_FAILED_SPLITS): Define and check its value.
	(lra_split_hard_reg_for): Change prototype.
	* lra.cc (lra): Try to split hard reg range several times after a
	failure.
	* lra-assigns.cc (lra_split_hard_reg_for): Add an arg, a flag of
	giving up.  Report asm error and nullify the asm insn depending on
	the arg value.

gcc/testsuite/ChangeLog:

	PR target/115458
	* g++.target/riscv/pr115458.C: New.
-rw-r--r--   gcc/lra-assigns.cc                          50
-rw-r--r--   gcc/lra-int.h                               14
-rw-r--r--   gcc/lra.cc                                  14
-rw-r--r--   gcc/testsuite/g++.target/riscv/pr115458.C  357
4 files changed, 410 insertions, 25 deletions
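
The fix is easiest to follow from the lra.cc hunk below: when the assignment
sub-pass fails, LRA now retries the hard-reg-splitting sub-pass up to
LRA_MAX_FAILED_SPLITS times before telling it to give up and report errors.
The following is a minimal standalone C++ sketch of that control flow only;
it is not GCC code, and try_assign, split_hard_reg_for, and MAX_FAILED_SPLITS
are hypothetical stand-ins for lra_assign, lra_split_hard_reg_for, and
LRA_MAX_FAILED_SPLITS.

  // Standalone sketch of the retry loop added in lra.cc (not GCC code).
  #include <cstdio>

  constexpr int MAX_FAILED_SPLITS = 10;   // mirrors LRA_MAX_FAILED_SPLITS

  // Pretend assignment sub-pass: fails a few rounds before succeeding.
  static bool try_assign (int round) { return round >= 3; }

  // Pretend split sub-pass: returns true if a hard reg range was split.
  // When fail_p is true this is the last chance; in the real code a failure
  // here reports an error and nullifies offending asm insns.
  static bool split_hard_reg_for (bool fail_p) { return !fail_p; }

  int main ()
  {
    int split_fails_num = 0;
    for (int round = 0; ; round++)
      {
        bool fails_p = !try_assign (round);
        if (!fails_p)
          {
            printf ("assignment succeeded on round %d\n", round);
            break;
          }
        /* Assignment failed: try splitting a hard reg live range.  Only
           after the counter exceeds the cap do we tell the split sub-pass
           to give up.  */
        bool last_failed_split_p = split_fails_num > MAX_FAILED_SPLITS;
        if (!split_hard_reg_for (last_failed_split_p))
          {
            if (last_failed_split_p)
              {
                printf ("giving up after %d failed splits\n", split_fails_num);
                break;
              }
            split_fails_num++;
          }
      }
  }
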
diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index f9e3dfc..480925a 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1763,12 +1763,13 @@ find_reload_regno_insns (int regno, rtx_insn * &start, rtx_insn * &finish)
return true;
}
-/* Process reload pseudos which did not get a hard reg, split a hard
- reg live range in live range of a reload pseudo, and then return
- TRUE. If we did not split a hard reg live range, report an error,
- and return FALSE. */
+/* Process reload pseudos which did not get a hard reg, split a hard reg live
+ range inside the live range of a reload pseudo, and then return TRUE.
+ Otherwise, return FALSE.  When FAIL_P is TRUE and we did not split a hard
+ reg live range for the failed reload pseudos, report an error and modify
+ the related asm insns. */
bool
-lra_split_hard_reg_for (void)
+lra_split_hard_reg_for (bool fail_p)
{
int i, regno;
rtx_insn *insn, *first, *last;
@@ -1843,23 +1844,30 @@ lra_split_hard_reg_for (void)
regno = u;
bitmap_ior_into (&failed_reload_insns,
&lra_reg_info[regno].insn_bitmap);
- lra_setup_reg_renumber
- (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
- }
- EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
- {
- insn = lra_insn_recog_data[u]->insn;
- if (asm_noperands (PATTERN (insn)) >= 0)
- {
- asm_p = true;
- lra_asm_insn_error (insn);
- }
- else if (!asm_p)
- {
- error ("unable to find a register to spill");
- fatal_insn ("this is the insn:", insn);
- }
+ if (fail_p)
+ lra_setup_reg_renumber
+ (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
}
+ if (fail_p)
+ EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
+ {
+ insn = lra_insn_recog_data[u]->insn;
+ if (asm_noperands (PATTERN (insn)) >= 0)
+ {
+ asm_p = true;
+ lra_asm_insn_error (insn);
+ if (JUMP_P (insn))
+ ira_nullify_asm_goto (insn);
+ else
+ PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
+ lra_invalidate_insn_data (insn);
+ }
+ else if (!asm_p)
+ {
+ error ("unable to find a register to spill");
+ fatal_insn ("this is the insn:", insn);
+ }
+ }
bitmap_clear (&failed_reload_pseudos);
bitmap_clear (&failed_reload_insns);
return false;
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 1f89e06..ad42f48 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -252,6 +252,18 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
for preventing LRA cycling in a bug case. */
#define LRA_MAX_ASSIGNMENT_ITERATION_NUMBER 30
+/* Maximum allowed number of tries to split hard reg live ranges after a
+ failure in the assignment of reload pseudos.  The theoretical bound for the
+ value is the number of insn reload pseudos plus the number of inheritance
+ pseudos generated from the reload pseudos.  This bound can be reached when
+ all the reload pseudos and the inheritance pseudos require hard reg
+ splitting for their assignment.  This is an extremely unlikely event. */
+#define LRA_MAX_FAILED_SPLITS 10
+
+#if LRA_MAX_FAILED_SPLITS >= LRA_MAX_ASSIGNMENT_ITERATION_NUMBER
+#error wrong LRA_MAX_FAILED_SPLITS value
+#endif
+
/* The maximal number of inheritance/split passes in LRA. It should
be more 1 in order to perform caller saves transformations and much
less MAX_CONSTRAINT_ITERATION_NUMBER to prevent LRA to do as many
@@ -392,7 +404,7 @@ extern int lra_assignment_iter;
extern int lra_assignment_iter_after_spill;
extern void lra_setup_reg_renumber (int, int, bool);
extern bool lra_assign (bool &);
-extern bool lra_split_hard_reg_for (void);
+extern bool lra_split_hard_reg_for (bool fail_p);
/* lra-coalesce.cc: */
diff --git a/gcc/lra.cc b/gcc/lra.cc
index daf9840..b753729 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -2480,6 +2480,7 @@ lra (FILE *f, int verbose)
lra_clear_live_ranges ();
bool fails_p;
lra_hard_reg_split_p = false;
+ int split_fails_num = 0;
do
{
/* We need live ranges for lra_assign -- so build them.
@@ -2493,7 +2494,7 @@ lra (FILE *f, int verbose)
coalescing. If inheritance pseudos were spilled, the
memory-memory moves involving them will be removed by
pass undoing inheritance. */
- if (lra_simple_p)
+ if (lra_simple_p || lra_hard_reg_split_p)
lra_assign (fails_p);
else
{
@@ -2522,8 +2523,15 @@ lra (FILE *f, int verbose)
if (live_p)
lra_clear_live_ranges ();
live_p = false;
- if (! lra_split_hard_reg_for ())
- break;
+ /* See a comment for LRA_MAX_FAILED_SPLITS definition. */
+ bool last_failed_split_p
+ = split_fails_num > LRA_MAX_FAILED_SPLITS;
+ if (! lra_split_hard_reg_for (last_failed_split_p))
+ {
+ if (last_failed_split_p)
+ break;
+ split_fails_num++;
+ }
lra_hard_reg_split_p = true;
}
}
diff --git a/gcc/testsuite/g++.target/riscv/pr115458.C b/gcc/testsuite/g++.target/riscv/pr115458.C
new file mode 100644
index 0000000..2c8d907
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/pr115458.C
@@ -0,0 +1,357 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv1p0 -mabi=lp64d -misa-spec=20191213 -mtls-dialect=trad -march=rv64imafdc_v1p0_zmmul_zca_zcd_zve32f_zve32x_zve64d_zve64f_zve64x_zvl128b_zvl32b_zvl64b -O2 -std=c++17 -fno-exceptions -w" } */
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+void Abort(...);
+template <bool> struct EnableIfT;
+template <> struct EnableIfT<true> {
+ using type = void;
+};
+template <bool Condition> using EnableIf = typename EnableIfT<Condition>::type;
+template <typename> using MakeUnsigned = unsigned char;
+template <typename> using MakeSigned = signed char;
+template <int __v> struct integral_constant {
+ static constexpr int value = __v;
+};
+template <bool __v> using __bool_constant = integral_constant<__v>;
+template <bool, typename _Tp> using __enable_if_t = _Tp;
+char *TargetName();
+template <typename _Tp> struct __uniq_ptr_impl {
+ template <typename _Up> struct _Ptr {
+ using type = _Up *;
+ };
+ using pointer = typename _Ptr<_Tp>::type;
+};
+template <typename _Tp, typename = _Tp> class unique_ptr;
+template <typename _Tp, typename _Dp> struct unique_ptr<_Tp[], _Dp> {
+ template <typename _Up, typename _Del>
+ unique_ptr(_Up, __enable_if_t<__bool_constant<false>::value, _Del>);
+ typename __uniq_ptr_impl<_Tp>::pointer get();
+ operator bool();
+};
+using AllocPtr = void *;
+using FreePtr = void();
+template <typename T> T AllocateAlignedItems(int, AllocPtr, void *);
+struct AlignedFreer {
+ AlignedFreer(FreePtr, void *);
+};
+template <typename T> using AlignedFreeUniquePtr = unique_ptr<T>;
+AllocPtr AllocateAligned_alloc;
+template <typename T>
+AlignedFreeUniquePtr<T[]> AllocateAligned(int items, void *opaque) {
+ FreePtr free;
+ return AlignedFreeUniquePtr<T[]>(
+ AllocateAlignedItems<T>(items, AllocateAligned_alloc, opaque),
+ AlignedFreer(free, opaque));
+}
+template <typename T> AlignedFreeUniquePtr<T[]> AllocateAligned(int items) {
+ return AllocateAligned<T>(items, nullptr);
+}
+template <typename> void MakeTypeInfo();
+void AssertArrayEqual(void *, void *, char *, int);
+#pragma riscv intrinsic "vector"
+template <typename Lane, int, int kPow2> struct Simd {
+ using T = Lane;
+ constexpr int Pow2() { return kPow2; }
+ template <typename> static constexpr int RebindPow2() { return kPow2; }
+ template <typename NewT> using Rebind = Simd<NewT, 0, RebindPow2<NewT>()>;
+};
+template <typename T, int kPow2> struct ClampNAndPow2 {
+ using type = Simd<T, 6, kPow2>;
+};
+template <typename T, int kPow2> struct ScalableTagChecker {
+ using type = typename ClampNAndPow2<T, kPow2>::type;
+};
+template <typename T, int kPow2>
+using ScalableTag = typename ScalableTagChecker<T, kPow2>::type;
+template <class D> using TFromD = typename D::T;
+template <class T, class D> using Rebind = typename D::Rebind<T>;
+template <class D> using RebindToSigned = Rebind<MakeSigned<D>, D>;
+template <class D> using RebindToUnsigned = Rebind<MakeUnsigned<D>, D>;
+template <class> struct DFromV_t;
+template <class V> using DFromV = typename DFromV_t<V>::type;
+template <> struct DFromV_t<vint8mf8_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, -3>;
+};
+template <> struct DFromV_t<vint8mf4_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, -2>;
+};
+template <> struct DFromV_t<vint8mf2_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, -1>;
+};
+template <> struct DFromV_t<vint8m1_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, 0>;
+};
+template <> struct DFromV_t<vint8m2_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, 1>;
+};
+template <> struct DFromV_t<vint8m4_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, 2>;
+};
+template <> struct DFromV_t<vint8m8_t> {
+ using Lane = int8_t;
+ using type = ScalableTag<Lane, 3>;
+};
+template <int N> int Lanes(Simd<int8_t, N, -3>);
+template <int N> int Lanes(Simd<int8_t, N, -2>);
+template <int N> int Lanes(Simd<int8_t, N, -1>);
+template <int N> int Lanes(Simd<int8_t, N, 0>);
+template <int N> int Lanes(Simd<int8_t, N, 1>);
+template <int N> int Lanes(Simd<int8_t, N, 2>);
+template <int N> int Lanes(Simd<int8_t, N, 3>);
+template <int N> vuint8mf8_t Set(Simd<uint8_t, N, -3>, uint8_t);
+template <int N> vuint8mf4_t Set(Simd<uint8_t, N, -2>, uint8_t);
+template <int N> vuint8mf2_t Set(Simd<uint8_t, N, -1>, uint8_t);
+template <int N> vuint8m1_t Set(Simd<uint8_t, N, 0>, uint8_t);
+template <int N> vuint8m2_t Set(Simd<uint8_t, N, 1>, uint8_t);
+template <int N> vuint8m4_t Set(Simd<uint8_t, N, 2>, uint8_t);
+template <int N> vuint8m8_t Set(Simd<uint8_t, N, 3>, uint8_t arg) {
+ return __riscv_vmv_v_x_u8m8(arg, 0);
+}
+template <int N> vint8mf8_t Set(Simd<int8_t, N, -3>, int8_t);
+template <int N> vint8mf4_t Set(Simd<int8_t, N, -2>, int8_t);
+template <int N> vint8mf2_t Set(Simd<int8_t, N, -1>, int8_t);
+template <int N> vint8m1_t Set(Simd<int8_t, N, 0>, int8_t);
+template <int N> vint8m2_t Set(Simd<int8_t, N, 1>, int8_t);
+template <int N> vint8m4_t Set(Simd<int8_t, N, 2>, int8_t);
+template <int N> vint8m8_t Set(Simd<int8_t, N, 3>, int8_t);
+template <class D> using VFromD = decltype(Set(D(), TFromD<D>()));
+template <class D> VFromD<D> Zero(D d) {
+ RebindToUnsigned<decltype(d)> du;
+ return BitCast(d, Set(du, 0));
+}
+template <typename T, int N>
+vuint8mf8_t BitCastToByte(Simd<T, N, -3>, vuint8mf8_t);
+template <typename T, int N>
+vuint8mf4_t BitCastToByte(Simd<T, N, -2>, vuint8mf4_t);
+template <typename T, int N>
+vuint8mf2_t BitCastToByte(Simd<T, N, -1>, vuint8mf2_t);
+template <typename T, int N>
+vuint8m1_t BitCastToByte(Simd<T, N, 0>, vuint8m1_t);
+template <typename T, int N>
+vuint8m2_t BitCastToByte(Simd<T, N, 1>, vuint8m2_t);
+template <typename T, int N>
+vuint8m4_t BitCastToByte(Simd<T, N, 2>, vuint8m4_t);
+template <typename T, int N>
+vuint8m8_t BitCastToByte(Simd<T, N, 3>, vuint8m8_t v) {
+ return v;
+}
+template <typename T, int N>
+vuint8mf8_t BitCastToByte(Simd<T, N, -3>, vint8mf8_t);
+template <int N> vint8mf8_t BitCastFromByte(Simd<int8_t, N, -3>, vuint8mf8_t);
+template <typename T, int N>
+vuint8mf4_t BitCastToByte(Simd<T, N, -2>, vint8mf4_t);
+template <int N> vint8mf4_t BitCastFromByte(Simd<int8_t, N, -2>, vuint8mf4_t);
+template <typename T, int N>
+vuint8mf2_t BitCastToByte(Simd<T, N, -1>, vint8mf2_t);
+template <int N> vint8mf2_t BitCastFromByte(Simd<int8_t, N, -1>, vuint8mf2_t);
+template <typename T, int N> vuint8m1_t BitCastToByte(Simd<T, N, 0>, vint8m1_t);
+template <int N> vint8m1_t BitCastFromByte(Simd<int8_t, N, 0>, vuint8m1_t);
+template <typename T, int N> vuint8m2_t BitCastToByte(Simd<T, N, 1>, vint8m2_t);
+template <int N> vint8m2_t BitCastFromByte(Simd<int8_t, N, 1>, vuint8m2_t);
+template <typename T, int N> vuint8m4_t BitCastToByte(Simd<T, N, 2>, vint8m4_t);
+template <int N> vint8m4_t BitCastFromByte(Simd<int8_t, N, 2>, vuint8m4_t);
+template <typename T, int N> vuint8m8_t BitCastToByte(Simd<T, N, 3>, vint8m8_t);
+template <int N> vint8m8_t BitCastFromByte(Simd<int8_t, N, 3>, vuint8m8_t v) {
+ return __riscv_vreinterpret_v_u8m8_i8m8(v);
+}
+template <class D, class FromV> VFromD<D> BitCast(D d, FromV v) {
+ return BitCastFromByte(d, BitCastToByte(d, v));
+}
+vint8mf8_t And(vint8mf8_t, vint8mf8_t);
+vint8mf4_t And(vint8mf4_t, vint8mf4_t);
+vint8mf2_t And(vint8mf2_t, vint8mf2_t);
+vint8m1_t And(vint8m1_t, vint8m1_t);
+vint8m2_t And(vint8m2_t, vint8m2_t);
+vint8m4_t And(vint8m4_t, vint8m4_t);
+vint8m8_t And(vint8m8_t, vint8m8_t);
+vint8mf8_t Xor(vint8mf8_t, vint8mf8_t);
+vint8mf4_t Xor(vint8mf4_t, vint8mf4_t);
+vint8mf2_t Xor(vint8mf2_t, vint8mf2_t);
+vint8m1_t Xor(vint8m1_t, vint8m1_t);
+vint8m2_t Xor(vint8m2_t, vint8m2_t);
+vint8m4_t Xor(vint8m4_t, vint8m4_t);
+vint8m8_t Xor(vint8m8_t, vint8m8_t);
+template <class V> V AndNot(V);
+template <class V> V Xor3(V);
+template <class V> V Neg(V);
+template <int> vuint8mf8_t ShiftLeft(vuint8mf8_t);
+template <int> vuint8mf4_t ShiftLeft(vuint8mf4_t);
+template <int> vuint8mf2_t ShiftLeft(vuint8mf2_t);
+template <int> vuint8m1_t ShiftLeft(vuint8m1_t);
+template <int> vuint8m2_t ShiftLeft(vuint8m2_t);
+template <int> vuint8m4_t ShiftLeft(vuint8m4_t);
+template <int> vuint8m8_t ShiftLeft(vuint8m8_t);
+vint8mf8_t MaskedSubOr(vint8mf8_t, vbool64_t, vint8mf8_t, vint8mf8_t);
+vint8mf4_t MaskedSubOr(vint8mf4_t, vbool32_t, vint8mf4_t, vint8mf4_t);
+vint8mf2_t MaskedSubOr(vint8mf2_t, vbool16_t, vint8mf2_t, vint8mf2_t);
+vint8m1_t MaskedSubOr(vint8m1_t, vbool8_t, vint8m1_t, vint8m1_t);
+vint8m2_t MaskedSubOr(vint8m2_t, vbool4_t, vint8m2_t, vint8m2_t);
+vint8m4_t MaskedSubOr(vint8m4_t, vbool2_t, vint8m4_t, vint8m4_t);
+vint8m8_t MaskedSubOr(vint8m8_t no, vbool1_t m, vint8m8_t a, vint8m8_t b) {
+ return __riscv_vsub_vv_i8m8_mu(m, no, a, b, 0);
+}
+vbool64_t Lt(vint8mf8_t, vint8mf8_t);
+vbool32_t Lt(vint8mf4_t, vint8mf4_t);
+vbool16_t Lt(vint8mf2_t, vint8mf2_t);
+vbool8_t Lt(vint8m1_t, vint8m1_t);
+vbool4_t Lt(vint8m2_t, vint8m2_t);
+vbool2_t Lt(vint8m4_t, vint8m4_t);
+vbool1_t Lt(vint8m8_t a, vint8m8_t b) {
+ return __riscv_vmslt_vv_i8m8_b1(a, b, 0);
+}
+template <class V> V BroadcastSignBit(V);
+template <class V> V IfNegativeThenElse(V);
+template <int N> void Store(vint8mf8_t, Simd<int8_t, N, -3>, int8_t *);
+template <int N> void Store(vint8mf4_t, Simd<int8_t, N, -2>, int8_t *);
+template <int N> void Store(vint8mf2_t, Simd<int8_t, N, -1>, int8_t *);
+template <int N> void Store(vint8m1_t, Simd<int8_t, N, 0>, int8_t *);
+template <int N> void Store(vint8m2_t, Simd<int8_t, N, 1>, int8_t *);
+template <int N> void Store(vint8m4_t, Simd<int8_t, N, 2>, int8_t *);
+template <int N> void Store(vint8m8_t, Simd<int8_t, N, 3>, int8_t *);
+template <class D, class V, EnableIf<D().Pow2() <= 2> * = nullptr>
+V InterleaveUpperBlocks(D, V, V) {}
+template <class D, class V, EnableIf<(D().Pow2() > 2)> * = nullptr>
+V InterleaveUpperBlocks(D, V, V);
+template <typename T, int N, int kPow2>
+constexpr bool IsGE128(Simd<T, N, kPow2>) {
+ return kPow2 >= 0;
+}
+template <class D, class V, EnableIf<IsGE128(D())> * = nullptr>
+V InterleaveLower(D, V, V);
+template <class D, class V, EnableIf<!IsGE128(D())> * = nullptr>
+V InterleaveLower(D, V, V);
+template <class D, class V, EnableIf<IsGE128(D())> * = nullptr>
+V InterleaveUpper(D d, V a, V b) {
+ return InterleaveUpperBlocks(d, a, b);
+}
+template <class D, class V, EnableIf<!IsGE128(D())> * = nullptr>
+V InterleaveUpper(D, V, V);
+template <class D, typename T2> VFromD<D> Iota(D, T2);
+template <class D> using Vec = decltype(Zero(D()));
+template <class D> Vec<D> SignBit(D);
+template <class V> V IfNegativeThenElseZero(V);
+template <class V> V IfNegativeThenZeroElse(V);
+template <class V> V BitwiseIfThenElse(V, V, V);
+template <class V> inline V IfNegativeThenNegOrUndefIfZero(V mask, V v) {
+ auto zero = Zero(DFromV<V>());
+ return MaskedSubOr(v, Lt(mask, zero), zero, v);
+}
+template <class D> Vec<D> PositiveIota(D);
+int AssertVecEqual_line;
+template <class D, typename T = TFromD<D>>
+inline void AssertVecEqual(D d, Vec<D> expected, Vec<D> actual, char *) {
+ int N = Lanes(d);
+ auto expected_lanes = AllocateAligned<T>(N),
+ actual_lanes = AllocateAligned<T>(N);
+ if (expected_lanes && actual_lanes)
+ Abort("", "");
+ Store(expected, d, expected_lanes.get());
+ Store(actual, d, actual_lanes.get());
+ MakeTypeInfo<T>();
+ char *target_name = TargetName();
+ AssertArrayEqual(expected_lanes.get(), actual_lanes.get(), target_name,
+ AssertVecEqual_line);
+}
+template <typename> constexpr int MinPow2() { return sizeof(int) ? -3 : 0; }
+template <typename T, int kPow2, int kMaxPow2, int, class Test>
+struct ForeachPow2 {
+ static void Do(int min_lanes) {
+ ScalableTag<T, kPow2> d;
+ Lanes(d);
+ Test()(T(), d);
+ ForeachPow2<T, kPow2 + 1, kMaxPow2, kPow2 + 1 <= kMaxPow2, Test>::Do(
+ min_lanes);
+ }
+};
+template <typename T, int kPow2, int kMaxPow2, class Test>
+struct ForeachPow2<T, kPow2, kMaxPow2, false, Test> {
+ static void Do(int);
+};
+template <typename T, int kAddMin, int kSubMax, class Test>
+using ForeachPow2Trim =
+ ForeachPow2<T, MinPow2<T>(), 3, kAddMin <= kSubMax, Test>;
+template <class Test, int kPow2> struct ForExtendableVectors {
+ template <typename T> void operator()(T) {
+ ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
+ }
+};
+template <class Test> struct ForPartialVectors {
+ template <typename T> void operator()(T t) {
+ ForExtendableVectors<Test, 0>()(t);
+ }
+};
+template <class Func> void ForSignedTypes(Func func) { func(int8_t()); }
+struct TestIfNegative {
+ template <class T, class D> void operator()(T, D d) {
+ auto vp = Iota(d, 1), vsignbit = SignBit(d);
+ RebindToSigned<decltype(d)> di;
+ RebindToUnsigned<decltype(d)> du;
+ BitCast(d, ShiftLeft<sizeof(TFromD<decltype(d)>)>(Iota(du, 1)));
+ auto m1 = Xor3(BitCast(d, Set(du, {})));
+ auto x1 = Xor(vp, BitCast(d, Set(d, {})));
+ auto x2 = Xor(vp, BitCast(d, Set(d, {})));
+ Xor(m1, vsignbit);
+ auto m1_s = BitCast(d, BroadcastSignBit(BitCast(di, m1)));
+ auto expected_2 = BitwiseIfThenElse(m1_s, x2, x1);
+ AssertVecEqual(d, expected_2, IfNegativeThenElse(x2), "");
+ auto expected_3 = And(m1_s, x1);
+ auto expected_4 = AndNot(x2);
+ AssertVecEqual(d, expected_3, IfNegativeThenElseZero(x1), "");
+ AssertVecEqual(d, expected_3, IfNegativeThenZeroElse(x1), "");
+ AssertVecEqual(d, expected_4, IfNegativeThenZeroElse(x2), "");
+ AssertVecEqual(d, expected_4, IfNegativeThenElseZero(x2), "");
+ }
+};
+void TestAllIfNegative() {
+ ForSignedTypes(ForPartialVectors<TestIfNegative>());
+}
+template <class D>
+void TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(D d, Vec<D> v1) {
+ Vec<D> v2, v3 = InterleaveLower(d, v1, v1), v5 = InterleaveLower(d, v1, v2);
+ if (Lanes(d) < 2)
+ return;
+ Vec<D> v4 = InterleaveUpper(d, v1, v1);
+ Vec<D> v6 = InterleaveUpper(d, v1, v2);
+ Vec<D> v7 = InterleaveLower(d, v2, v1);
+ Vec<D> v8 = InterleaveUpper(d, v2, v1);
+ AssertVecEqual(d, v3, IfNegativeThenNegOrUndefIfZero(v3, v3), "");
+ AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v4, v4), "");
+ AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v8, v8), "");
+ AssertVecEqual(d, v6, IfNegativeThenNegOrUndefIfZero(v4, v6), "");
+ AssertVecEqual(d, v7, IfNegativeThenNegOrUndefIfZero(v3, v7), "");
+ AssertVecEqual(d, v8, IfNegativeThenNegOrUndefIfZero(v4, v8), "");
+ Vec<D> zero = Zero(d);
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v3, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v4, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v5, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v6, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v7, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v8, zero), "");
+}
+struct TestIfNegativeThenNegOrUndefIfZero {
+ template <typename T, class D> void operator()(T, D d) {
+ auto v1 = PositiveIota(d), v2 = Neg(v1), zero = Zero(d), vmin = Set(d, 0),
+ vmax = Set(d, 0);
+ AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v1, v2), "");
+ AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v2, v1), "");
+ AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(v2, v2), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(zero, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v1, zero), "");
+ AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v2, zero), "");
+ AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmin, v2), "");
+ AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmax, v1), "");
+ AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(vmax, v2), "");
+ TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(d, v1);
+ }
+};
+void TestAllIfNegativeThenNegOrUndefIfZero() {
+ ForSignedTypes(ForPartialVectors<TestIfNegativeThenNegOrUndefIfZero>());
+}