6 files changed, 507 insertions, 1 deletions
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 98980ce..b6729ba 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -280,6 +280,16 @@ add_libc_test(
     libc.src.__support.CPP.bit
 )
 
+add_libc_test(
+  weak_avl_test
+  SUITE
+    libc-support-tests
+  SRCS
+    weak_avl_test.cpp
+  DEPENDS
+    libc.src.__support.weak_avl
+)
+
 add_subdirectory(CPP)
 add_subdirectory(File)
 add_subdirectory(RPC)
diff --git a/libc/test/src/__support/weak_avl_test.cpp b/libc/test/src/__support/weak_avl_test.cpp
new file mode 100644
index 0000000..49ff2e8
--- /dev/null
+++ b/libc/test/src/__support/weak_avl_test.cpp
@@ -0,0 +1,274 @@
+//===-- Unittests for WeakAVL ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/optional.h"
+#include "src/__support/weak_avl.h"
+#include "test/UnitTest/Test.h"
+
+using Node = LIBC_NAMESPACE::WeakAVLNode<int>;
+
+namespace {
+int ternary_compare(int a, int b) { return (a > b) - (a < b); }
+constexpr int TEST_SIZE = 128;
+// Validate weak-AVL rank-difference invariant assuming **pure insertion only**
+// (i.e. no erasure has occurred).
+//
+// NOTE: This validator is intentionally *not* correct after erase(), because
+// weak-AVL allows transient or permanent 2-2 configurations during deletion
+// fixup.
+bool validate_pure_insertion(const Node *node) {
+  if (!node)
+    return true;
+  bool left_2 = node->has_rank_diff_2(false);
+  bool right_2 = node->has_rank_diff_2(true);
+  return (!left_2 || !right_2) && validate_pure_insertion(node->get_left()) &&
+         validate_pure_insertion(node->get_right());
+}
+
+// Insert according to pattern `next(i)`
+using NextFn = int (*)(int);
+using OptionalNodePtr = LIBC_NAMESPACE::cpp::optional<Node *>;
+struct Tree {
+  Node *root = nullptr;
+
+  bool validate_pure_insertion() { return ::validate_pure_insertion(root); }
+
+  bool contains(int value) {
+    return Node::find(root, value, ternary_compare).has_value();
+  }
+
+  OptionalNodePtr insert(int value) {
+    return Node::find_or_insert(root, value, ternary_compare);
+  }
+
+  OptionalNodePtr find(int value) {
+    return Node::find(root, value, ternary_compare);
+  }
+
+  void erase(int value) {
+    if (OptionalNodePtr node = Node::find(root, value, ternary_compare))
+      Node::erase(root, node.value());
+  }
+
+  template <typename NextFn> static Tree build(NextFn next, int N) {
+    Tree tree;
+    for (int i = 0; i < N; ++i)
+      tree.insert(next(i));
+    return tree;
+  }
+
+  bool empty() const { return root == nullptr; }
+
+  ~Tree() { Node::destroy(root); }
+};
+
+// Insertion patterns
+static int seq(int i) { return i; }
+
+static int rev(int i) {
+  constexpr int N = TEST_SIZE;
+  return N - 1 - i;
+}
+
+// Coprime stride permutation: i -> (i * X) % N
+static int stride(int i, int prime = 7919) {
+  constexpr int N = TEST_SIZE;
+  return (i * prime) % N;
+}
+
+} // namespace
+
+TEST(LlvmLibcWeakAVLTest, SimpleInsertion) {
+  Tree tree;
+
+  OptionalNodePtr node10 = tree.insert(10);
+  ASSERT_TRUE(node10.has_value());
+  ASSERT_TRUE(tree.insert(5).has_value());
+  ASSERT_TRUE(tree.validate_pure_insertion());
+
+  OptionalNodePtr node15 = tree.insert(15);
+  ASSERT_TRUE(node15.has_value());
+  ASSERT_TRUE(tree.validate_pure_insertion());
+
+  OptionalNodePtr node10_again = tree.insert(10);
+  ASSERT_EQ(*node10, *node10_again);
+  ASSERT_TRUE(tree.validate_pure_insertion());
+}
+
+TEST(LlvmLibcWeakAVLTest, SequentialInsertion) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build(seq, N);
+  ASSERT_TRUE(tree.validate_pure_insertion());
+
+  for (int i = 0; i < N; ++i) {
+    OptionalNodePtr node = tree.insert(i);
+    ASSERT_TRUE(node.has_value());
+    ASSERT_EQ(node.value()->get_data(), i);
+  }
+
+  ASSERT_TRUE(tree.validate_pure_insertion());
+}
+
+TEST(LlvmLibcWeakAVLTest, ReversedInsertion) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build(rev, N);
+  ASSERT_TRUE(tree.validate_pure_insertion());
+
+  for (int i = 0; i < N; ++i) {
+    OptionalNodePtr node = tree.insert(i);
+    ASSERT_TRUE(node.has_value());
+    ASSERT_EQ(node.value()->get_data(), i);
+  }
+
+  ASSERT_TRUE(tree.validate_pure_insertion());
+}
+
+TEST(LlvmLibcWeakAVLTest, StridedInsertion) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build([](int i) { return stride(i); }, N);
+  ASSERT_TRUE(tree.validate_pure_insertion());
+
+  for (int i = 0; i < N; ++i) {
+    OptionalNodePtr node = tree.insert(i);
+    ASSERT_TRUE(node.has_value());
+    ASSERT_EQ(node.value()->get_data(), i);
+  }
+
+  ASSERT_TRUE(tree.validate_pure_insertion());
+}
+
+TEST(LlvmLibcWeakAVLTest, FindExistingAndMissing) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build(seq, N);
+  ASSERT_TRUE(tree.validate_pure_insertion());
+
+  for (int i = 0; i < N; ++i) {
+    OptionalNodePtr node = tree.find(i);
+    ASSERT_TRUE(node.has_value());
+    ASSERT_EQ(node.value()->get_data(), i);
+  }
+
+  ASSERT_FALSE(tree.find(-1).has_value());
+  ASSERT_FALSE(tree.find(N).has_value());
+  ASSERT_FALSE(tree.find(2 * N).has_value());
+}
+
+TEST(LlvmLibcWeakAVLTest, SequentialErase) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build(seq, N);
+
+  for (int i = 0; i < N; ++i) {
+    ASSERT_TRUE(tree.contains(i));
+    tree.erase(i);
+    ASSERT_FALSE(tree.contains(i));
+  }
+
+  ASSERT_TRUE(tree.empty());
+}
+
+TEST(LlvmLibcWeakAVLTest, ReverseErase) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build(seq, N);
+
+  for (int i = N - 1; i >= 0; --i) {
+    ASSERT_TRUE(tree.contains(i));
+    tree.erase(i);
+    ASSERT_FALSE(tree.contains(i));
+  }
+
+  ASSERT_TRUE(tree.empty());
+}
+
+TEST(LlvmLibcWeakAVLTest, StridedErase) {
+  constexpr int N = TEST_SIZE;
+
+  Tree tree = Tree::build(seq, N);
+
+  for (int i = 0; i < N; ++i) {
+    int key = stride(i, 5261);
+    ASSERT_TRUE(tree.contains(key));
+    tree.erase(key);
+    ASSERT_FALSE(tree.contains(key));
+  }
+
+  ASSERT_TRUE(tree.empty());
+}
+
+TEST(LlvmLibcWeakAVLTest, EraseStructuralCases) {
+  Tree tree;
+  int keys[] = {10, 5, 15, 3, 7, 12, 18};
+
+  // rank1:               10              10
+  //                      /              / \
+  // rank0:   10   -->   5        -->   5   15
+
+  // rank2:                   10               10
+  //                         / \              / \
+  // rank1:   10            5   \            5   \
+  //         / \   -->     /     \     -->  /\    \
+  // rank0: 5  15         3       15       3  7    15
+
+  // rank2:     10            10             10
+  //           / \           / \            / \
+  // rank1:   5   \   -->   5   15   -->   5   15
+  //         /\    \       /\   /         /\   / \
+  // rank0: 3  7    15    3  7 12       3  7 12  18
+
+  for (int k : keys)
+    tree.insert(k);
+
+  // Erase leaf.
+  // rank2:     10                   10
+  //           / \                  / \
+  // rank1:   5   15               5   15
+  //         /\   / \      -->     \   / \
+  // rank0: 3  7 12  18            7  12  18
+  tree.erase(3);
+  ASSERT_FALSE(tree.contains(3));
+
+  // Erase internal nodes.
+  // Erase leaf.
+  // rank2:     10                   10               10
+  //           / \                  / \              / \
+  // rank1:   5   15               7   15           /  15
+  //          \   / \      -->     \   / \    -->  /   /\
+  // rank0:    7 12  18            5  12  18      7  12  18
+  tree.erase(5);
+  ASSERT_FALSE(tree.contains(5));
+
+  // Erase root.
+  // rank2:     10              12               12
+  //           / \             / \              / \
+  // rank1:   /  15     -->   /  15     -->    /  15
+  //         /   /\          /   /\           /    \
+  // rank0: 7  12  18      7  10  18         7     18
+  tree.erase(10);
+  ASSERT_FALSE(tree.contains(10));
+
+  int attempts[] = {7, 12, 15, 18};
+  for (int k : attempts)
+    ASSERT_TRUE(tree.contains(k));
+}
+
+TEST(LlvmLibcTreeWalk, InOrderTraversal) {
+  Tree tree = Tree::build([](int x) { return stride(x, 1007); }, TEST_SIZE);
+  int data[TEST_SIZE];
+  int counter = 0;
+  Node::walk(tree.root, [&](Node *node, Node::WalkType type) {
+    if (type == Node::WalkType::InOrder || type == Node::WalkType::Leaf)
+      data[counter++] = node->get_data();
+  });
+  for (int i = 0; i < TEST_SIZE; ++i)
+    ASSERT_EQ(data[i], i);
+}
diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
index a39428f..fde2023 100644
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -137,6 +137,15 @@ if(LIBC_CONF_PRINTF_DISABLE_STRERROR)
   list(APPEND sprintf_test_copts "-DLIBC_COPT_PRINTF_DISABLE_STRERROR")
 endif()
 
+if(LIBC_CONF_PRINTF_DISABLE_WIDE)
+  set(wchar_deps "")
+else()
+  set(wchar_deps
+    libc.hdr.types.wint_t
+    libc.hdr.wchar_macros
+  )
+endif()
+
 add_fp_unittest(
   sprintf_test
   UNIT_TEST_ONLY
@@ -148,6 +157,7 @@ add_fp_unittest(
     libc.src.stdio.sprintf
     libc.src.__support.FPUtil.fp_bits
     libc.include.inttypes
+    ${wchar_deps}
   COMPILE_OPTIONS
     ${sprintf_test_copts}
 )
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 689a38a4..78186ab 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -9,8 +9,13 @@
 #include "src/__support/macros/config.h"
 #include "src/stdio/sprintf.h"
 
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wint_t.h"
+#include "hdr/wchar_macros.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
 #include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/libc_errno.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/RoundingModeUtils.h"
 #include "test/UnitTest/Test.h"
 #include <inttypes.h>
@@ -3487,3 +3492,94 @@ TEST(LlvmLibcSPrintfTest, IndexModeParsing) {
                    "why would u do this, this is such   a pain. %");
 }
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
+
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+TEST(LlvmLibcSprintfTest, WideCharConversion) {
+  char buff[16];
+  int written;
+
+  // 1 byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'A');
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ_LEN(written, buff, "A");
+
+  // 1 byte UTF-8 character left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'A');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "A   ");
+
+  // 1 byte UTF-8 character right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'A');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "   A");
+
+  // 2 byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'¢');
+  EXPECT_EQ(written, 2);
+  ASSERT_STREQ_LEN(written, buff, "¢");
+
+  // 2 byte UTF-8 character left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'¢');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "¢  ");
+
+  // 2 byte UTF-8 character right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'¢');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "  ¢");
+
+  // Euro sign is a 3-byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'€');
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ_LEN(written, buff, "€");
+
+  // Euro sign left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'€');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "€ ");
+
+  // Euro sign right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'€');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, " €");
+
+  // Euro sign right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%+4lc", L'€');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, " €");
+
+  // Grinning face emoji is a 4-byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji with smaller width, left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-3lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji with smaller width, right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%3lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // WEOF test.
+  EXPECT_EQ(LIBC_NAMESPACE::sprintf(buff, "%lc", WEOF), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+
+  // Invalid wide character test
+  EXPECT_EQ(LIBC_NAMESPACE::sprintf(buff, "%lc", static_cast<wint_t>(0x12ffff)),
+            -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
diff --git a/libc/test/src/strings/CMakeLists.txt b/libc/test/src/strings/CMakeLists.txt
index 5f70dc0..6e1befc 100644
--- a/libc/test/src/strings/CMakeLists.txt
+++ b/libc/test/src/strings/CMakeLists.txt
@@ -110,5 +110,20 @@ add_libc_test(
     libc.src.strings.strncasecmp_l
 )
 
+add_libc_test(
+  wide_read_memory_test
+  SUITE
+    libc-strings-tests
+  SRCS
+    wide_read_memory_test.cpp
+  DEPENDS
+    libc.src.string.string_utils
+    libc.src.sys.mman.mmap
+    libc.src.sys.mman.mprotect
+    libc.src.sys.mman.munmap
+    libc.src.unistd.linux.getpagesize
+    libc.src.__support.CPP.array
+)
+
 add_libc_multi_impl_test(bcmp libc-strings-tests SRCS bcmp_test.cpp)
 add_libc_multi_impl_test(bzero libc-strings-tests SRCS bzero_test.cpp)
diff --git a/libc/test/src/strings/wide_read_memory_test.cpp b/libc/test/src/strings/wide_read_memory_test.cpp
new file mode 100644
index 0000000..cc4a2dc
--- /dev/null
+++ b/libc/test/src/strings/wide_read_memory_test.cpp
@@ -0,0 +1,101 @@
+//===-- Memory bounds check test for wide-read functions ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// For performance, some vector-based libc functions read data outside of, but
+// adjacent to, the input address. For example, string_length can read both
+// before and after the data in its src parameter. As part of the
+// implementation, it is allowed to do this. However, the code must take care to
+// avoid address errors. The sanitizers can't distinguish between "the
+// implementation" and user-code, and so report an error. Therefore we can't use
+// them to check if functions like these have memory errors.
+//
+// This test uses mprotect to simulate address sanitization. Tests that read too
+// far outside data will segfault.
+//
+// It creates three adjacent pages in memory. The outer two are mprotected
+// unreadable, the middle usable normally. By placing test data at the edges
+// between the middle page and the others, we can test for bad accesses.
+
+#include "src/__support/CPP/array.h"
+#include "src/string/memory_utils/inline_memset.h"
+#include "src/string/string_utils.h"
+#include "src/sys/mman/mmap.h"
+#include "src/sys/mman/mprotect.h"
+#include "src/sys/mman/munmap.h"
+#include "src/unistd/getpagesize.h"
+#include "test/UnitTest/MemoryMatcher.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+using TwoKilobyteBuffer = cpp::array<char, 2048>;
+// This could be smaller on a target-basis, but that adds complexity and the
+// extra testing is fine.
+static constexpr unsigned long kLargestTestVectorSize = 512;
+
+class LlvmLibcWideAccessMemoryTest : public testing::Test {
+  char *page0_;
+  char *page1_;
+  char *page2_;
+  size_t page_size;
+
+public:
+  void SetUp() override {
+    page_size = LIBC_NAMESPACE::getpagesize();
+    page0_ = static_cast<char *>(
+        LIBC_NAMESPACE::mmap(nullptr, page_size * 3, PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+    ASSERT_NE(static_cast<void *>(page0_), MAP_FAILED);
+    page1_ = page0_ + page_size;
+    page2_ = page1_ + page_size;
+    LIBC_NAMESPACE::mprotect(page0_, page_size, PROT_NONE);
+    LIBC_NAMESPACE::mprotect(page2_, page_size, PROT_NONE);
+  }
+
+  void TearDown() override { LIBC_NAMESPACE::munmap(page0_, page_size * 3); }
+
+  // Repeatedly runs "func" on copies of the data in "buf", each progressively
+  // closer to the boundary of valid memory. Test will segfault if function
+  // under test accesses invalid memory.
+  //
+  // Func should test the function in question just as normal. Recommend making
+  // the amount of test data at least 1.5k, which guarantees a wind-up, multiple
+  // iterations of the inner loop, and a wind-down, even on systems with
+  // 512-byte vectors. The termination condition, eg, end-of string or character
+  // being searched for, should be near the end of the data.
+  template <typename TestFunc>
+  void TestMemoryAccess(const TwoKilobyteBuffer &buf, TestFunc func) {
+    // Run func on data near the start boundary of valid memory.
+    for (unsigned long offset = 0; offset < kLargestTestVectorSize; ++offset) {
+      char *test_addr = page1_ + offset;
+      inline_memcpy(test_addr, buf.data(), buf.size());
+      func(test_addr);
+    }
+    // Run func on data near the end boundary of valid memory.
+    for (unsigned long offset = 0; offset < kLargestTestVectorSize; ++offset) {
+      char *test_addr = page2_ - buf.size() - offset - 1;
+      ASSERT_LE(test_addr + buf.size(), page2_);
+      inline_memcpy(test_addr, buf.data(), buf.size());
+      func(test_addr);
+    }
+  }
+};
+
+TEST_F(LlvmLibcWideAccessMemoryTest, StringLength) {
+  // 1.5 k long vector of a's.
+  TwoKilobyteBuffer buf;
+  inline_memset(buf.data(), 'a', buf.size());
+  // Make sure it is null terminated.
+  buf[buf.size() - 1] = '\0';
+  this->TestMemoryAccess(buf, [this, buf](const char *test_data) {
+    // -1 for the null character.
+    ASSERT_EQ(internal::string_length(test_data), size_t(buf.size() - 1));
+  });
+}
+
+} // namespace LIBC_NAMESPACE_DECL