Add libcody

In order to separate the compiler from the build system, C++ Modules, as implemented in GCC, introduces a communication channel between those two entities. This is implemented by libcody. It is anticipated that other implementations will also implement this protocol, or use libcody to provide it. * Makefile.def: Add libcody. * configure.ac: Add libcody. * Makefile.in: Regenerated. * configure: Regenerated. gcc/ * Makefile.in (CODYINC, CODYLIB, CODYLIB_H): New. Use them. libcody/ * configure.ac: New. * CMakeLists.txt: New. * CODING.md: New. * CONTRIB.md: New. * LICENSE: New. * LICENSE.gcc: New. * Makefile.in: New. * Makesub.in: New. * README.md: New. * buffer.cc: New. * build-aux/config.guess: New. * build-aux/config.sub: New. * build-aux/install-sh: New. * client.cc: New. * cmake/libcody-config-ix.cmake: New. * cody.hh: New. * config.h.in: New. * config.m4: New. * configure: New. * configure.ac: New. * dox.cfg.in: New. * fatal.cc: New. * gdbinit.in: New. * internal.hh: New. * netclient.cc: New. * netserver.cc: New. * packet.cc: New. * resolver.cc: New. * server.cc: New. * tests/01-serialize/connect.cc: New. * tests/01-serialize/decoder.cc: New. * tests/01-serialize/encoder.cc: New. * tests/02-comms/client-1.cc: New. * tests/02-comms/pivot-1.cc: New. * tests/02-comms/server-1.cc: New. * tests/Makesub.in: New. * tests/jouster: New.
author: Nathan Sidwell <nathan@acm.org> 2020-12-14 08:10:27 -0800
committer: Nathan Sidwell <nathan@acm.org> 2020-12-15 07:09:59 -0800
commit: 362303298ac4c1f93bda87535df2b726481d54bb (patch)
tree: b728e42aa7e93c1fd673e75ee0071b86b8ae9c6c /libcody/buffer.cc
parent: c5271279d6e86df0d0203c11fc4c3e3c99a14bb7 (diff)
download: gcc-362303298ac4c1f93bda87535df2b726481d54bb.zip
gcc-362303298ac4c1f93bda87535df2b726481d54bb.tar.gz
gcc-362303298ac4c1f93bda87535df2b726481d54bb.tar.bz2
1 files changed, 387 insertions, 0 deletions
diff --git a/libcody/buffer.cc b/libcody/buffer.cc
new file mode 100644
index 0000000..52df317
--- /dev/null
+++ b/libcody/buffer.cc
@@ -0,0 +1,387 @@
+// CODYlib		-*- mode:c++ -*-
+// Copyright (C) 2020 Nathan Sidwell, nathan@acm.org
+// License: Apache v2.0
+
+// Cody
+#include "internal.hh"
+// C++
+#include <algorithm>
+// C
+#include <cstring>
+// OS
+#include <unistd.h>
+#include <cerrno>
+
+// MessageBuffer code
+
+// Lines consist of words and end with a NEWLINE (0xa) char
+// Whitespace characters are TAB (0x9) and SPACE (0x20)
+// Words consist of non-whitespace chars separated by whitespace.
+// Multiple lines in one transaction are indicated by ending non-final
+// lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE
+// i.e. the ';' is a word of its own, directly followed by the NEWLINE.
+// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting.
+// Quoting is done with '...'
+// Anything outside of [-+_/%.a-zA-Z0-9] needs quoting
+// Within quotes, chars below SPACE (0x20), DEL (0x7f), \' and \\ need escaping.
+// Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>?
+// Spaces separate words, UTF8 encoding for non-ascii chars
+
+namespace Cody {
+namespace Detail {
+
+// The character that, as the last word before a NEWLINE, marks a line
+// as being continued by a further line of the same message.
+static const char CONTINUE = S2C(u8";");
+
+// Start a new line of the message being built.  If the buffer already
+// holds text, that text is first closed off with the continuation
+// sequence SPACE, CONTINUE, NEWLINE.  lastBol is then set to the
+// offset at which the new line begins.
+
+void MessageBuffer::BeginLine ()
+{
+  bool const havePrevious = !buffer.empty ();
+
+  if (havePrevious)
+    {
+      // The three characters that turn the previous line into a
+      // continued one.
+      char const tail[] = {S2C(u8" "), CONTINUE, S2C(u8"\n")};
+
+      buffer.reserve (buffer.size () + sizeof (tail));
+      buffer.insert (buffer.end (), tail, tail + sizeof (tail));
+    }
+
+  lastBol = buffer.size ();
+}
+
+// Append STR to the buffer.  LEN is the number of bytes to take from
+// STR; the value ~size_t (0) means STR is NUL-terminated and its
+// length is measured here.
+//
+// QUOTE means 'maybe quote': the string is scanned and is only really
+// quoted when it is empty or contains a character outside of
+// [-+_/%.a-zA-Z0-9].  When QUOTE is false the bytes are appended
+// verbatim (and an empty string appends nothing).
+//
+// Inside quotes, backslash and single quote are backslash-escaped, TAB
+// and NEWLINE become \t and \n, and every other byte below SPACE or at
+// or above DEL (0x7f) is written as a backslash followed by two
+// lower-case hex digits.  The quoted form is therefore pure printable
+// ASCII, which is the only thing Lex accepts: it treats any byte below
+// SPACE or >= 0x7f as malformed, so emitting bytes >= 0x80 verbatim
+// (e.g. UTF8 sequences) would produce lines Lex cannot read back.
+
+void MessageBuffer::Append (char const *str, bool quote, size_t len)
+{
+  if (len == ~size_t (0))
+    len = strlen (str);
+
+  if (!len && !quote)
+    return;
+
+  // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything
+  // that could remotely be shell-active.  An empty string keeps QUOTE
+  // set, so that it is emitted as ''.
+  if (quote && len)
+    {
+      quote = false;
+      // Scan looking for quote-needing characters.  We could just
+      // append until we find one, but that's probably confusing
+      for (size_t ix = len; ix--;)
+        {
+          unsigned char c = (unsigned char)str[ix];
+          if (!((c >= S2C(u8"a") && c <= S2C(u8"z"))
+                || (c >= S2C(u8"A") && c <= S2C(u8"Z"))
+                || (c >= S2C(u8"0") && c <= S2C(u8"9"))
+                || c == S2C(u8"-") || c == S2C(u8"+") || c == S2C(u8"_")
+                || c == S2C(u8"/") || c == S2C(u8"%") || c == S2C(u8".")))
+            {
+              quote = true;
+              break;
+            }
+        }
+    }
+
+  // Maximal length of appended string: each byte can expand to a
+  // three-character hex escape, plus the two quote characters.
+  buffer.reserve (buffer.size () + len * (quote ? 3 : 1) + 2);
+
+  if (quote)
+    buffer.push_back (S2C(u8"'"));
+
+  for (auto *end = str + len; str != end;)
+    {
+      // [str, e) is the next run of bytes that can be copied verbatim.
+      // When not quoting that is simply everything.
+      auto *e = end;
+
+      if (quote)
+        // Look for next escape-needing char.  More relaxed than
+        // the earlier needs-quoting check.
+        for (e = str; e != end; ++e)
+          {
+            unsigned char c = (unsigned char)*e;
+            if (c < S2C(u8" ") || c >= 0x7f
+                || c == S2C(u8"\\") || c == S2C(u8"'"))
+              break;
+          }
+      buffer.insert (buffer.end (), str, e);
+      str = e;
+
+      if (str == end)
+        break;
+
+      // *str needs escaping.
+      buffer.push_back (S2C(u8"\\"));
+      unsigned char c = (unsigned char)*str++;
+      switch (c)
+        {
+        case S2C(u8"\t"):
+          buffer.push_back (S2C(u8"t"));
+          break;
+
+        case S2C(u8"\n"):
+          buffer.push_back (S2C(u8"n"));
+          break;
+
+        case S2C(u8"'"):
+        case S2C(u8"\\"):
+          // These escape as themselves
+          buffer.push_back (c);
+          break;
+
+        default:
+          // Full-on escape.  Use 2 lower-case hex chars, high nibble
+          // first.
+          for (unsigned shift = 8; shift;)
+            {
+              shift -= 4;
+
+              char nibble = (c >> shift) & 0xf;
+              nibble += S2C(u8"0");
+              if (nibble > S2C(u8"9"))
+                nibble += S2C(u8"a") - (S2C(u8"9") + 1);
+              buffer.push_back (nibble);
+            }
+        }
+    }
+
+  if (quote)
+    buffer.push_back (S2C(u8"'"));
+}
+
+// Append the single character C to the buffer exactly as given; no
+// quoting or escaping is applied.
+
+void MessageBuffer::Append (char c)
+{
+  buffer.insert (buffer.end (), c);
+}
+
+// Append U to the message as a word holding its decimal
+// representation, by handing the digits to AppendWord.
+
+void MessageBuffer::AppendInteger (unsigned u)
+{
+  std::string digits = std::to_string (u);
+
+  AppendWord (digits);
+}
+
+// Write the unsent part of the buffer -- everything from lastBol
+// onwards -- to FD with a single write call, advancing lastBol by the
+// number of bytes written.
+// Returns 0 when everything was written, EAGAIN after a short write,
+// otherwise the errno left by write.  Unless the result is EAGAIN or
+// EINTR the buffer is emptied and lastBol zeroed, ready for the next
+// message; in those two cases the unsent text is kept so the call can
+// be repeated.
+
+int MessageBuffer::Write (int fd) noexcept
+{
+  size_t const pending = buffer.size () - lastBol;
+  ssize_t const written = write (fd, buffer.data () + lastBol, pending);
+
+  int status;
+  if (written < 0)
+    status = errno;
+  else
+    {
+      lastBol += written;
+      // A short write means there is still text to send
+      status = size_t (written) == pending ? 0 : EAGAIN;
+    }
+
+  bool const retryable = status == EAGAIN || status == EINTR;
+  if (!retryable)
+    {
+      // Done, successfully or otherwise.  Reset for next message
+      buffer.clear ();
+      lastBol = 0;
+    }
+
+  return status;
+}
+
+// Read more text from FD with a single read call, appending it to the
+// buffer, then check whether the newly read text completes a message.
+// Returns:
+//   the errno left by read, if read failed (the buffer is unchanged);
+//   -1 at end of file;
+//   EAGAIN when the message is incomplete: the new text contains no
+//     NEWLINE, or the buffer ends in a NEWLINE that directly follows a
+//     CONTINUE character;
+//   EINVAL when text follows a NEWLINE that does not directly follow a
+//     CONTINUE character -- the buffer is truncated to just after that
+//     NEWLINE;
+//   0 when the buffer ends with a NEWLINE that does not follow a
+//     CONTINUE character.
+
+int MessageBuffer::Read (int fd) noexcept
+{
+  constexpr size_t chunk = 200;
+
+  // Grow the buffer to its capacity, adding another chunk when less
+  // than half a chunk of room would be available.
+  size_t const oldSize = buffer.size ();
+  size_t newSize = buffer.capacity ();
+  if (newSize - oldSize < chunk / 2)
+    newSize += chunk;
+  buffer.resize (newSize);
+
+  auto scan = buffer.begin () + oldSize;
+  ssize_t const got = read (fd, &*scan, newSize - oldSize);
+
+  // Shrink back to the text that is really there
+  buffer.resize (oldSize + (got >= 0 ? got : 0));
+
+  if (got < 0)
+    return errno;
+
+  if (got == 0)
+    // End of file
+    return -1;
+
+  // Walk the NEWLINEs in the new text.  Until one is seen, more text
+  // is needed.
+  bool continued = true;
+  while (true)
+    {
+      auto const eol = std::find (scan, buffer.end (), S2C(u8"\n"));
+      if (eol == buffer.end ())
+        break;
+
+      continued = eol != buffer.begin () && eol[-1] == CONTINUE;
+      scan = eol + 1;
+
+      if (scan == buffer.end ())
+        break;
+
+      if (!continued)
+        {
+          // There is no continuation, but there are chars after the
+          // newline.  Truncate the buffer and return an error
+          buffer.resize (scan - buffer.begin ());
+          return EINVAL;
+        }
+    }
+
+  return continued ? EAGAIN : 0;
+}
+
+// Lex the line starting at lastBol into RESULT, one string per word,
+// and advance lastBol to just after the line's NEWLINE.
+// RESULT is always cleared first.  Returns:
+//   ENOENT when IsAtEnd () is true, or when the line holds no words;
+//   EINVAL when the line is malformed -- RESULT then holds a single
+//     string, the raw text of the line with any trailing " ;"
+//     continuation marker removed;
+//   0 when at least one word was lexed.
+// Words are separated by SPACE and TAB.  Outside of quotes any other
+// byte that is <= SPACE or >= 0x7f is malformed.  A CONTINUE character
+// is only valid when it does not touch a preceding word and is directly
+// followed by the NEWLINE.  Quoted text runs from ' to the next
+// unescaped ' and is joined onto the current word.
+int MessageBuffer::Lex (std::vector<std::string> &result)
+{
+  result.clear ();
+
+  // NOTE(review): ERR is never read below.
+  int err = ENOENT;
+  // NOTE(review): IsAtEnd is declared elsewhere; presumably it means no
+  // unlexed text remains -- confirm.
+  if (IsAtEnd ())
+    return ENOENT;
+
+  // The scanning below relies on finding a NEWLINE to stop at
+  Assert (buffer.back () == S2C(u8"\n"));
+
+  auto iter = buffer.begin () + lastBol;
+
+  // WORD is the word currently being accumulated, or nullptr when
+  // between words.
+  for (std::string *word = nullptr;;)
+    {
+      char c = *iter;
+
+      ++iter;
+      if (c == S2C(u8" ") || c == S2C(u8"\t"))
+	{
+	  // Whitespace ends the current word
+	  word = nullptr;
+	  continue;
+	}
+
+      if (c == S2C(u8"\n"))
+	// End of the line, and no continuation
+	break;
+
+      if (c == CONTINUE)
+	{
+	  // Line continuation
+	  if (word || *iter != S2C(u8"\n"))
+	    goto malformed;
+	  // Step over the NEWLINE too
+	  ++iter;
+	  break;
+	}
+
+      // Whether char is signed or unsigned, this rejects control
+      // characters, DEL and every byte with the top bit set.
+      if (c <= S2C(u8" ") || c >= 0x7f)
+	goto malformed;
+
+      if (!word)
+	{
+	  // First character of a new word
+	  result.emplace_back ();
+	  word = &result.back ();
+	}
+
+      if (c == S2C(u8"'"))
+	{
+	  // Quoted word
+	  for (;;)
+	    {
+	      c = *iter;
+
+	      if (c == S2C(u8"\n"))
+		{
+		  // Unterminated quote.  This is also the target of
+		  // every 'goto malformed': report the raw line text
+		  // and skip to the next line.
+		malformed:;
+		  result.clear ();
+		  iter = std::find (iter, buffer.end (), S2C(u8"\n"));
+		  auto back = iter;
+		  if (back[-1] == CONTINUE  && back[-2] == S2C(u8" "))
+		    // Smells like a line continuation
+		    back -= 2;
+		  result.emplace_back (&buffer[lastBol],
+				       back - buffer.begin () - lastBol);
+		  ++iter;
+		  lastBol = iter - buffer.begin ();
+		  return EINVAL;
+		}
+
+	      if (c < S2C(u8" ") || c >= 0x7f)
+		goto malformed;
+
+	      ++iter;
+	      if (c == S2C(u8"'"))
+		// Closing quote
+		break;
+
+	      if (c == S2C(u8"\\"))
+		// escape
+		switch (c = *iter)
+		  {
+		    // \\ and \' stand for themselves
+		    case S2C(u8"\\"):
+		    case S2C(u8"'"):
+		      ++iter;
+		      break;
+
+		    case S2C(u8"n"):
+		      c = S2C(u8"\n");
+		      ++iter;
+		      break;
+
+		    case S2C(u8"_"):
+		      // We used to escape SPACE as \_, so accept that
+		      c = S2C(u8" ");
+		      ++iter;
+		      break;
+
+		    case S2C(u8"t"):
+		      c = S2C(u8"\t");
+		      ++iter;
+		      break;
+
+		    default:
+		      {
+			// Hex escape: one or two digits from
+			// [0-9a-f].  Upper-case digits are not
+			// accepted.  No digit at all is malformed.
+			unsigned v = 0;
+			for (unsigned nibble = 0; nibble != 2; nibble++)
+			  {
+			    c = *iter;
+			    if (c < S2C(u8"0"))
+			      {
+				if (!nibble)
+				  goto malformed;
+				break;
+			      }
+			    else if (c <= S2C(u8"9"))
+			      c -= S2C(u8"0");
+			    else if (c < S2C(u8"a"))
+			      {
+				if (!nibble)
+				  goto malformed;
+				break;
+			      }
+			    else if (c <= S2C(u8"f"))
+			      c -= S2C(u8"a") - 10;
+			    else
+			      {
+				if (!nibble)
+				  goto malformed;
+				break;
+			      }
+			    ++iter;
+			    v = (v << 4) | c;
+			  }
+			c = v;
+		      }
+		  }
+	      // C is now the literal or decoded character
+	      word->push_back (c);
+	    }
+	}
+      else
+	// Unquoted character
+	word->push_back (c);
+    }
+  // ITER is just past the NEWLINE that ended the line
+  lastBol = iter - buffer.begin ();
+  if (result.empty ())
+    return ENOENT;
+
+  return 0;
+}
+
+// Append to STR the raw text of the line that ends just before
+// lastBol, without its terminating NEWLINE and without a trailing
+// " ;" continuation marker.  Does nothing when lastBol is zero.
+
+void MessageBuffer::LexedLine (std::string &str)
+{
+  if (!lastBol)
+    return;
+
+  // The character before lastBol terminates the line of interest
+  size_t end = lastBol - 1;
+
+  // Walk back to the start of that line: either just after the
+  // previous NEWLINE or the start of the buffer.
+  size_t pos = end;
+  while (pos && buffer[pos - 1] != S2C(u8"\n"))
+    pos--;
+
+  // Only a line of at least two characters can carry the marker.
+  // Checking the length first keeps the two reads below inside the
+  // line; without it an empty line, or a lone ';' at the very start of
+  // the buffer, would index before the beginning of the buffer.
+  if (end - pos >= 2
+      && buffer[end - 1] == CONTINUE && buffer[end - 2] == S2C(u8" "))
+    // Strip line continuation
+    end -= 2;
+
+  str.append (&buffer[pos], end - pos);
+}
+} // Detail
+} // Cody
author	Nathan Sidwell <nathan@acm.org>	2020-12-14 08:10:27 -0800
committer	Nathan Sidwell <nathan@acm.org>	2020-12-15 07:09:59 -0800
commit	362303298ac4c1f93bda87535df2b726481d54bb (patch)
tree	b728e42aa7e93c1fd673e75ee0071b86b8ae9c6c /libcody/buffer.cc
parent	c5271279d6e86df0d0203c11fc4c3e3c99a14bb7 (diff)
download	gcc-362303298ac4c1f93bda87535df2b726481d54bb.zip gcc-362303298ac4c1f93bda87535df2b726481d54bb.tar.gz gcc-362303298ac4c1f93bda87535df2b726481d54bb.tar.bz2