/* Shared functions related to mangling names for the GNU compiler
   for the Java(TM) language.
   Copyright (C) 2001-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>. 

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc.  */

/* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "jcf.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "java-tree.h"
#include "obstack.h"
#include "diagnostic-core.h"

/* Forward declarations for the escaping helpers.  Both functions are
   defined only when HAVE_AS_UTF8 is not defined, so both prototypes
   sit under that same guard; otherwise the UTF-8 configuration is left
   with a static function that is declared but never defined.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int  unicode_mangling_length (const char *, int);
#endif

/* Obstack onto which the mangled name is grown.  */
extern struct obstack *mangle_obstack;

/* Compare the Utf8 string (STR, LENGTH) with the NUL-terminated
   string NAME, one decoded character of STR against one char of NAME
   at a time.  Return the difference at the first mismatch (running
   out of STR before NAME ends counts as a mismatch), 0 if NAME is
   exhausted exactly when STR is, and 1 if NAME is exhausted while
   part of STR is still unread.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *end = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; ++expected)
    {
      /* UTF8_GET advances STR past the character it decodes.  */
      int decoded = UTF8_GET (str, end);
      int diff = decoded - *expected;

      if (diff != 0)
	return diff;
    }

  if (str != end)
    return 1;
  return 0;
}

/* A sorted list of all C++ keywords.  cxx_keyword_p finds entries by
   repeatedly halving the table and comparing character codes, so the
   entries must stay in ascending order by character code: `_Complex'
   comes before the `__' names, which come before the lowercase ones.
   If you change this, be sure also to change the list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",      
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};

/* Return true if NAME is a C++ keyword.  (NAME, LENGTH) need not be
   NUL-terminated.  A keyword followed only by `$' characters is also
   reported as a keyword.  The lookup halves the cxx_keywords table
   until the probe position stops moving.  */
int
cxx_keyword_p (const char *name, int length)
{
  int lo = 0;
  int hi = ARRAY_SIZE (cxx_keywords);
  int probe = (lo + hi) / 2;
  int prev = -1;

  while (probe != prev)
    {
      const char *keyword = cxx_keywords[probe];
      int keyword_len = strlen (keyword);
      /* Only the part of NAME that can overlap the keyword is compared.  */
      int prefix_len = length < keyword_len ? length : keyword_len;
      int order = utf8_cmp ((const unsigned char *) name, prefix_len,
			    keyword);

      if (order == 0)
	{
	  /* The keyword is a prefix of NAME.  It is a match only if
	     whatever follows consists entirely of `$'.  */
	  int tail = prefix_len;

	  while (tail < length && name[tail] == '$')
	    tail++;
	  if (tail == length)
	    return 1;

	  /* Otherwise NAME is longer than the keyword and sorts
	     after it.  */
	  order = 1;
	}

      if (order < 0)
	hi = probe;
      else
	lo = probe;

      prev = probe;
      probe = (lo + hi) / 2;
    }

  return 0;
}

/* If NAME happens to be a C++ keyword, add `$'.

   NAME and LEN must be modifiable lvalues.  When cxx_keyword_p
   accepts (NAME, LEN), NAME is redirected to a copy of the original
   LEN bytes with one `$' appended, and LEN is incremented to cover
   it.  The copy is obtained with alloca in the function that uses the
   macro, and it is not NUL-terminated, so it must only be used
   together with LEN.  LEN is evaluated more than once.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)			\
do							\
  {							\
    if (cxx_keyword_p ((NAME), (LEN)))			\
      {							\
	char *tmp_buf = (char *)alloca ((LEN)+1);	\
	memcpy (tmp_buf, (NAME), (LEN));		\
	tmp_buf[LEN]= '$';				\
	(NAME) = tmp_buf;				\
	(LEN)++;					\
      }							\
  }							\
while (0)


/* If the assembler doesn't support UTF8 in symbol names, some
   characters might need to be escaped.  */

#ifndef HAVE_AS_UTF8

/* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
   appropriately mangled (with Unicode escapes if needed) to
   MANGLE_OBSTACK.  What is emitted is the decimal length of the
   mangled identifier followed by the identifier itself; a name that
   is a C++ keyword first gets a `$' appended.  Note that `java',
   `lang' and `Object' are used so frequently that they could be
   cached.  */

void
append_gpp_mangled_name (const char *name, int len)
{
  int encoded_len, needs_escapes, prefix_len;
  /* Room for the decimal form of any int: at most three digits per
     byte, plus a sign and the terminating NUL.  A fixed six-byte
     buffer is too small once the length reaches six digits, which the
     escaped form (several output characters per input character) can
     do well before LEN itself does.  */
  char buf[sizeof (int) * 3 + 2];

  MANGLE_CXX_KEYWORDS (name, len);

  /* A result of zero means no character needs an escape, so the name
     can be copied through verbatim.  */
  encoded_len = unicode_mangling_length (name, len);
  needs_escapes = encoded_len > 0;

  prefix_len = sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
  obstack_grow (mangle_obstack, buf, prefix_len);

  if (needs_escapes)
    append_unicode_mangled_name (name, len);
  else
    obstack_grow (mangle_obstack, name, len);
}

/* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
   appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
   Characters needing an escape are encoded `__UNN_' to `__UNNNN_'
   (always at least two hexadecimal digits), in which case `__U' will
   be mangled `__U_'.  The number of characters emitted must agree
   with what unicode_mangling_length computes for the same name, as
   that value is written out as the length prefix.  */

static void
append_unicode_mangled_name (const char *name, int len)
{
  const unsigned char *ptr;
  const unsigned char *limit = (const unsigned char *)name + len;
  int uuU = 0;			/* Consecutive `_' characters just seen.  */
  for (ptr = (const unsigned char *) name;  ptr < limit;  )
    {
      int ch = UTF8_GET(ptr, limit);

      /* NOTE(review): CH is a decoded character and can exceed 0xff;
	 confirm that ISALNUM is meaningful for such values.  */
      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
        {
	  obstack_1grow (mangle_obstack, ch);
          uuU = 0;
        }
      /* Everything else needs encoding */
      else
	{
	  /* "__U", two hexadecimal digits per byte of the value, "_"
	     and the terminating NUL.  Sizing for a full unsigned int
	     keeps sprintf in bounds even if CH is negative
	     (unicode_mangling_length reports an invalid Utf8 name for
	     such values, presumably malformed input).  */
	  char buf [sizeof "__U_" + sizeof (unsigned int) * 2];
	  if (ch == '_' || ch == 'U')
	    {
	      /* Prepare to recognize __U */
	      if (ch == '_' && (uuU < 3))
		{
		  uuU++;
		  obstack_1grow (mangle_obstack, ch);
		}
	      /* We recognize __U that we wish to encode
                 __U_. Finish the encoding. */
	      else if (ch == 'U' && (uuU == 2))
		{
		  uuU = 0;
		  obstack_grow (mangle_obstack, "U_", 2);
		}
	      /* Otherwise, just reset uuU and emit the character we
                 have. */
	      else
		{
		  uuU = 0;
		  obstack_1grow (mangle_obstack, ch);
		}
	      continue;
	    }
	  /* Pad to two digits: unicode_mangling_length counts at least
	     two hexadecimal digits per escape, so a bare "%x" would
	     come up one character short for values below 0x10.  */
	  sprintf (buf, "__U%02x_", (unsigned int) ch);
	  obstack_grow (mangle_obstack, buf, strlen (buf));
	  uuU = 0;
	}
    }
}

/* Assuming (NAME, LEN) is a Utf8-encoded string, calculate the
   length of the string as mangled (a la g++) including Unicode
   escapes.  If no escapes are needed, return 0.  An error is
   reported for any character that decodes to a negative value, and
   counting then continues.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *end = cursor + len;
  int escaped = 0;		/* Set once anything needs an escape.  */
  int total = 0;		/* Characters in the mangled name.  */
  int underscores = 0;		/* Consecutive `_' seen, to spot `__U'.  */

  while (cursor < end)
    {
      int ch = UTF8_GET (cursor, end);

      if (ch < 0)
	error ("internal error - invalid Utf8 name");

      /* Characters that are copied through unchanged.  */
      if (ch == '$' || (ch != 'U' && ISALNUM (ch)))
	{
	  total++;
	  underscores = 0;
	  continue;
	}

      /* An underscore is emitted as is.  The run counter goes up to
	 three and starts over on the underscore after that.  */
      if (ch == '_')
	{
	  total++;
	  underscores = underscores < 3 ? underscores + 1 : 0;
	  continue;
	}

      /* A `U' right after exactly two underscores is emitted as `U_'
	 and makes the name count as escaped; any other `U' is a
	 single character.  */
      if (ch == 'U')
	{
	  if (underscores == 2)
	    {
	      total += 2;
	      escaped = 1;
	    }
	  else
	    total++;
	  underscores = 0;
	  continue;
	}

      /* Everything else is escaped: four characters of framing plus
	 two, three or four hexadecimal digits depending on the
	 magnitude of the character.  */
      if (ch > 0xfff)
	total += 8;
      else if (ch > 0xff)
	total += 7;
      else
	total += 6;
      escaped = 1;
      underscores = 0;
    }

  return escaped ? total : 0;
}

#else

/* The assembler supports UTF8, we don't use escapes. Mangling is
   simply <N>NAME. <N> is the number of UTF8 encoded characters that
   are found in NAME (after a `$' has been appended if NAME is a C++
   keyword). Note that `java', `lang' and `Object' are used so
   frequently that they could be cached.  */

void
append_gpp_mangled_name (const char *name, int len)
{
  const unsigned char *ptr;
  const unsigned char *limit;
  int encoded_len, prefix_len;
  /* Room for the decimal form of any int: at most three digits per
     byte, plus a sign and the terminating NUL.  A fixed six-byte
     buffer overflows as soon as the count reaches six digits.  */
  char buf [sizeof (int) * 3 + 2];
  
  MANGLE_CXX_KEYWORDS (name, len);

  limit = (const unsigned char *)name + len;

  /* Compute the length of the string we wish to mangle.  Each pass
     decodes one character; UTF8_GET moves PTR forward.  */
  for (encoded_len =  0, ptr = (const unsigned char *) name;
       ptr < limit; encoded_len++)
    {
      int ch = UTF8_GET(ptr, limit);

      if (ch < 0)
	error ("internal error - invalid Utf8 name");
    }

  prefix_len = sprintf (buf, "%d", encoded_len);
  obstack_grow (mangle_obstack, buf, prefix_len);
  obstack_grow (mangle_obstack, name, len);
}

#endif /* HAVE_AS_UTF8 */