Merge commit '490e23032baaece71f2ec09fa1805064b150fbc2' [#247]

author: Thomas Schwinge <thomas@codesourcery.com> 2022-01-24 10:03:47 +0100
committer: Thomas Schwinge <thomas_schwinge@mentor.com> 2022-01-24 10:06:43 +0100
commit: 21af490baa734a901fb798bc2ac4df62109bc895 (patch)
tree: a292dc4ac7de999d47f20ab9a2dff597afadea2a /libcpp/include/cpplib.h
parent: 2cce6b8919ce16acd37a7a203049a52925a7e295 (diff)
parent: 490e23032baaece71f2ec09fa1805064b150fbc2 (diff)
download: gcc-21af490baa734a901fb798bc2ac4df62109bc895.zip
gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.gz
gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.bz2
1 files changed, 97 insertions, 16 deletions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 6e2fcb6..ebf6fcc 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1,5 +1,5 @@
 /* Definitions for CPP library.
-   Copyright (C) 1995-2021 Free Software Foundation, Inc.
+   Copyright (C) 1995-2022 Free Software Foundation, Inc.
    Written by Per Bothner, 1994-95.
 
 This program is free software; you can redistribute it and/or modify it
@@ -198,6 +198,7 @@ struct GTY(()) cpp_string {
 				    operator, or before this token
 				    after a # operator.  */
 #define NO_EXPAND	(1 << 10) /* Do not macro-expand this token.  */
+#define PRAGMA_OP	(1 << 11) /* _Pragma token.  */
 
 /* Specify which field, if any, of the cpp_token union is used.  */
 
@@ -318,6 +319,17 @@ enum cpp_main_search
   CMS_system,  /* Search the system INCLUDE path.  */
 };
 
+/* The possible bidirectional control characters checking levels, from least
+   restrictive to most.  */
+enum cpp_bidirectional_level {
+  /* No checking.  */
+  bidirectional_none,
+  /* Only detect unpaired uses of bidirectional control characters.  */
+  bidirectional_unpaired,
+  /* Detect any use of bidirectional control characters.  */
+  bidirectional_any
+};
+
 /* This structure is nested inside struct cpp_reader, and
    carries all the options visible to the command line.  */
 struct cpp_options
@@ -479,13 +491,9 @@ struct cpp_options
   unsigned char ext_numeric_literals;
 
   /* Nonzero means extended identifiers allow the characters specified
-     in C11 and C++11.  */
+     in C11.  */
   unsigned char c11_identifiers;
 
-  /* Nonzero means extended identifiers allow the characters specified
-     in C++23.  */
-  unsigned char cxx23_identifiers;
-
   /* Nonzero for C++ 2014 Standard binary constants.  */
   unsigned char binary_constants;
 
@@ -538,6 +546,10 @@ struct cpp_options
   /* True if warn about differences between C++98 and C++11.  */
   bool cpp_warn_cxx11_compat;
 
+  /* Nonzero if bidirectional control characters checking is on.  See enum
+     cpp_bidirectional_level.  */
+  unsigned char cpp_warn_bidirectional;
+
   /* Dependency generation.  */
   struct
   {
@@ -642,7 +654,8 @@ enum cpp_warning_reason {
   CPP_W_C90_C99_COMPAT,
   CPP_W_C11_C2X_COMPAT,
   CPP_W_CXX11_COMPAT,
-  CPP_W_EXPANSION_TO_DEFINED
+  CPP_W_EXPANSION_TO_DEFINED,
+  CPP_W_BIDIRECTIONAL
 };
 
 /* Callback for header lookup for HEADER, which is the name of a
@@ -739,10 +752,18 @@ struct cpp_callbacks
 
 #ifdef VMS
 #define INO_T_CPP ino_t ino[3]
+#elif defined (_AIX) && SIZEOF_INO_T == 4
+#define INO_T_CPP ino64_t ino
 #else
 #define INO_T_CPP ino_t ino
 #endif
 
+#if defined (_AIX) && SIZEOF_DEV_T == 4
+#define DEV_T_CPP dev64_t dev
+#else
+#define DEV_T_CPP dev_t dev
+#endif
+
 /* Chain of directories to look for include files in.  */
 struct cpp_dir
 {
@@ -777,7 +798,7 @@ struct cpp_dir
   /* The C front end uses these to recognize duplicated
      directories in the search path.  */
   INO_T_CPP;
-  dev_t dev;
+  DEV_T_CPP;
 };
 
 /* The kind of the cpp_macro.  */
@@ -1267,6 +1288,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
 				const char *msgid, ...)
   ATTRIBUTE_PRINTF_3;
 
+/* As their counterparts above, but use RICHLOC.  */
+extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
+			    rich_location *richloc, const char *msgid, ...)
+  ATTRIBUTE_PRINTF_4;
+extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
+			       rich_location *richloc, const char *msgid, ...)
+  ATTRIBUTE_PRINTF_4;
+
 /* Output a diagnostic with "MSGID: " preceding the
    error string of errno.  No location is printed.  */
 extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
@@ -1441,43 +1470,95 @@ extern const char * cpp_get_userdef_suffix
 
 /* In charset.c */
 
+/* The result of attempting to decode a run of UTF-8 bytes.  */
+
+struct cpp_decoded_char
+{
+  const char *m_start_byte;
+  const char *m_next_byte;
+
+  bool m_valid_ch;
+  cppchar_t m_ch;
+};
+
+/* Information for mapping between code points and display columns.
+
+   This is a tabstop value, along with a callback for getting the
+   widths of characters.  Normally this callback is cpp_wcwidth, but we
+   support other schemes for escaping non-ASCII unicode as a series of
+   ASCII chars when printing the user's source code in diagnostic-show-locus.c
+
+   For example, consider:
+   - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80)
+   - the Unicode character U+1F642 "SLIGHTLY SMILING FACE"
+     (UTF-8: 0xF0 0x9F 0x99 0x82)
+   - the byte 0xBF (a stray trailing byte of a UTF-8 character)
+   Normally U+03C0 would occupy one display column, U+1F642
+   would occupy two display columns, and the stray byte would be
+   printed verbatim as one display column.
+
+   However when escaping them as unicode code points as "<U+03C0>"
+   and "<U+1F642>" they occupy 8 and 9 display columns respectively,
+   and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>"
+   they occupy 8 and 16 display columns respectively.  In both cases
+   the stray byte is escaped to <BF> as 4 display columns.  */
+
+struct cpp_char_column_policy
+{
+  cpp_char_column_policy (int tabstop,
+			  int (*width_cb) (cppchar_t c))
+  : m_tabstop (tabstop),
+    m_undecoded_byte_width (1),
+    m_width_cb (width_cb)
+  {}
+
+  int m_tabstop;
+  /* Width in display columns of a stray byte that isn't decodable
+     as UTF-8.  */
+  int m_undecoded_byte_width;
+  int (*m_width_cb) (cppchar_t c);
+};
+
 /* A class to manage the state while converting a UTF-8 sequence to cppchar_t
    and computing the display width one character at a time.  */
 class cpp_display_width_computation {
  public:
   cpp_display_width_computation (const char *data, int data_length,
-				 int tabstop);
+				 const cpp_char_column_policy &policy);
   const char *next_byte () const { return m_next; }
   int bytes_processed () const { return m_next - m_begin; }
   int bytes_left () const { return m_bytes_left; }
   bool done () const { return !bytes_left (); }
   int display_cols_processed () const { return m_display_cols; }
 
-  int process_next_codepoint ();
+  int process_next_codepoint (cpp_decoded_char *out);
   int advance_display_cols (int n);
 
  private:
   const char *const m_begin;
   const char *m_next;
   size_t m_bytes_left;
-  const int m_tabstop;
+  const cpp_char_column_policy &m_policy;
   int m_display_cols;
 };
 
 /* Convenience functions that are simple use cases for class
    cpp_display_width_computation.  Tab characters will be expanded to spaces
-   as determined by TABSTOP.  */
+   as determined by POLICY.m_tabstop, and non-printable-ASCII characters
+   will be escaped as per POLICY.  */
 
 int cpp_byte_column_to_display_column (const char *data, int data_length,
-				       int column, int tabstop);
+				       int column,
+				       const cpp_char_column_policy &policy);
 inline int cpp_display_width (const char *data, int data_length,
-			      int tabstop)
+			      const cpp_char_column_policy &policy)
 {
   return cpp_byte_column_to_display_column (data, data_length, data_length,
-					    tabstop);
+					    policy);
 }
 int cpp_display_column_to_byte_column (const char *data, int data_length,
-				       int display_col, int tabstop);
+				       int display_col,
+				       const cpp_char_column_policy &policy);
 int cpp_wcwidth (cppchar_t c);
 
 bool cpp_input_conversion_is_trivial (const char *input_charset);
author	Thomas Schwinge <thomas@codesourcery.com>	2022-01-24 10:03:47 +0100
committer	Thomas Schwinge <thomas_schwinge@mentor.com>	2022-01-24 10:06:43 +0100
commit	21af490baa734a901fb798bc2ac4df62109bc895 (patch)
tree	a292dc4ac7de999d47f20ab9a2dff597afadea2a /libcpp/include/cpplib.h
parent	2cce6b8919ce16acd37a7a203049a52925a7e295 (diff)
parent	490e23032baaece71f2ec09fa1805064b150fbc2 (diff)
download	gcc-21af490baa734a901fb798bc2ac4df62109bc895.zip gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.gz gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.bz2