aboutsummaryrefslogtreecommitdiff
path: root/libcpp/include/cpplib.h
diff options
context:
space:
mode:
authorThomas Schwinge <thomas@codesourcery.com>2022-01-24 10:03:47 +0100
committerThomas Schwinge <thomas_schwinge@mentor.com>2022-01-24 10:06:43 +0100
commit21af490baa734a901fb798bc2ac4df62109bc895 (patch)
treea292dc4ac7de999d47f20ab9a2dff597afadea2a /libcpp/include/cpplib.h
parent2cce6b8919ce16acd37a7a203049a52925a7e295 (diff)
parent490e23032baaece71f2ec09fa1805064b150fbc2 (diff)
downloadgcc-21af490baa734a901fb798bc2ac4df62109bc895.zip
gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.gz
gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.bz2
Merge commit '490e23032baaece71f2ec09fa1805064b150fbc2' [#247]
Diffstat (limited to 'libcpp/include/cpplib.h')
-rw-r--r--libcpp/include/cpplib.h113
1 files changed, 97 insertions, 16 deletions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 6e2fcb6..ebf6fcc 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1,5 +1,5 @@
/* Definitions for CPP library.
- Copyright (C) 1995-2021 Free Software Foundation, Inc.
+ Copyright (C) 1995-2022 Free Software Foundation, Inc.
Written by Per Bothner, 1994-95.
This program is free software; you can redistribute it and/or modify it
@@ -198,6 +198,7 @@ struct GTY(()) cpp_string {
operator, or before this token
after a # operator. */
#define NO_EXPAND (1 << 10) /* Do not macro-expand this token. */
+#define PRAGMA_OP (1 << 11) /* _Pragma token. */
/* Specify which field, if any, of the cpp_token union is used. */
@@ -318,6 +319,17 @@ enum cpp_main_search
CMS_system, /* Search the system INCLUDE path. */
};
+/* The possible bidirectional control characters checking levels, from least
+ restrictive to most. */
+enum cpp_bidirectional_level {
+ /* No checking. */
+ bidirectional_none,
+ /* Only detect unpaired uses of bidirectional control characters. */
+ bidirectional_unpaired,
+ /* Detect any use of bidirectional control characters. */
+ bidirectional_any
+};
+
/* This structure is nested inside struct cpp_reader, and
carries all the options visible to the command line. */
struct cpp_options
@@ -479,13 +491,9 @@ struct cpp_options
unsigned char ext_numeric_literals;
/* Nonzero means extended identifiers allow the characters specified
- in C11 and C++11. */
+ in C11. */
unsigned char c11_identifiers;
- /* Nonzero means extended identifiers allow the characters specified
- in C++23. */
- unsigned char cxx23_identifiers;
-
/* Nonzero for C++ 2014 Standard binary constants. */
unsigned char binary_constants;
@@ -538,6 +546,10 @@ struct cpp_options
/* True if warn about differences between C++98 and C++11. */
bool cpp_warn_cxx11_compat;
+ /* Nonzero if bidirectional control characters checking is on. See enum
+ cpp_bidirectional_level. */
+ unsigned char cpp_warn_bidirectional;
+
/* Dependency generation. */
struct
{
@@ -642,7 +654,8 @@ enum cpp_warning_reason {
CPP_W_C90_C99_COMPAT,
CPP_W_C11_C2X_COMPAT,
CPP_W_CXX11_COMPAT,
- CPP_W_EXPANSION_TO_DEFINED
+ CPP_W_EXPANSION_TO_DEFINED,
+ CPP_W_BIDIRECTIONAL
};
/* Callback for header lookup for HEADER, which is the name of a
@@ -739,10 +752,18 @@ struct cpp_callbacks
#ifdef VMS
#define INO_T_CPP ino_t ino[3]
+#elif defined (_AIX) && SIZEOF_INO_T == 4
+#define INO_T_CPP ino64_t ino
#else
#define INO_T_CPP ino_t ino
#endif
+#if defined (_AIX) && SIZEOF_DEV_T == 4
+#define DEV_T_CPP dev64_t dev
+#else
+#define DEV_T_CPP dev_t dev
+#endif
+
/* Chain of directories to look for include files in. */
struct cpp_dir
{
@@ -777,7 +798,7 @@ struct cpp_dir
/* The C front end uses these to recognize duplicated
directories in the search path. */
INO_T_CPP;
- dev_t dev;
+ DEV_T_CPP;
};
/* The kind of the cpp_macro. */
@@ -1267,6 +1288,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
const char *msgid, ...)
ATTRIBUTE_PRINTF_3;
+/* As their counterparts above, but use RICHLOC. */
+extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
+ rich_location *richloc, const char *msgid, ...)
+ ATTRIBUTE_PRINTF_4;
+extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
+ rich_location *richloc, const char *msgid, ...)
+ ATTRIBUTE_PRINTF_4;
+
/* Output a diagnostic with "MSGID: " preceding the
error string of errno. No location is printed. */
extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
@@ -1441,43 +1470,95 @@ extern const char * cpp_get_userdef_suffix
/* In charset.c */
+/* The result of attempting to decode a run of UTF-8 bytes. */
+
+struct cpp_decoded_char
+{
+ const char *m_start_byte;
+ const char *m_next_byte;
+
+ bool m_valid_ch;
+ cppchar_t m_ch;
+};
+
+/* Information for mapping between code points and display columns.
+
+ This is a tabstop value, along with a callback for getting the
+ widths of characters. Normally this callback is cpp_wcwidth, but we
+ support other schemes for escaping non-ASCII unicode as a series of
+ ASCII chars when printing the user's source code in diagnostic-show-locus.c
+
+ For example, consider:
+ - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80)
+ - the Unicode character U+1F642 "SLIGHTLY SMILING FACE"
+ (UTF-8: 0xF0 0x9F 0x99 0x82)
+ - the byte 0xBF (a stray trailing byte of a UTF-8 character)
+ Normally U+03C0 would occupy one display column, U+1F642
+ would occupy two display columns, and the stray byte would be
+ printed verbatim as one display column.
+
+ However when escaping them as unicode code points as "<U+03C0>"
+ and "<U+1F642>" they occupy 8 and 9 display columns respectively,
+ and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>"
+ they occupy 8 and 16 display columns respectively. In both cases
+ the stray byte is escaped to <BF> as 4 display columns. */
+
+struct cpp_char_column_policy
+{
+ cpp_char_column_policy (int tabstop,
+ int (*width_cb) (cppchar_t c))
+ : m_tabstop (tabstop),
+ m_undecoded_byte_width (1),
+ m_width_cb (width_cb)
+ {}
+
+ int m_tabstop;
+ /* Width in display columns of a stray byte that isn't decodable
+ as UTF-8. */
+ int m_undecoded_byte_width;
+ int (*m_width_cb) (cppchar_t c);
+};
+
/* A class to manage the state while converting a UTF-8 sequence to cppchar_t
and computing the display width one character at a time. */
class cpp_display_width_computation {
public:
cpp_display_width_computation (const char *data, int data_length,
- int tabstop);
+ const cpp_char_column_policy &policy);
const char *next_byte () const { return m_next; }
int bytes_processed () const { return m_next - m_begin; }
int bytes_left () const { return m_bytes_left; }
bool done () const { return !bytes_left (); }
int display_cols_processed () const { return m_display_cols; }
- int process_next_codepoint ();
+ int process_next_codepoint (cpp_decoded_char *out);
int advance_display_cols (int n);
private:
const char *const m_begin;
const char *m_next;
size_t m_bytes_left;
- const int m_tabstop;
+ const cpp_char_column_policy &m_policy;
int m_display_cols;
};
/* Convenience functions that are simple use cases for class
cpp_display_width_computation. Tab characters will be expanded to spaces
- as determined by TABSTOP. */
+ as determined by POLICY.m_tabstop, and non-printable-ASCII characters
+ will be escaped as per POLICY. */
int cpp_byte_column_to_display_column (const char *data, int data_length,
- int column, int tabstop);
+ int column,
+ const cpp_char_column_policy &policy);
inline int cpp_display_width (const char *data, int data_length,
- int tabstop)
+ const cpp_char_column_policy &policy)
{
return cpp_byte_column_to_display_column (data, data_length, data_length,
- tabstop);
+ policy);
}
int cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col, int tabstop);
+ int display_col,
+ const cpp_char_column_policy &policy);
int cpp_wcwidth (cppchar_t c);
bool cpp_input_conversion_is_trivial (const char *input_charset);