diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2022-01-24 10:03:47 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas_schwinge@mentor.com> | 2022-01-24 10:06:43 +0100 |
commit | 21af490baa734a901fb798bc2ac4df62109bc895 (patch) | |
tree | a292dc4ac7de999d47f20ab9a2dff597afadea2a /libcpp/include/cpplib.h | |
parent | 2cce6b8919ce16acd37a7a203049a52925a7e295 (diff) | |
parent | 490e23032baaece71f2ec09fa1805064b150fbc2 (diff) | |
download | gcc-21af490baa734a901fb798bc2ac4df62109bc895.zip gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.gz gcc-21af490baa734a901fb798bc2ac4df62109bc895.tar.bz2 |
Merge commit '490e23032baaece71f2ec09fa1805064b150fbc2' [#247]
Diffstat (limited to 'libcpp/include/cpplib.h')
-rw-r--r-- | libcpp/include/cpplib.h | 113 |
1 files changed, 97 insertions, 16 deletions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 6e2fcb6..ebf6fcc 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1,5 +1,5 @@ /* Definitions for CPP library. - Copyright (C) 1995-2021 Free Software Foundation, Inc. + Copyright (C) 1995-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994-95. This program is free software; you can redistribute it and/or modify it @@ -198,6 +198,7 @@ struct GTY(()) cpp_string { operator, or before this token after a # operator. */ #define NO_EXPAND (1 << 10) /* Do not macro-expand this token. */ +#define PRAGMA_OP (1 << 11) /* _Pragma token. */ /* Specify which field, if any, of the cpp_token union is used. */ @@ -318,6 +319,17 @@ enum cpp_main_search CMS_system, /* Search the system INCLUDE path. */ }; +/* The possible bidirectional control characters checking levels, from least + restrictive to most. */ +enum cpp_bidirectional_level { + /* No checking. */ + bidirectional_none, + /* Only detect unpaired uses of bidirectional control characters. */ + bidirectional_unpaired, + /* Detect any use of bidirectional control characters. */ + bidirectional_any +}; + /* This structure is nested inside struct cpp_reader, and carries all the options visible to the command line. */ struct cpp_options @@ -479,13 +491,9 @@ struct cpp_options unsigned char ext_numeric_literals; /* Nonzero means extended identifiers allow the characters specified - in C11 and C++11. */ + in C11. */ unsigned char c11_identifiers; - /* Nonzero means extended identifiers allow the characters specified - in C++23. */ - unsigned char cxx23_identifiers; - /* Nonzero for C++ 2014 Standard binary constants. */ unsigned char binary_constants; @@ -538,6 +546,10 @@ struct cpp_options /* True if warn about differences between C++98 and C++11. */ bool cpp_warn_cxx11_compat; + /* Nonzero if bidirectional control characters checking is on. See enum + cpp_bidirectional_level. */ + unsigned char cpp_warn_bidirectional; + /* Dependency generation. */ struct { @@ -642,7 +654,8 @@ enum cpp_warning_reason { CPP_W_C90_C99_COMPAT, CPP_W_C11_C2X_COMPAT, CPP_W_CXX11_COMPAT, - CPP_W_EXPANSION_TO_DEFINED + CPP_W_EXPANSION_TO_DEFINED, + CPP_W_BIDIRECTIONAL }; /* Callback for header lookup for HEADER, which is the name of a @@ -739,10 +752,18 @@ struct cpp_callbacks #ifdef VMS #define INO_T_CPP ino_t ino[3] +#elif defined (_AIX) && SIZEOF_INO_T == 4 +#define INO_T_CPP ino64_t ino #else #define INO_T_CPP ino_t ino #endif +#if defined (_AIX) && SIZEOF_DEV_T == 4 +#define DEV_T_CPP dev64_t dev +#else +#define DEV_T_CPP dev_t dev +#endif + /* Chain of directories to look for include files in. */ struct cpp_dir { @@ -777,7 +798,7 @@ struct cpp_dir /* The C front end uses these to recognize duplicated directories in the search path. */ INO_T_CPP; - dev_t dev; + DEV_T_CPP; }; /* The kind of the cpp_macro. */ @@ -1267,6 +1288,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason, const char *msgid, ...) ATTRIBUTE_PRINTF_3; +/* As their counterparts above, but use RICHLOC. */ +extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason, + rich_location *richloc, const char *msgid, ...) + ATTRIBUTE_PRINTF_4; +extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason, + rich_location *richloc, const char *msgid, ...) + ATTRIBUTE_PRINTF_4; + /* Output a diagnostic with "MSGID: " preceding the error string of errno. No location is printed. */ extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level, @@ -1441,43 +1470,95 @@ extern const char * cpp_get_userdef_suffix /* In charset.c */ +/* The result of attempting to decode a run of UTF-8 bytes. */ + +struct cpp_decoded_char +{ + const char *m_start_byte; + const char *m_next_byte; + + bool m_valid_ch; + cppchar_t m_ch; +}; + +/* Information for mapping between code points and display columns. + + This is a tabstop value, along with a callback for getting the + widths of characters. Normally this callback is cpp_wcwidth, but we + support other schemes for escaping non-ASCII unicode as a series of + ASCII chars when printing the user's source code in diagnostic-show-locus.c + + For example, consider: + - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80) + - the Unicode character U+1F642 "SLIGHTLY SMILING FACE" + (UTF-8: 0xF0 0x9F 0x99 0x82) + - the byte 0xBF (a stray trailing byte of a UTF-8 character) + Normally U+03C0 would occupy one display column, U+1F642 + would occupy two display columns, and the stray byte would be + printed verbatim as one display column. + + However when escaping them as unicode code points as "<U+03C0>" + and "<U+1F642>" they occupy 8 and 9 display columns respectively, + and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>" + they occupy 8 and 16 display columns respectively. In both cases + the stray byte is escaped to <BF> as 4 display columns. */ + +struct cpp_char_column_policy +{ + cpp_char_column_policy (int tabstop, + int (*width_cb) (cppchar_t c)) + : m_tabstop (tabstop), + m_undecoded_byte_width (1), + m_width_cb (width_cb) + {} + + int m_tabstop; + /* Width in display columns of a stray byte that isn't decodable + as UTF-8. */ + int m_undecoded_byte_width; + int (*m_width_cb) (cppchar_t c); +}; + /* A class to manage the state while converting a UTF-8 sequence to cppchar_t and computing the display width one character at a time. */ class cpp_display_width_computation { public: cpp_display_width_computation (const char *data, int data_length, - int tabstop); + const cpp_char_column_policy &policy); const char *next_byte () const { return m_next; } int bytes_processed () const { return m_next - m_begin; } int bytes_left () const { return m_bytes_left; } bool done () const { return !bytes_left (); } int display_cols_processed () const { return m_display_cols; } - int process_next_codepoint (); + int process_next_codepoint (cpp_decoded_char *out); int advance_display_cols (int n); private: const char *const m_begin; const char *m_next; size_t m_bytes_left; - const int m_tabstop; + const cpp_char_column_policy &m_policy; int m_display_cols; }; /* Convenience functions that are simple use cases for class cpp_display_width_computation. Tab characters will be expanded to spaces - as determined by TABSTOP. */ + as determined by POLICY.m_tabstop, and non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_byte_column_to_display_column (const char *data, int data_length, - int column, int tabstop); + int column, + const cpp_char_column_policy &policy); inline int cpp_display_width (const char *data, int data_length, - int tabstop) + const cpp_char_column_policy &policy) { return cpp_byte_column_to_display_column (data, data_length, data_length, - tabstop); + policy); } int cpp_display_column_to_byte_column (const char *data, int data_length, - int display_col, int tabstop); + int display_col, + const cpp_char_column_policy &policy); int cpp_wcwidth (cppchar_t c); bool cpp_input_conversion_is_trivial (const char *input_charset); |