diff options
author | Ian Lance Taylor <iant@golang.org> | 2022-02-11 15:02:44 -0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2022-02-11 15:02:44 -0800 |
commit | 9a510fb0970d3d9a4201bce8965cabe67850386b (patch) | |
tree | 43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libcpp/include/cpplib.h | |
parent | a6d3012b274f38b20e2a57162106f625746af6c6 (diff) | |
parent | 8dc2499aa62f768c6395c9754b8cabc1ce25c494 (diff) | |
download | gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.zip gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.tar.gz gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.tar.bz2 |
Merge from trunk revision 8dc2499aa62f768c6395c9754b8cabc1ce25c494
Diffstat (limited to 'libcpp/include/cpplib.h')
-rw-r--r-- | libcpp/include/cpplib.h | 134 |
1 files changed, 108 insertions, 26 deletions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 6e2fcb6..3eba6f7 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1,5 +1,5 @@ /* Definitions for CPP library. - Copyright (C) 1995-2021 Free Software Foundation, Inc. + Copyright (C) 1995-2022 Free Software Foundation, Inc. Written by Per Bothner, 1994-95. This program is free software; you can redistribute it and/or modify it @@ -46,7 +46,7 @@ struct _cpp_file; '='. The lexer needs operators ending in '=', like ">>=", to be in the same order as their counterparts without the '=', like ">>". - See the cpp_operator table optab in expr.c if you change the order or + See the cpp_operator table optab in expr.cc if you change the order or add or remove anything in the first group. */ #define TTYPE_TABLE \ @@ -192,12 +192,13 @@ struct GTY(()) cpp_string { comment. */ #define BOL (1 << 6) /* Token at beginning of line. */ #define PURE_ZERO (1 << 7) /* Single 0 digit, used by the C++ frontend, - set in c-lex.c. */ + set in c-lex.cc. */ #define SP_DIGRAPH (1 << 8) /* # or ## token was a digraph. */ #define SP_PREV_WHITE (1 << 9) /* If whitespace before a ## operator, or before this token after a # operator. */ #define NO_EXPAND (1 << 10) /* Do not macro-expand this token. */ +#define PRAGMA_OP (1 << 11) /* _Pragma token. */ /* Specify which field, if any, of the cpp_token union is used. */ @@ -318,6 +319,18 @@ enum cpp_main_search CMS_system, /* Search the system INCLUDE path. */ }; +/* The possible bidirectional control characters checking levels. */ +enum cpp_bidirectional_level { + /* No checking. */ + bidirectional_none = 0, + /* Only detect unpaired uses of bidirectional control characters. */ + bidirectional_unpaired = 1, + /* Detect any use of bidirectional control characters. */ + bidirectional_any = 2, + /* Also warn about UCNs. */ + bidirectional_ucn = 4 +}; + /* This structure is nested inside struct cpp_reader, and carries all the options visible to the command line. */ struct cpp_options @@ -479,13 +492,9 @@ struct cpp_options unsigned char ext_numeric_literals; /* Nonzero means extended identifiers allow the characters specified - in C11 and C++11. */ + in C11. */ unsigned char c11_identifiers; - /* Nonzero means extended identifiers allow the characters specified - in C++23. */ - unsigned char cxx23_identifiers; - /* Nonzero for C++ 2014 Standard binary constants. */ unsigned char binary_constants; @@ -538,6 +547,10 @@ struct cpp_options /* True if warn about differences between C++98 and C++11. */ bool cpp_warn_cxx11_compat; + /* Nonzero if bidirectional control characters checking is on. See enum + cpp_bidirectional_level. */ + unsigned char cpp_warn_bidirectional; + /* Dependency generation. */ struct { @@ -642,7 +655,8 @@ enum cpp_warning_reason { CPP_W_C90_C99_COMPAT, CPP_W_C11_C2X_COMPAT, CPP_W_CXX11_COMPAT, - CPP_W_EXPANSION_TO_DEFINED + CPP_W_EXPANSION_TO_DEFINED, + CPP_W_BIDIRECTIONAL }; /* Callback for header lookup for HEADER, which is the name of a @@ -739,10 +753,18 @@ struct cpp_callbacks #ifdef VMS #define INO_T_CPP ino_t ino[3] +#elif defined (_AIX) && SIZEOF_INO_T == 4 +#define INO_T_CPP ino64_t ino #else #define INO_T_CPP ino_t ino #endif +#if defined (_AIX) && SIZEOF_DEV_T == 4 +#define DEV_T_CPP dev64_t dev +#else +#define DEV_T_CPP dev_t dev +#endif + /* Chain of directories to look for include files in. */ struct cpp_dir { @@ -777,7 +799,7 @@ struct cpp_dir /* The C front end uses these to recognize duplicated directories in the search path. */ INO_T_CPP; - dev_t dev; + DEV_T_CPP; }; /* The kind of the cpp_macro. */ @@ -1174,7 +1196,7 @@ extern int cpp_defined (cpp_reader *, const unsigned char *, int); the double integer are set to zero. */ /* This type has to be equal to unsigned HOST_WIDE_INT, see - gcc/c-family/c-lex.c. */ + gcc/c-family/c-lex.cc. */ typedef uint64_t cpp_num_part; typedef struct cpp_num cpp_num; struct cpp_num @@ -1267,6 +1289,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason, const char *msgid, ...) ATTRIBUTE_PRINTF_3; +/* As their counterparts above, but use RICHLOC. */ +extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason, + rich_location *richloc, const char *msgid, ...) + ATTRIBUTE_PRINTF_4; +extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason, + rich_location *richloc, const char *msgid, ...) + ATTRIBUTE_PRINTF_4; + /* Output a diagnostic with "MSGID: " preceding the error string of errno. No location is printed. */ extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level, @@ -1304,7 +1334,7 @@ extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level, rich_location *richloc, const char *msgid, ...) ATTRIBUTE_PRINTF_4; -/* In lex.c */ +/* In lex.cc */ extern int cpp_ideq (const cpp_token *, const char *); extern void cpp_output_line (cpp_reader *, FILE *); extern unsigned char *cpp_output_line_to_string (cpp_reader *, @@ -1361,7 +1391,7 @@ extern cpp_hashnode *cpp_lookup (cpp_reader *, const unsigned char *, typedef int (*cpp_cb) (cpp_reader *, cpp_hashnode *, void *); extern void cpp_forall_identifiers (cpp_reader *, cpp_cb, void *); -/* In macro.c */ +/* In macro.cc */ extern void cpp_scan_nooutput (cpp_reader *); extern int cpp_sys_macro_p (cpp_reader *); extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *, @@ -1369,7 +1399,7 @@ extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *, extern bool cpp_compare_macros (const cpp_macro *macro1, const cpp_macro *macro2); -/* In files.c */ +/* In files.cc */ extern bool cpp_included (cpp_reader *, const char *); extern bool cpp_included_before (cpp_reader *, const char *, location_t); extern void cpp_make_system_header (cpp_reader *, int, int); @@ -1397,7 +1427,7 @@ struct cpp_converted_source cpp_converted_source cpp_get_converted_source (const char *fname, const char *input_charset); -/* In pch.c */ +/* In pch.cc */ struct save_macro_data; extern int cpp_save_state (cpp_reader *, FILE *); extern int cpp_write_pch_deps (cpp_reader *, FILE *); @@ -1407,7 +1437,7 @@ extern void cpp_prepare_state (cpp_reader *, struct save_macro_data **); extern int cpp_read_state (cpp_reader *, const char *, FILE *, struct save_macro_data *); -/* In lex.c */ +/* In lex.cc */ extern void cpp_force_token_locations (cpp_reader *, location_t); extern void cpp_stop_forcing_token_locations (cpp_reader *); enum CPP_DO_task @@ -1423,7 +1453,7 @@ extern void cpp_directive_only_process (cpp_reader *pfile, CPP_DO_task, void *data, ...)); -/* In expr.c */ +/* In expr.cc */ extern enum cpp_ttype cpp_userdef_string_remove_type (enum cpp_ttype type); extern enum cpp_ttype cpp_userdef_string_add_type @@ -1439,45 +1469,97 @@ extern bool cpp_userdef_char_p extern const char * cpp_get_userdef_suffix (const cpp_token *); -/* In charset.c */ +/* In charset.cc */ + +/* The result of attempting to decode a run of UTF-8 bytes. */ + +struct cpp_decoded_char +{ + const char *m_start_byte; + const char *m_next_byte; + + bool m_valid_ch; + cppchar_t m_ch; +}; + +/* Information for mapping between code points and display columns. + + This is a tabstop value, along with a callback for getting the + widths of characters. Normally this callback is cpp_wcwidth, but we + support other schemes for escaping non-ASCII unicode as a series of + ASCII chars when printing the user's source code in diagnostic-show-locus.cc + + For example, consider: + - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80) + - the Unicode character U+1F642 "SLIGHTLY SMILING FACE" + (UTF-8: 0xF0 0x9F 0x99 0x82) + - the byte 0xBF (a stray trailing byte of a UTF-8 character) + Normally U+03C0 would occupy one display column, U+1F642 + would occupy two display columns, and the stray byte would be + printed verbatim as one display column. + + However when escaping them as unicode code points as "<U+03C0>" + and "<U+1F642>" they occupy 8 and 9 display columns respectively, + and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>" + they occupy 8 and 16 display columns respectively. In both cases + the stray byte is escaped to <BF> as 4 display columns. */ + +struct cpp_char_column_policy +{ + cpp_char_column_policy (int tabstop, + int (*width_cb) (cppchar_t c)) + : m_tabstop (tabstop), + m_undecoded_byte_width (1), + m_width_cb (width_cb) + {} + + int m_tabstop; + /* Width in display columns of a stray byte that isn't decodable + as UTF-8. */ + int m_undecoded_byte_width; + int (*m_width_cb) (cppchar_t c); +}; /* A class to manage the state while converting a UTF-8 sequence to cppchar_t and computing the display width one character at a time. */ class cpp_display_width_computation { public: cpp_display_width_computation (const char *data, int data_length, - int tabstop); + const cpp_char_column_policy &policy); const char *next_byte () const { return m_next; } int bytes_processed () const { return m_next - m_begin; } int bytes_left () const { return m_bytes_left; } bool done () const { return !bytes_left (); } int display_cols_processed () const { return m_display_cols; } - int process_next_codepoint (); + int process_next_codepoint (cpp_decoded_char *out); int advance_display_cols (int n); private: const char *const m_begin; const char *m_next; size_t m_bytes_left; - const int m_tabstop; + const cpp_char_column_policy &m_policy; int m_display_cols; }; /* Convenience functions that are simple use cases for class cpp_display_width_computation. Tab characters will be expanded to spaces - as determined by TABSTOP. */ + as determined by POLICY.m_tabstop, and non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_byte_column_to_display_column (const char *data, int data_length, - int column, int tabstop); + int column, + const cpp_char_column_policy &policy); inline int cpp_display_width (const char *data, int data_length, - int tabstop) + const cpp_char_column_policy &policy) { return cpp_byte_column_to_display_column (data, data_length, data_length, - tabstop); + policy); } int cpp_display_column_to_byte_column (const char *data, int data_length, - int display_col, int tabstop); + int display_col, + const cpp_char_column_policy &policy); int cpp_wcwidth (cppchar_t c); bool cpp_input_conversion_is_trivial (const char *input_charset); |