aboutsummaryrefslogtreecommitdiff
path: root/libcpp/include/cpplib.h
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2022-02-11 15:02:44 -0800
committerIan Lance Taylor <iant@golang.org>2022-02-11 15:02:44 -0800
commit9a510fb0970d3d9a4201bce8965cabe67850386b (patch)
tree43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libcpp/include/cpplib.h
parenta6d3012b274f38b20e2a57162106f625746af6c6 (diff)
parent8dc2499aa62f768c6395c9754b8cabc1ce25c494 (diff)
downloadgcc-9a510fb0970d3d9a4201bce8965cabe67850386b.zip
gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.tar.gz
gcc-9a510fb0970d3d9a4201bce8965cabe67850386b.tar.bz2
Merge from trunk revision 8dc2499aa62f768c6395c9754b8cabc1ce25c494
Diffstat (limited to 'libcpp/include/cpplib.h')
-rw-r--r--libcpp/include/cpplib.h134
1 files changed, 108 insertions, 26 deletions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 6e2fcb6..3eba6f7 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1,5 +1,5 @@
/* Definitions for CPP library.
- Copyright (C) 1995-2021 Free Software Foundation, Inc.
+ Copyright (C) 1995-2022 Free Software Foundation, Inc.
Written by Per Bothner, 1994-95.
This program is free software; you can redistribute it and/or modify it
@@ -46,7 +46,7 @@ struct _cpp_file;
'='. The lexer needs operators ending in '=', like ">>=", to be in
the same order as their counterparts without the '=', like ">>".
- See the cpp_operator table optab in expr.c if you change the order or
+ See the cpp_operator table optab in expr.cc if you change the order or
add or remove anything in the first group. */
#define TTYPE_TABLE \
@@ -192,12 +192,13 @@ struct GTY(()) cpp_string {
comment. */
#define BOL (1 << 6) /* Token at beginning of line. */
#define PURE_ZERO (1 << 7) /* Single 0 digit, used by the C++ frontend,
- set in c-lex.c. */
+ set in c-lex.cc. */
#define SP_DIGRAPH (1 << 8) /* # or ## token was a digraph. */
#define SP_PREV_WHITE (1 << 9) /* If whitespace before a ##
operator, or before this token
after a # operator. */
#define NO_EXPAND (1 << 10) /* Do not macro-expand this token. */
+#define PRAGMA_OP (1 << 11) /* _Pragma token. */
/* Specify which field, if any, of the cpp_token union is used. */
@@ -318,6 +319,18 @@ enum cpp_main_search
CMS_system, /* Search the system INCLUDE path. */
};
+/* The possible bidirectional control characters checking levels. */
+enum cpp_bidirectional_level {
+ /* No checking. */
+ bidirectional_none = 0,
+ /* Only detect unpaired uses of bidirectional control characters. */
+ bidirectional_unpaired = 1,
+ /* Detect any use of bidirectional control characters. */
+ bidirectional_any = 2,
+ /* Also warn about UCNs. */
+ bidirectional_ucn = 4
+};
+
/* This structure is nested inside struct cpp_reader, and
carries all the options visible to the command line. */
struct cpp_options
@@ -479,13 +492,9 @@ struct cpp_options
unsigned char ext_numeric_literals;
/* Nonzero means extended identifiers allow the characters specified
- in C11 and C++11. */
+ in C11. */
unsigned char c11_identifiers;
- /* Nonzero means extended identifiers allow the characters specified
- in C++23. */
- unsigned char cxx23_identifiers;
-
/* Nonzero for C++ 2014 Standard binary constants. */
unsigned char binary_constants;
@@ -538,6 +547,10 @@ struct cpp_options
/* True if warn about differences between C++98 and C++11. */
bool cpp_warn_cxx11_compat;
+ /* Nonzero if bidirectional control characters checking is on. See enum
+ cpp_bidirectional_level. */
+ unsigned char cpp_warn_bidirectional;
+
/* Dependency generation. */
struct
{
@@ -642,7 +655,8 @@ enum cpp_warning_reason {
CPP_W_C90_C99_COMPAT,
CPP_W_C11_C2X_COMPAT,
CPP_W_CXX11_COMPAT,
- CPP_W_EXPANSION_TO_DEFINED
+ CPP_W_EXPANSION_TO_DEFINED,
+ CPP_W_BIDIRECTIONAL
};
/* Callback for header lookup for HEADER, which is the name of a
@@ -739,10 +753,18 @@ struct cpp_callbacks
#ifdef VMS
#define INO_T_CPP ino_t ino[3]
+#elif defined (_AIX) && SIZEOF_INO_T == 4
+#define INO_T_CPP ino64_t ino
#else
#define INO_T_CPP ino_t ino
#endif
+#if defined (_AIX) && SIZEOF_DEV_T == 4
+#define DEV_T_CPP dev64_t dev
+#else
+#define DEV_T_CPP dev_t dev
+#endif
+
/* Chain of directories to look for include files in. */
struct cpp_dir
{
@@ -777,7 +799,7 @@ struct cpp_dir
/* The C front end uses these to recognize duplicated
directories in the search path. */
INO_T_CPP;
- dev_t dev;
+ DEV_T_CPP;
};
/* The kind of the cpp_macro. */
@@ -1174,7 +1196,7 @@ extern int cpp_defined (cpp_reader *, const unsigned char *, int);
the double integer are set to zero. */
/* This type has to be equal to unsigned HOST_WIDE_INT, see
- gcc/c-family/c-lex.c. */
+ gcc/c-family/c-lex.cc. */
typedef uint64_t cpp_num_part;
typedef struct cpp_num cpp_num;
struct cpp_num
@@ -1267,6 +1289,14 @@ extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
const char *msgid, ...)
ATTRIBUTE_PRINTF_3;
+/* As their counterparts above, but use RICHLOC. */
+extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
+ rich_location *richloc, const char *msgid, ...)
+ ATTRIBUTE_PRINTF_4;
+extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
+ rich_location *richloc, const char *msgid, ...)
+ ATTRIBUTE_PRINTF_4;
+
/* Output a diagnostic with "MSGID: " preceding the
error string of errno. No location is printed. */
extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
@@ -1304,7 +1334,7 @@ extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
rich_location *richloc, const char *msgid, ...)
ATTRIBUTE_PRINTF_4;
-/* In lex.c */
+/* In lex.cc */
extern int cpp_ideq (const cpp_token *, const char *);
extern void cpp_output_line (cpp_reader *, FILE *);
extern unsigned char *cpp_output_line_to_string (cpp_reader *,
@@ -1361,7 +1391,7 @@ extern cpp_hashnode *cpp_lookup (cpp_reader *, const unsigned char *,
typedef int (*cpp_cb) (cpp_reader *, cpp_hashnode *, void *);
extern void cpp_forall_identifiers (cpp_reader *, cpp_cb, void *);
-/* In macro.c */
+/* In macro.cc */
extern void cpp_scan_nooutput (cpp_reader *);
extern int cpp_sys_macro_p (cpp_reader *);
extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *,
@@ -1369,7 +1399,7 @@ extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *,
extern bool cpp_compare_macros (const cpp_macro *macro1,
const cpp_macro *macro2);
-/* In files.c */
+/* In files.cc */
extern bool cpp_included (cpp_reader *, const char *);
extern bool cpp_included_before (cpp_reader *, const char *, location_t);
extern void cpp_make_system_header (cpp_reader *, int, int);
@@ -1397,7 +1427,7 @@ struct cpp_converted_source
cpp_converted_source cpp_get_converted_source (const char *fname,
const char *input_charset);
-/* In pch.c */
+/* In pch.cc */
struct save_macro_data;
extern int cpp_save_state (cpp_reader *, FILE *);
extern int cpp_write_pch_deps (cpp_reader *, FILE *);
@@ -1407,7 +1437,7 @@ extern void cpp_prepare_state (cpp_reader *, struct save_macro_data **);
extern int cpp_read_state (cpp_reader *, const char *, FILE *,
struct save_macro_data *);
-/* In lex.c */
+/* In lex.cc */
extern void cpp_force_token_locations (cpp_reader *, location_t);
extern void cpp_stop_forcing_token_locations (cpp_reader *);
enum CPP_DO_task
@@ -1423,7 +1453,7 @@ extern void cpp_directive_only_process (cpp_reader *pfile,
CPP_DO_task,
void *data, ...));
-/* In expr.c */
+/* In expr.cc */
extern enum cpp_ttype cpp_userdef_string_remove_type
(enum cpp_ttype type);
extern enum cpp_ttype cpp_userdef_string_add_type
@@ -1439,45 +1469,97 @@ extern bool cpp_userdef_char_p
extern const char * cpp_get_userdef_suffix
(const cpp_token *);
-/* In charset.c */
+/* In charset.cc */
+
+/* The result of attempting to decode a run of UTF-8 bytes. */
+
+struct cpp_decoded_char
+{
+ const char *m_start_byte;
+ const char *m_next_byte;
+
+ bool m_valid_ch;
+ cppchar_t m_ch;
+};
+
+/* Information for mapping between code points and display columns.
+
+ This is a tabstop value, along with a callback for getting the
+ widths of characters. Normally this callback is cpp_wcwidth, but we
+ support other schemes for escaping non-ASCII unicode as a series of
+ ASCII chars when printing the user's source code in diagnostic-show-locus.cc
+
+ For example, consider:
+ - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80)
+ - the Unicode character U+1F642 "SLIGHTLY SMILING FACE"
+ (UTF-8: 0xF0 0x9F 0x99 0x82)
+ - the byte 0xBF (a stray trailing byte of a UTF-8 character)
+ Normally U+03C0 would occupy one display column, U+1F642
+ would occupy two display columns, and the stray byte would be
+ printed verbatim as one display column.
+
+ However when escaping them as unicode code points as "<U+03C0>"
+ and "<U+1F642>" they occupy 8 and 9 display columns respectively,
+ and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>"
+ they occupy 8 and 16 display columns respectively. In both cases
+ the stray byte is escaped to <BF> as 4 display columns. */
+
+struct cpp_char_column_policy
+{
+ cpp_char_column_policy (int tabstop,
+ int (*width_cb) (cppchar_t c))
+ : m_tabstop (tabstop),
+ m_undecoded_byte_width (1),
+ m_width_cb (width_cb)
+ {}
+
+ int m_tabstop;
+ /* Width in display columns of a stray byte that isn't decodable
+ as UTF-8. */
+ int m_undecoded_byte_width;
+ int (*m_width_cb) (cppchar_t c);
+};
/* A class to manage the state while converting a UTF-8 sequence to cppchar_t
and computing the display width one character at a time. */
class cpp_display_width_computation {
public:
cpp_display_width_computation (const char *data, int data_length,
- int tabstop);
+ const cpp_char_column_policy &policy);
const char *next_byte () const { return m_next; }
int bytes_processed () const { return m_next - m_begin; }
int bytes_left () const { return m_bytes_left; }
bool done () const { return !bytes_left (); }
int display_cols_processed () const { return m_display_cols; }
- int process_next_codepoint ();
+ int process_next_codepoint (cpp_decoded_char *out);
int advance_display_cols (int n);
private:
const char *const m_begin;
const char *m_next;
size_t m_bytes_left;
- const int m_tabstop;
+ const cpp_char_column_policy &m_policy;
int m_display_cols;
};
/* Convenience functions that are simple use cases for class
cpp_display_width_computation. Tab characters will be expanded to spaces
- as determined by TABSTOP. */
+ as determined by POLICY.m_tabstop, and non-printable-ASCII characters
+ will be escaped as per POLICY. */
int cpp_byte_column_to_display_column (const char *data, int data_length,
- int column, int tabstop);
+ int column,
+ const cpp_char_column_policy &policy);
inline int cpp_display_width (const char *data, int data_length,
- int tabstop)
+ const cpp_char_column_policy &policy)
{
return cpp_byte_column_to_display_column (data, data_length, data_length,
- tabstop);
+ policy);
}
int cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col, int tabstop);
+ int display_col,
+ const cpp_char_column_policy &policy);
int cpp_wcwidth (cppchar_t c);
bool cpp_input_conversion_is_trivial (const char *input_charset);