aboutsummaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
Diffstat (limited to 'libcpp')
-rw-r--r-- libcpp/charset.c | 98
-rw-r--r-- libcpp/include/cpplib.h | 40
-rw-r--r-- libcpp/init.c | 1
3 files changed, 101 insertions, 38 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c
index db47235..28b81c9c 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -2276,49 +2276,90 @@ cpp_string_location_reader::get_next ()
return result;
}
-/* Helper for cpp_byte_column_to_display_column and its inverse. Given a
- pointer to a UTF-8-encoded character, compute its display width. *INBUFP
- points on entry to the start of the UTF-8 encoding of the character, and
- is updated to point just after the last byte of the encoding. *INBYTESLEFTP
- contains on entry the remaining size of the buffer into which *INBUFP
- points, and this is also updated accordingly. If *INBUFP does not
+cpp_display_width_computation::
+cpp_display_width_computation (const char *data, int data_length, int tabstop) :
+ m_begin (data),
+ m_next (m_begin), /* Nothing has been consumed yet. */
+ m_bytes_left (data_length),
+ m_tabstop (tabstop),
+ m_display_cols (0)
+{
+ gcc_assert (m_tabstop > 0); /* process_next_codepoint takes a remainder modulo m_tabstop. */
+}
+
+
+/* The main implementation function for class cpp_display_width_computation.
+ m_next points on entry to the start of the UTF-8 encoding of the next
+ character, and is updated to point just after the last byte of the encoding.
+ m_bytes_left contains on entry the remaining size of the buffer into which
+ m_next points, and this is also updated accordingly. If m_next does not
point to a valid UTF-8-encoded sequence, then it will be treated as a single
- byte with display width 1. */
+ byte with display width 1. m_display_cols is the current display column,
+ relative to which tab stops should be expanded. Returns the display width of
+ the codepoint just processed. */
-static inline int
-compute_next_display_width (const uchar **inbufp, size_t *inbytesleftp)
+int
+cpp_display_width_computation::process_next_codepoint ()
{
cppchar_t c;
- if (one_utf8_to_cppchar (inbufp, inbytesleftp, &c) != 0)
+ int next_width;
+ /* *m_next is read before m_bytes_left is consulted, so this must only be
+ called while done () is false; the visible callers loop on !done (). */
+ if (*m_next == '\t')
+ {
+ ++m_next;
+ --m_bytes_left;
+ next_width = m_tabstop - (m_display_cols % m_tabstop); /* Columns up to the next tab stop; a full m_tabstop when already on one. */
+ }
+ else if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c)
+ != 0)
{
/* Input is not convertible to UTF-8. This could be fine, e.g. in a
string literal, so don't complain. Just treat it as if it has a width
of one. */
- ++*inbufp;
- --*inbytesleftp;
- return 1;
+ ++m_next;
+ --m_bytes_left;
+ next_width = 1;
+ }
+ else
+ {
+ /* one_utf8_to_cppchar() has updated m_next and m_bytes_left for us. */
+ next_width = cpp_wcwidth (c);
}
- /* one_utf8_to_cppchar() has updated inbufp and inbytesleftp for us. */
- return cpp_wcwidth (c);
+ m_display_cols += next_width; /* Keep the running column so that later tabs expand from the right place. */
+ return next_width;
+}
+
+/* Utility to advance the byte stream by the minimum amount needed to consume
+ N display columns. Returns the number of display columns that were
+ actually skipped. This could be less than N, if there was not enough data,
+ or more than N, if the last character to be skipped had a sufficiently large
+ display width (for example a tab). If N is not positive, nothing is
+ consumed and 0 is returned. */
+int
+cpp_display_width_computation::advance_display_cols (int n)
+{
+ const int start = m_display_cols;
+ const int target = start + n;
+ while (m_display_cols < target && !done ()) /* Stop once TARGET is reached or passed, or the data runs out. */
+ process_next_codepoint ();
+ return m_display_cols - start;
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
how many display columns are occupied by the first COLUMN bytes. COLUMN
may exceed DATA_LENGTH, in which case the phantom bytes at the end are
- treated as if they have display width 1. */
+ treated as if they have display width 1. Tabs are expanded to the next tab
+ stop, relative to the start of DATA. */
int
cpp_byte_column_to_display_column (const char *data, int data_length,
- int column)
+ int column, int tabstop)
{
- int display_col = 0;
- const uchar *udata = (const uchar *) data;
const int offset = MAX (0, column - data_length); /* Number of phantom bytes past the end of DATA. */
- size_t inbytesleft = column - offset;
- while (inbytesleft)
- display_col += compute_next_display_width (&udata, &inbytesleft);
- return display_col + offset;
+ cpp_display_width_computation dw (data, column - offset, tabstop); /* Scan only the bytes that really exist. */
+ while (!dw.done ())
+ dw.process_next_codepoint ();
+ return dw.display_cols_processed () + offset; /* Each phantom byte adds one column. */
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
@@ -2328,14 +2369,11 @@ cpp_byte_column_to_display_column (const char *data, int data_length,
int
cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col)
+ int display_col, int tabstop)
{
- int column = 0;
- const uchar *udata = (const uchar *) data;
- size_t inbytesleft = data_length;
- while (column < display_col && inbytesleft)
- column += compute_next_display_width (&udata, &inbytesleft);
- return data_length - inbytesleft + MAX (0, display_col - column);
+ cpp_display_width_computation dw (data, data_length, tabstop);
+ const int avail_display = dw.advance_display_cols (display_col); /* Display columns covered by the bytes consumed. */
+ return dw.bytes_processed () + MAX (0, display_col - avail_display); /* If DATA ran out first, each missing column counts as one more byte. */
}
/* Our own version of wcwidth(). We don't use the actual wcwidth() in glibc,
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index e8bb15d..8e39886 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -312,9 +312,6 @@ enum cpp_normalize_level {
carries all the options visible to the command line. */
struct cpp_options
{
- /* Characters between tab stops. */
- unsigned int tabstop;
-
/* The language we're preprocessing. */
enum c_lang lang;
@@ -1335,14 +1332,43 @@ extern const char * cpp_get_userdef_suffix
(const cpp_token *);
/* In charset.c */
+
+/* A class to manage the state while converting a UTF-8 sequence to cppchar_t
+ and computing the display width one character at a time. */
+class cpp_display_width_computation {
+ public:
+ cpp_display_width_computation (const char *data, int data_length,
+ int tabstop);
+ const char *next_byte () const { return m_next; }
+ int bytes_processed () const { return m_next - m_begin; }
+ int bytes_left () const { return m_bytes_left; }
+ bool done () const { return !bytes_left (); } /* True once no input bytes remain. */
+ int display_cols_processed () const { return m_display_cols; }
+ /* Consume one codepoint, or as many codepoints as needed to cover N display
+ columns; each returns the number of display columns it consumed. */
+ int process_next_codepoint ();
+ int advance_display_cols (int n);
+
+ private:
+ const char *const m_begin; /* Start of the input; never changes. */
+ const char *m_next; /* First byte not yet consumed. */
+ size_t m_bytes_left; /* Bytes remaining in the input, starting at m_next. */
+ const int m_tabstop; /* Distance between tab stops. */
+ int m_display_cols; /* Display columns consumed so far. */
+};
+
+/* Convenience functions that are simple use cases for class
+ cpp_display_width_computation. Tab characters will be expanded to spaces
+ as determined by TABSTOP. cpp_display_width is the display width of all
+ DATA_LENGTH bytes of DATA. */
int cpp_byte_column_to_display_column (const char *data, int data_length,
- int column);
-inline int cpp_display_width (const char *data, int data_length)
+ int column, int tabstop);
+inline int cpp_display_width (const char *data, int data_length,
+ int tabstop)
{
- return cpp_byte_column_to_display_column (data, data_length, data_length);
+ return cpp_byte_column_to_display_column (data, data_length, data_length,
+ tabstop);
}
int cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col);
+ int display_col, int tabstop);
int cpp_wcwidth (cppchar_t c);
#endif /* ! LIBCPP_CPPLIB_H */
diff --git a/libcpp/init.c b/libcpp/init.c
index d641d0a1..0aac5ac 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -190,7 +190,6 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
CPP_OPTION (pfile, discard_comments) = 1;
CPP_OPTION (pfile, discard_comments_in_macro_exp) = 1;
CPP_OPTION (pfile, max_include_depth) = 200;
- CPP_OPTION (pfile, tabstop) = 8;
CPP_OPTION (pfile, operator_names) = 1;
CPP_OPTION (pfile, warn_trigraphs) = 2;
CPP_OPTION (pfile, warn_endif_labels) = 1;