diff options
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r-- | libcpp/charset.c | 63 |
1 files changed, 45 insertions, 18 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c index e4e45f6..0b0ccc6 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1582,12 +1582,14 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, "unknown escape sequence: '\\%c'", (int) c); else { + encoding_rich_location rich_loc (pfile); + /* diagnostic.c does not support "%03o". When it does, this code can use %03o directly in the diagnostic again. */ char buf[32]; sprintf(buf, "%03o", (int) c); - cpp_error (pfile, CPP_DL_PEDWARN, - "unknown escape sequence: '\\%s'", buf); + cpp_error_at (pfile, CPP_DL_PEDWARN, &rich_loc, + "unknown escape sequence: '\\%s'", buf); } } @@ -2345,14 +2347,16 @@ cpp_string_location_reader::get_next () } cpp_display_width_computation:: -cpp_display_width_computation (const char *data, int data_length, int tabstop) : +cpp_display_width_computation (const char *data, int data_length, + const cpp_char_column_policy &policy) : m_begin (data), m_next (m_begin), m_bytes_left (data_length), - m_tabstop (tabstop), + m_policy (policy), m_display_cols (0) { - gcc_assert (m_tabstop > 0); + gcc_assert (policy.m_tabstop > 0); + gcc_assert (policy.m_width_cb); } @@ -2364,19 +2368,28 @@ cpp_display_width_computation (const char *data, int data_length, int tabstop) : point to a valid UTF-8-encoded sequence, then it will be treated as a single byte with display width 1. m_cur_display_col is the current display column, relative to which tab stops should be expanded. Returns the display width of - the codepoint just processed. */ + the codepoint just processed. + If OUT is non-NULL, it is populated. */ int -cpp_display_width_computation::process_next_codepoint () +cpp_display_width_computation::process_next_codepoint (cpp_decoded_char *out) { cppchar_t c; int next_width; + if (out) + out->m_start_byte = m_next; + if (*m_next == '\t') { ++m_next; --m_bytes_left; - next_width = m_tabstop - (m_display_cols % m_tabstop); + next_width = m_policy.m_tabstop - (m_display_cols % m_policy.m_tabstop); + if (out) + { + out->m_ch = '\t'; + out->m_valid_ch = true; + } } else if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c) != 0) @@ -2386,14 +2399,24 @@ cpp_display_width_computation::process_next_codepoint () of one. */ ++m_next; --m_bytes_left; - next_width = 1; + next_width = m_policy.m_undecoded_byte_width; + if (out) + out->m_valid_ch = false; } else { /* one_utf8_to_cppchar() has updated m_next and m_bytes_left for us. */ - next_width = cpp_wcwidth (c); + next_width = m_policy.m_width_cb (c); + if (out) + { + out->m_ch = c; + out->m_valid_ch = true; + } } + if (out) + out->m_next_byte = m_next; + m_display_cols += next_width; return next_width; } @@ -2409,7 +2432,7 @@ cpp_display_width_computation::advance_display_cols (int n) const int start = m_display_cols; const int target = start + n; while (m_display_cols < target && !done ()) - process_next_codepoint (); + process_next_codepoint (NULL); return m_display_cols - start; } @@ -2417,29 +2440,33 @@ cpp_display_width_computation::advance_display_cols (int n) how many display columns are occupied by the first COLUMN bytes. COLUMN may exceed DATA_LENGTH, in which case the phantom bytes at the end are treated as if they have display width 1. Tabs are expanded to the next tab - stop, relative to the start of DATA. */ + stop, relative to the start of DATA, and non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_byte_column_to_display_column (const char *data, int data_length, - int column, int tabstop) + int column, + const cpp_char_column_policy &policy) { const int offset = MAX (0, column - data_length); - cpp_display_width_computation dw (data, column - offset, tabstop); + cpp_display_width_computation dw (data, column - offset, policy); while (!dw.done ()) - dw.process_next_codepoint (); + dw.process_next_codepoint (NULL); return dw.display_cols_processed () + offset; } /* For the string of length DATA_LENGTH bytes that begins at DATA, compute the least number of bytes that will result in at least DISPLAY_COL display columns. The return value may exceed DATA_LENGTH if the entire string does - not occupy enough display columns. */ + not occupy enough display columns. Non-printable-ASCII characters + will be escaped as per POLICY. */ int cpp_display_column_to_byte_column (const char *data, int data_length, - int display_col, int tabstop) + int display_col, + const cpp_char_column_policy &policy) { - cpp_display_width_computation dw (data, data_length, tabstop); + cpp_display_width_computation dw (data, data_length, policy); const int avail_display = dw.advance_display_cols (display_col); return dw.bytes_processed () + MAX (0, display_col - avail_display); } |