aboutsummaryrefslogtreecommitdiff
path: root/libcpp/charset.cc
diff options
context:
space:
mode:
authorDavid Malcolm <dmalcolm@redhat.com>2023-06-21 21:49:00 -0400
committerDavid Malcolm <dmalcolm@redhat.com>2023-06-21 21:49:00 -0400
commit4f01ae3761ca1f8dd7a33b833ae30624f047ac9c (patch)
treed46321ec2e1f20854e8ba439a7cd3be3a811addd /libcpp/charset.cc
parent985d6480fe52a5b109960117ba6a876dd875157e (diff)
downloadgcc-4f01ae3761ca1f8dd7a33b833ae30624f047ac9c.zip
gcc-4f01ae3761ca1f8dd7a33b833ae30624f047ac9c.tar.gz
gcc-4f01ae3761ca1f8dd7a33b833ae30624f047ac9c.tar.bz2
diagnostics: add support for "text art" diagrams
Existing text output in GCC has to be implemented by writing sequentially to a pretty_printer instance. This makes it hard to implement some kinds of diagnostic output (see e.g. diagnostic-show-locus.cc). This patch adds more flexible ways of creating text output: - a canvas class, which can be "painted" to via random-access (rather than sequentially) - a table class for 2D grid layout, supporting items that span multiple rows/columns - a widget class for organizing diagrams hierarchically. The patch also expands GCC's diagnostics subsystem so that diagnostics can have "text art" diagrams - think ASCII art, but potentially including some Unicode characters, such as box-drawing chars. The new code is in a new "gcc/text-art" subdirectory and "text_art" namespace. The patch adds a new "-fdiagnostics-text-art-charset=VAL" option, with values: - "none": don't emit diagrams (added to -fdiagnostics-plain-output) - "ascii": use pure ASCII in diagrams - "unicode": allow for conservative use of unicode drawing characters (such as box-drawing characters). - "emoji" (the default): as "unicode", but potentially allow for conservative use of emoji in the output (such as U+26A0 WARNING SIGN). I made it possible to disable emoji separately from unicode as I believe there's a generation gap in acceptance of these characters (some older programmers have a visceral reaction against them, whereas younger programmers may have no problem with them). Diagrams are emitted to stderr by default. With SARIF output they are captured as a location in "relatedLocations", with the diagram as a code block in Markdown within a "markdown" property of a message. This patch doesn't add any such diagram usage to GCC, saving that for followups, apart from adding a plugin to the test suite to exercise the functionality. contrib/ChangeLog: * unicode/gen-box-drawing-chars.py: New file. * unicode/gen-combining-chars.py: New file. * unicode/gen-printable-chars.py: New file. 
gcc/ChangeLog: * Makefile.in (OBJS-libcommon): Add text-art/box-drawing.o, text-art/canvas.o, text-art/ruler.o, text-art/selftests.o, text-art/style.o, text-art/styled-string.o, text-art/table.o, text-art/theme.o, and text-art/widget.o. * color-macros.h (COLOR_FG_BRIGHT_BLACK): New. (COLOR_FG_BRIGHT_RED): New. (COLOR_FG_BRIGHT_GREEN): New. (COLOR_FG_BRIGHT_YELLOW): New. (COLOR_FG_BRIGHT_BLUE): New. (COLOR_FG_BRIGHT_MAGENTA): New. (COLOR_FG_BRIGHT_CYAN): New. (COLOR_FG_BRIGHT_WHITE): New. (COLOR_BG_BRIGHT_BLACK): New. (COLOR_BG_BRIGHT_RED): New. (COLOR_BG_BRIGHT_GREEN): New. (COLOR_BG_BRIGHT_YELLOW): New. (COLOR_BG_BRIGHT_BLUE): New. (COLOR_BG_BRIGHT_MAGENTA): New. (COLOR_BG_BRIGHT_CYAN): New. (COLOR_BG_BRIGHT_WHITE): New. * common.opt (fdiagnostics-text-art-charset=): New option. (diagnostic-text-art.h): New SourceInclude. (diagnostic_text_art_charset) New Enum and EnumValues. * configure: Regenerate. * configure.ac (gccdepdir): Add text-art to loop. * diagnostic-diagram.h: New file. * diagnostic-format-json.cc (json_emit_diagram): New. (diagnostic_output_format_init_json): Wire it up to context->m_diagrams.m_emission_cb. * diagnostic-format-sarif.cc: Include "diagnostic-diagram.h" and "text-art/canvas.h". (sarif_result::on_nested_diagnostic): Move code to... (sarif_result::add_related_location): ...this new function. (sarif_result::on_diagram): New. (sarif_builder::emit_diagram): New. (sarif_builder::make_message_object_for_diagram): New. (sarif_emit_diagram): New. (diagnostic_output_format_init_sarif): Set context->m_diagrams.m_emission_cb to sarif_emit_diagram. * diagnostic-text-art.h: New file. * diagnostic.cc: Include "diagnostic-text-art.h", "diagnostic-diagram.h", and "text-art/theme.h". (diagnostic_initialize): Initialize context->m_diagrams and call diagnostics_text_art_charset_init. (diagnostic_finish): Clean up context->m_diagrams.m_theme. (diagnostic_emit_diagram): New. (diagnostics_text_art_charset_init): New. 
* diagnostic.h (text_art::theme): New forward decl. (class diagnostic_diagram): Likewise. (diagnostic_context::m_diagrams): New field. (diagnostic_emit_diagram): New decl. * doc/invoke.texi (Diagnostic Message Formatting Options): Add -fdiagnostics-text-art-charset=. (-fdiagnostics-plain-output): Add -fdiagnostics-text-art-charset=none. * gcc.cc: Include "diagnostic-text-art.h". (driver_handle_option): Handle OPT_fdiagnostics_text_art_charset_. * opts-common.cc (decode_cmdline_options_to_array): Add "-fdiagnostics-text-art-charset=none" to expanded_args for -fdiagnostics-plain-output. * opts.cc: Include "diagnostic-text-art.h". (common_handle_option): Handle OPT_fdiagnostics_text_art_charset_. * pretty-print.cc (pp_unicode_character): New. * pretty-print.h (pp_unicode_character): New decl. * selftest-run-tests.cc: Include "text-art/selftests.h". (selftest::run_tests): Call text_art_tests. * text-art/box-drawing-chars.inc: New file, generated by contrib/unicode/gen-box-drawing-chars.py. * text-art/box-drawing.cc: New file. * text-art/box-drawing.h: New file. * text-art/canvas.cc: New file. * text-art/canvas.h: New file. * text-art/ruler.cc: New file. * text-art/ruler.h: New file. * text-art/selftests.cc: New file. * text-art/selftests.h: New file. * text-art/style.cc: New file. * text-art/styled-string.cc: New file. * text-art/table.cc: New file. * text-art/table.h: New file. * text-art/theme.cc: New file. * text-art/theme.h: New file. * text-art/types.h: New file. * text-art/widget.cc: New file. * text-art/widget.h: New file. gcc/testsuite/ChangeLog: * gcc.dg/plugin/diagnostic-test-text-art-ascii-bw.c: New test. * gcc.dg/plugin/diagnostic-test-text-art-ascii-color.c: New test. * gcc.dg/plugin/diagnostic-test-text-art-none.c: New test. * gcc.dg/plugin/diagnostic-test-text-art-unicode-bw.c: New test. * gcc.dg/plugin/diagnostic-test-text-art-unicode-color.c: New test. * gcc.dg/plugin/diagnostic_plugin_test_text_art.c: New test plugin. 
* gcc.dg/plugin/plugin.exp (plugin_test_list): Add them. libcpp/ChangeLog: * charset.cc (get_cppchar_property): New function template, based on... (cpp_wcwidth): ...this function. Rework to use the above. Include "combining-chars.inc". (cpp_is_combining_char): New function. Include "printable-chars.inc". (cpp_is_printable_char): New function. * combining-chars.inc: New file, generated by contrib/unicode/gen-combining-chars.py. * include/cpplib.h (cpp_is_combining_char): New function decl. (cpp_is_printable_char): New function decl. * printable-chars.inc: New file, generated by contrib/unicode/gen-printable-chars.py. Signed-off-by: David Malcolm <dmalcolm@redhat.com>
Diffstat (limited to 'libcpp/charset.cc')
-rw-r--r--libcpp/charset.cc89
1 files changed, 68 insertions, 21 deletions
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index d4f573e..d492f66 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -3154,34 +3154,26 @@ cpp_display_column_to_byte_column (const char *data, int data_length,
return dw.bytes_processed () + MAX (0, display_col - avail_display);
}
-/* Our own version of wcwidth(). We don't use the actual wcwidth() in glibc,
- because that will inspect the user's locale, and in particular in an ASCII
- locale, it will not return anything useful for extended characters. But GCC
- in other respects (see e.g. _cpp_default_encoding()) behaves as if
- everything is UTF-8. We also make some tweaks that are useful for the way
- GCC needs to use this data, e.g. tabs and other control characters should be
- treated as having width 1. The lookup tables are generated from
- contrib/unicode/gen_wcwidth.py and were made by simply calling glibc
- wcwidth() on all codepoints, then applying the small tweaks. These tables
- are not highly optimized, but for the present purpose of outputting
- diagnostics, they are sufficient. */
-
-#include "generated_cpp_wcwidth.h"
-int cpp_wcwidth (cppchar_t c)
+template <typename PropertyType>
+PropertyType
+get_cppchar_property (cppchar_t c,
+ const cppchar_t *range_ends,
+ const PropertyType *range_values,
+ size_t num_ranges,
+ PropertyType default_value)
{
- if (__builtin_expect (c <= wcwidth_range_ends[0], true))
- return wcwidth_widths[0];
+ if (__builtin_expect (c <= range_ends[0], true))
+ return range_values[0];
/* Binary search the tables. */
int begin = 1;
- static const int end
- = sizeof wcwidth_range_ends / sizeof (*wcwidth_range_ends);
+ static const int end = num_ranges;
int len = end - begin;
do
{
int half = len/2;
int middle = begin + half;
- if (c > wcwidth_range_ends[middle])
+ if (c > range_ends[middle])
{
begin = middle + 1;
len -= half + 1;
@@ -3191,6 +3183,61 @@ int cpp_wcwidth (cppchar_t c)
} while (len);
if (__builtin_expect (begin != end, true))
- return wcwidth_widths[begin];
- return 1;
+ return range_values[begin];
+
+ return default_value;
+}
+
+/* Our own version of wcwidth(). We don't use the actual wcwidth() in glibc,
+ because that will inspect the user's locale, and in particular in an ASCII
+ locale, it will not return anything useful for extended characters. But GCC
+ in other respects (see e.g. _cpp_default_encoding()) behaves as if
+ everything is UTF-8. We also make some tweaks that are useful for the way
+ GCC needs to use this data, e.g. tabs and other control characters should be
+ treated as having width 1. The lookup tables are generated from
+ contrib/unicode/gen_wcwidth.py and were made by simply calling glibc
+ wcwidth() on all codepoints, then applying the small tweaks. These tables
+ are not highly optimized, but for the present purpose of outputting
+ diagnostics, they are sufficient. */
+
+#include "generated_cpp_wcwidth.h"
+
+/* Return the display width of code point C, looked up in the generated
+   wcwidth_range_ends / wcwidth_widths tables.  A width of 1 is passed as
+   the fallback for code points the tables do not cover.  */
+int
+cpp_wcwidth (cppchar_t c)
+{
+  const size_t table_len
+    = sizeof (wcwidth_range_ends) / sizeof (wcwidth_range_ends[0]);
+  return get_cppchar_property<unsigned char> (c, wcwidth_range_ends,
+                                              wcwidth_widths, table_len, 1);
+}
+
+#include "combining-chars.inc"
+
+/* Look up code point C in the combining_range_ends / is_combining tables
+   from combining-chars.inc and return the flag found there; false is
+   passed as the fallback for code points the tables do not cover.
+   NOTE(review): this uses the same get_cppchar_property<bool>
+   instantiation as cpp_is_printable_char, and that template stores
+   num_ranges in a function-local static on its first call -- confirm the
+   two tables cannot disagree on length.  */
+bool
+cpp_is_combining_char (cppchar_t c)
+{
+  const size_t table_len
+    = sizeof (combining_range_ends) / sizeof (combining_range_ends[0]);
+  return get_cppchar_property<bool> (c, combining_range_ends, is_combining,
+                                     table_len, false);
+}
+
+#include "printable-chars.inc"
+
+/* Look up code point C in the printable_range_ends / is_printable tables
+   from printable-chars.inc and return the flag found there; false is
+   passed as the fallback for code points the tables do not cover.
+   NOTE(review): this uses the same get_cppchar_property<bool>
+   instantiation as cpp_is_combining_char, and that template stores
+   num_ranges in a function-local static on its first call -- confirm the
+   two tables cannot disagree on length.  */
+bool
+cpp_is_printable_char (cppchar_t c)
+{
+  const size_t table_len
+    = sizeof (printable_range_ends) / sizeof (printable_range_ends[0]);
+  return get_cppchar_property<bool> (c, printable_range_ends, is_printable,
+                                     table_len, false);
+}