From ea314ccd625aada7ed8a324ac07cfc3a8aa0f03f Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Tue, 9 Jan 2024 14:43:40 +0000 Subject: libstdc++: Fix Unicode property detection functions Fix some copy & pasted logic in __is_extended_pictographic. This function should yield false for the values before the first edge, not true. Also add a missing boundary condition check in __incb_property. Also fix an off-by-one error in _Utf_iterator::operator++() that would make dereferencing a past-the-end iterator undefined (where the intended design is that the iterator is always incrementable and dereferenceable, for better memory safety). Also simplify the grapheme view iterator, which still contained some remnants of an earlier design I was experimenting with. Slightly tweak the gen_libstdcxx_unicode_data.py script so that the _Gcb_property enumerators are in the order we encounter them in the data file, instead of sorting them alphabetically. Start with the "Other" property at value 0, because that's the default property for anything not in the file. This makes no practical difference, but seems cleaner. It causes the values in the __gcb_edges table to change, so can only be done now before anybody is using this code yet. The enumerator values and table entries become ABI artefacts for the function using them. contrib/ChangeLog: * unicode/gen_libstdcxx_unicode_data.py: Print out Gcb_property enumerators in the order they're seen, not alphabetical order. libstdc++-v3/ChangeLog: * include/bits/unicode-data.h: Regenerate. * include/bits/unicode.h (_Utf_iterator::operator++()): Fix off by one error. (__incb_property): Add missing check for values before the first edge. (__is_extended_pictographic): Invert return values to fix copy&pasted logic. (_Grapheme_cluster_view::_Iterator): Remove second iterator member and find end of cluster lazily. * testsuite/ext/unicode/grapheme_view.cc: New test. * testsuite/ext/unicode/properties.cc: New test. 
* testsuite/ext/unicode/view.cc: New test. --- contrib/unicode/gen_libstdcxx_unicode_data.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'contrib') diff --git a/contrib/unicode/gen_libstdcxx_unicode_data.py b/contrib/unicode/gen_libstdcxx_unicode_data.py index 1449145..f2f2f8a 100755 --- a/contrib/unicode/gen_libstdcxx_unicode_data.py +++ b/contrib/unicode/gen_libstdcxx_unicode_data.py @@ -122,7 +122,10 @@ for line in open("GraphemeBreakProperty.txt", "r"): process_code_points(code_points, grapheme_property.strip()) edges = find_edges(all_code_points) -gcb_props = {p:i+1 for i,p in enumerate(sorted(set([x[1] for x in edges])))} +gcb_props = {"Other":0} +for c, p in edges: + if p not in gcb_props: + gcb_props[p] = len(gcb_props) shift_bits = int(math.ceil(math.log2(len(gcb_props)))) # Enum definition for std::__unicode::_Gcb_property -- cgit v1.1 From 73ce73fcaded5525c070ee4c9ed3ae16c98e86eb Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 10 Jan 2024 00:18:30 +0000 Subject: Daily bump. --- contrib/ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'contrib') diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 569b889..04bde02 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,8 @@ +2024-01-09 Jonathan Wakely + + * unicode/gen_libstdcxx_unicode_data.py: Print out Gcb_property + enumerators in the order they're seen, not alphabetical order. + 2024-01-08 Jonathan Wakely * unicode/README: Add notes about generating libstdc++ tables. -- cgit v1.1 From 8f67953d0198fe9e053cc925eb631d2f29005466 Mon Sep 17 00:00:00 2001 From: Paul Iannetta Date: Tue, 24 Oct 2023 09:48:42 +0200 Subject: dg-extract-results.py: Ignore case in header line DejaGNU changed its header line from "Test Run By" to "Test run by" around 2016. This patch makes it so that both alternatives are correctly detected. contrib/ChangeLog: * dg-extract-results.py: Make the test_run regex case insensitive. 
--- contrib/dg-extract-results.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'contrib') diff --git a/contrib/dg-extract-results.py b/contrib/dg-extract-results.py index d67ce4f..0fe3c5f 100644 --- a/contrib/dg-extract-results.py +++ b/contrib/dg-extract-results.py @@ -113,7 +113,8 @@ class Prog: # Whether to create .sum rather than .log output. self.do_sum = True # Regexps used while parsing. - self.test_run_re = re.compile (r'^Test run by (\S+) on (.*)$') + self.test_run_re = re.compile (r'^Test run by (\S+) on (.*)$', + re.IGNORECASE) self.tool_re = re.compile (r'^\t\t=== (.*) tests ===$') self.result_re = re.compile (r'^(PASS|XPASS|FAIL|XFAIL|UNRESOLVED' r'|WARNING|ERROR|UNSUPPORTED|UNTESTED' -- cgit v1.1 From 1a80e9558dd7fed1a9d22b3606489f72e4dd8c20 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 12 Jan 2024 00:17:54 +0000 Subject: Daily bump. --- contrib/ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'contrib') diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 04bde02..0d94109 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,8 @@ +2024-01-11 Paul Iannetta + + * dg-extract-results.py: Make the test_run regex case + insensitive. + 2024-01-09 Jonathan Wakely * unicode/gen_libstdcxx_unicode_data.py: Print out Gcb_property -- cgit v1.1