diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2016-07-22 18:15:38 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2016-07-22 18:15:38 +0000 |
commit | 22b955cca564a9a3a5b8c9d9dd1e295b7943c128 (patch) | |
tree | abdbd898676e1f853fca2d7e031d105d7ebcf676 /libgo/go/unicode | |
parent | 9d04a3af4c6491536badf6bde9707c907e4d196b (diff) | |
download | gcc-22b955cca564a9a3a5b8c9d9dd1e295b7943c128.zip gcc-22b955cca564a9a3a5b8c9d9dd1e295b7943c128.tar.gz gcc-22b955cca564a9a3a5b8c9d9dd1e295b7943c128.tar.bz2 |
libgo: update to go1.7rc3
Reviewed-on: https://go-review.googlesource.com/25150
From-SVN: r238662
Diffstat (limited to 'libgo/go/unicode')
-rw-r--r-- | libgo/go/unicode/graphic.go | 2 | ||||
-rw-r--r-- | libgo/go/unicode/letter.go | 20 | ||||
-rw-r--r-- | libgo/go/unicode/letter_test.go | 2 | ||||
-rw-r--r-- | libgo/go/unicode/script_test.go | 10 | ||||
-rw-r--r-- | libgo/go/unicode/tables.go | 618 | ||||
-rw-r--r-- | libgo/go/unicode/utf16/export_test.go | 2 | ||||
-rw-r--r-- | libgo/go/unicode/utf16/utf16.go | 34 | ||||
-rw-r--r-- | libgo/go/unicode/utf16/utf16_test.go | 55 | ||||
-rw-r--r-- | libgo/go/unicode/utf8/utf8.go | 6 |
9 files changed, 612 insertions, 137 deletions
diff --git a/libgo/go/unicode/graphic.go b/libgo/go/unicode/graphic.go index 81eae3e..ca62419 100644 --- a/libgo/go/unicode/graphic.go +++ b/libgo/go/unicode/graphic.go @@ -45,7 +45,7 @@ func IsGraphic(r rune) bool { // IsPrint reports whether the rune is defined as printable by Go. Such // characters include letters, marks, numbers, punctuation, symbols, and the // ASCII space character, from categories L, M, N, P, S and the ASCII space -// character. This categorization is the same as IsGraphic except that the +// character. This categorization is the same as IsGraphic except that the // only spacing character is ASCII space, U+0020. func IsPrint(r rune) bool { if uint32(r) <= MaxLatin1 { diff --git a/libgo/go/unicode/letter.go b/libgo/go/unicode/letter.go index 7fe4241..8aec920 100644 --- a/libgo/go/unicode/letter.go +++ b/libgo/go/unicode/letter.go @@ -27,7 +27,7 @@ type RangeTable struct { LatinOffset int // number of entries in R16 with Hi <= MaxLatin1 } -// Range16 represents of a range of 16-bit Unicode code points. The range runs from Lo to Hi +// Range16 represents of a range of 16-bit Unicode code points. The range runs from Lo to Hi // inclusive and has the specified stride. type Range16 struct { Lo uint16 @@ -36,7 +36,7 @@ type Range16 struct { } // Range32 represents of a range of Unicode code points and is used when one or -// more of the values will not fit in 16 bits. The range runs from Lo to Hi +// more of the values will not fit in 16 bits. The range runs from Lo to Hi // inclusive and has the specified stride. Lo and Hi must always be >= 1<<16. type Range32 struct { Lo uint32 @@ -48,10 +48,10 @@ type Range32 struct { // code point to one code point) case conversion. // The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas // are the number to add to the code point to reach the code point for a -// different case for that character. They may be negative. If zero, it +// different case for that character. They may be negative. If zero, it // means the character is in the corresponding case. There is a special // case representing sequences of alternating corresponding Upper and Lower -// pairs. It appears with a fixed Delta of +// pairs. It appears with a fixed Delta of // {UpperLower, UpperLower, UpperLower} // The constant UpperLower has an otherwise impossible delta value. type CaseRange struct { @@ -217,7 +217,7 @@ func to(_case int, r rune, caseRange []CaseRange) rune { m := lo + (hi-lo)/2 cr := caseRange[m] if rune(cr.Lo) <= r && r <= rune(cr.Hi) { - delta := rune(cr.Delta[_case]) + delta := cr.Delta[_case] if delta > MaxRune { // In an Upper-Lower sequence, which always starts with // an UpperCase letter, the real deltas always look like: @@ -307,7 +307,7 @@ func (special SpecialCase) ToLower(r rune) rune { return r1 } -// caseOrbit is defined in tables.go as []foldPair. Right now all the +// caseOrbit is defined in tables.go as []foldPair. Right now all the // entries fit in uint16, so use uint16. If that changes, compilation // will fail (the constants in the composite literal will not fit in uint16) // and the types here can change to uint32. @@ -317,7 +317,7 @@ type foldPair struct { } // SimpleFold iterates over Unicode code points equivalent under -// the Unicode-defined simple case folding. Among the code points +// the Unicode-defined simple case folding. Among the code points // equivalent to rune (including rune itself), SimpleFold returns the // smallest rune > r if one exists, or else the smallest rune >= 0. // @@ -332,6 +332,10 @@ type foldPair struct { // SimpleFold('1') = '1' // func SimpleFold(r rune) rune { + if int(r) < len(asciiFold) { + return rune(asciiFold[r]) + } + // Consult caseOrbit table for special cases. lo := 0 hi := len(caseOrbit) @@ -347,7 +351,7 @@ func SimpleFold(r rune) rune { return rune(caseOrbit[lo].To) } - // No folding specified. This is a one- or two-element + // No folding specified. This is a one- or two-element // equivalence class containing rune and ToLower(rune) // and ToUpper(rune) if they are different from rune. if l := ToLower(r); l != r { diff --git a/libgo/go/unicode/letter_test.go b/libgo/go/unicode/letter_test.go index a40b412..0eb9ee9 100644 --- a/libgo/go/unicode/letter_test.go +++ b/libgo/go/unicode/letter_test.go @@ -73,7 +73,6 @@ var letterTest = []rune{ 0x1200, 0x1312, 0x1401, - 0x1885, 0x2c00, 0xa800, 0xf900, @@ -94,6 +93,7 @@ var notletterTest = []rune{ 0x375, 0x619, 0x700, + 0x1885, 0xfffe, 0x1ffff, 0x10ffff, diff --git a/libgo/go/unicode/script_test.go b/libgo/go/unicode/script_test.go index 935c225..1fe4581 100644 --- a/libgo/go/unicode/script_test.go +++ b/libgo/go/unicode/script_test.go @@ -18,10 +18,12 @@ type T struct { // mostly to discover when new scripts and categories arise. var inTest = []T{ {0x11711, "Ahom"}, + {0x1e900, "Adlam"}, {0x14646, "Anatolian_Hieroglyphs"}, {0x06e2, "Arabic"}, {0x0567, "Armenian"}, {0x10b20, "Avestan"}, + {0x11c00, "Bhaiksuki"}, {0x1b37, "Balinese"}, {0xa6af, "Bamum"}, {0x16ada, "Bassa_Vah"}, @@ -89,6 +91,7 @@ var inTest = []T{ {0x0d42, "Malayalam"}, {0x0843, "Mandaic"}, {0x10ac8, "Manichaean"}, + {0x11cB6, "Marchen"}, {0xabd0, "Meetei_Mayek"}, {0x1e800, "Mende_Kikakui"}, {0x1099f, "Meroitic_Hieroglyphs"}, @@ -100,6 +103,7 @@ var inTest = []T{ {0x11293, "Multani"}, {0x104c, "Myanmar"}, {0x10880, "Nabataean"}, + {0x11400, "Newa"}, {0x19c3, "New_Tai_Lue"}, {0x07f8, "Nko"}, {0x169b, "Ogham"}, @@ -112,6 +116,7 @@ var inTest = []T{ {0x10a6f, "Old_South_Arabian"}, {0x10c20, "Old_Turkic"}, {0x0b3e, "Oriya"}, + {0x104d9, "Osage"}, {0x10491, "Osmanya"}, {0x16b2b, "Pahawh_Hmong"}, {0x10876, "Palmyrene"}, @@ -139,6 +144,7 @@ var inTest = []T{ {0xaadc, "Tai_Viet"}, {0x116c9, "Takri"}, {0x0bbf, "Tamil"}, + {0x17000, "Tangut"}, {0x0c55, "Telugu"}, {0x07a7, "Thaana"}, {0x0e46, "Thai"}, @@ -220,9 +226,11 @@ var inPropTest = []T{ {0x216F, "Other_Uppercase"}, {0x0027, "Pattern_Syntax"}, {0x0020, "Pattern_White_Space"}, + {0x06DD, "Prepended_Concatenation_Mark"}, {0x300D, "Quotation_Mark"}, {0x2EF3, "Radical"}, - {0x061F, "STerm"}, + {0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal + {0x061F, "Sentence_Terminal"}, {0x2071, "Soft_Dotted"}, {0x003A, "Terminal_Punctuation"}, {0x9FC3, "Unified_Ideograph"}, diff --git a/libgo/go/unicode/tables.go b/libgo/go/unicode/tables.go index 8bb4206..15fecd9 100644 --- a/libgo/go/unicode/tables.go +++ b/libgo/go/unicode/tables.go @@ -3,13 +3,13 @@ // license that can be found in the LICENSE file. // Generated by running -// maketables --tables=all --data=http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/8.0.0/ucd/CaseFolding.txt +// maketables --tables=all --data=http://www.unicode.org/Public/9.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt // DO NOT EDIT package unicode // Version is the Unicode edition from which the tables are derived. -const Version = "8.0.0" +const Version = "9.0.0" // Categories is the set of Unicode category tables. var Categories = map[string]*RangeTable{ @@ -58,8 +58,9 @@ var _C = &RangeTable{ {0x00ad, 0x0600, 1363}, {0x0601, 0x0605, 1}, {0x061c, 0x06dd, 193}, - {0x070f, 0x180e, 4351}, - {0x200b, 0x200f, 1}, + {0x070f, 0x08e2, 467}, + {0x180e, 0x200b, 2045}, + {0x200c, 0x200f, 1}, {0x202a, 0x202e, 1}, {0x2060, 0x2064, 1}, {0x2066, 0x206f, 1}, @@ -92,8 +93,9 @@ var _Cf = &RangeTable{ {0x00ad, 0x0600, 1363}, {0x0601, 0x0605, 1}, {0x061c, 0x06dd, 193}, - {0x070f, 0x180e, 4351}, - {0x200b, 0x200f, 1}, + {0x070f, 0x08e2, 467}, + {0x180e, 0x200b, 2045}, + {0x200c, 0x200f, 1}, {0x202a, 0x202e, 1}, {0x2060, 0x2064, 1}, {0x2066, 0x206f, 1}, @@ -171,6 +173,7 @@ var _L = &RangeTable{ {0x0828, 0x0840, 24}, {0x0841, 0x0858, 1}, {0x08a0, 0x08b4, 1}, + {0x08b6, 0x08bd, 1}, {0x0904, 0x0939, 1}, {0x093d, 0x0950, 19}, {0x0958, 0x0961, 1}, @@ -231,7 +234,8 @@ var _L = &RangeTable{ {0x0c3d, 0x0c58, 27}, {0x0c59, 0x0c5a, 1}, {0x0c60, 0x0c61, 1}, - {0x0c85, 0x0c8c, 1}, + {0x0c80, 0x0c85, 5}, + {0x0c86, 0x0c8c, 1}, {0x0c8e, 0x0c90, 1}, {0x0c92, 0x0ca8, 1}, {0x0caa, 0x0cb3, 1}, @@ -242,8 +246,9 @@ var _L = &RangeTable{ {0x0d05, 0x0d0c, 1}, {0x0d0e, 0x0d10, 1}, {0x0d12, 0x0d3a, 1}, - {0x0d3d, 0x0d5f, 17}, - {0x0d60, 0x0d61, 1}, + {0x0d3d, 0x0d4e, 17}, + {0x0d54, 0x0d56, 1}, + {0x0d5f, 0x0d61, 1}, {0x0d7a, 0x0d7f, 1}, {0x0d85, 0x0d96, 1}, {0x0d9a, 0x0db1, 1}, @@ -317,7 +322,8 @@ var _L = &RangeTable{ {0x1780, 0x17b3, 1}, {0x17d7, 0x17dc, 5}, {0x1820, 0x1877, 1}, - {0x1880, 0x18a8, 1}, + {0x1880, 0x1884, 1}, + {0x1887, 0x18a8, 1}, {0x18aa, 0x18b0, 6}, {0x18b1, 0x18f5, 1}, {0x1900, 0x191e, 1}, @@ -336,6 +342,7 @@ var _L = &RangeTable{ {0x1c00, 0x1c23, 1}, {0x1c4d, 0x1c4f, 1}, {0x1c5a, 0x1c7d, 1}, + {0x1c80, 0x1c88, 1}, {0x1ce9, 0x1cec, 1}, {0x1cee, 0x1cf1, 1}, {0x1cf5, 0x1cf6, 1}, @@ -412,7 +419,7 @@ var _L = &RangeTable{ {0xa6a0, 0xa6e5, 1}, {0xa717, 0xa71f, 1}, {0xa722, 0xa788, 1}, - {0xa78b, 0xa7ad, 1}, + {0xa78b, 0xa7ae, 1}, {0xa7b0, 0xa7b7, 1}, {0xa7f7, 0xa801, 1}, {0xa803, 0xa805, 1}, @@ -498,6 +505,8 @@ var _L = &RangeTable{ {0x103a0, 0x103c3, 1}, {0x103c8, 0x103cf, 1}, {0x10400, 0x1049d, 1}, + {0x104b0, 0x104d3, 1}, + {0x104d8, 0x104fb, 1}, {0x10500, 0x10527, 1}, {0x10530, 0x10563, 1}, {0x10600, 0x10736, 1}, @@ -557,6 +566,8 @@ var _L = &RangeTable{ {0x11335, 0x11339, 1}, {0x1133d, 0x11350, 19}, {0x1135d, 0x11361, 1}, + {0x11400, 0x11434, 1}, + {0x11447, 0x1144a, 1}, {0x11480, 0x114af, 1}, {0x114c4, 0x114c5, 1}, {0x114c7, 0x11580, 185}, @@ -569,6 +580,10 @@ var _L = &RangeTable{ {0x118a0, 0x118df, 1}, {0x118ff, 0x11ac0, 449}, {0x11ac1, 0x11af8, 1}, + {0x11c00, 0x11c08, 1}, + {0x11c0a, 0x11c2e, 1}, + {0x11c40, 0x11c72, 50}, + {0x11c73, 0x11c8f, 1}, {0x12000, 0x12399, 1}, {0x12480, 0x12543, 1}, {0x13000, 0x1342e, 1}, @@ -583,6 +598,9 @@ var _L = &RangeTable{ {0x16f00, 0x16f44, 1}, {0x16f50, 0x16f93, 67}, {0x16f94, 0x16f9f, 1}, + {0x16fe0, 0x17000, 32}, + {0x17001, 0x187ec, 1}, + {0x18800, 0x18af2, 1}, {0x1b000, 0x1b001, 1}, {0x1bc00, 0x1bc6a, 1}, {0x1bc70, 0x1bc7c, 1}, @@ -619,6 +637,7 @@ var _L = &RangeTable{ {0x1d7aa, 0x1d7c2, 1}, {0x1d7c4, 0x1d7cb, 1}, {0x1e800, 0x1e8c4, 1}, + {0x1e900, 0x1e943, 1}, {0x1ee00, 0x1ee03, 1}, {0x1ee05, 0x1ee1f, 1}, {0x1ee21, 0x1ee22, 1}, @@ -706,6 +725,7 @@ var _Ll = &RangeTable{ {0x04cf, 0x052f, 2}, {0x0561, 0x0587, 1}, {0x13f8, 0x13fd, 1}, + {0x1c80, 0x1c88, 1}, {0x1d00, 0x1d2b, 1}, {0x1d6b, 0x1d77, 1}, {0x1d79, 0x1d9a, 1}, @@ -773,6 +793,7 @@ var _Ll = &RangeTable{ }, R32: []Range32{ {0x10428, 0x1044f, 1}, + {0x104d8, 0x104fb, 1}, {0x10cc0, 0x10cf2, 1}, {0x118c0, 0x118df, 1}, {0x1d41a, 0x1d433, 1}, @@ -802,7 +823,8 @@ var _Ll = &RangeTable{ {0x1d78a, 0x1d78f, 1}, {0x1d7aa, 0x1d7c2, 1}, {0x1d7c4, 0x1d7c9, 1}, - {0x1d7cb, 0x1d7cb, 1}, + {0x1d7cb, 0x1e922, 4439}, + {0x1e923, 0x1e943, 1}, }, LatinOffset: 4, } @@ -854,6 +876,7 @@ var _Lm = &RangeTable{ {0x16b40, 0x16b40, 1}, {0x16b41, 0x16b43, 1}, {0x16f93, 0x16f9f, 1}, + {0x16fe0, 0x16fe0, 1}, }, } @@ -880,6 +903,7 @@ var _Lo = &RangeTable{ {0x0800, 0x0815, 1}, {0x0840, 0x0858, 1}, {0x08a0, 0x08b4, 1}, + {0x08b6, 0x08bd, 1}, {0x0904, 0x0939, 1}, {0x093d, 0x0950, 19}, {0x0958, 0x0961, 1}, @@ -940,7 +964,8 @@ var _Lo = &RangeTable{ {0x0c3d, 0x0c58, 27}, {0x0c59, 0x0c5a, 1}, {0x0c60, 0x0c61, 1}, - {0x0c85, 0x0c8c, 1}, + {0x0c80, 0x0c85, 5}, + {0x0c86, 0x0c8c, 1}, {0x0c8e, 0x0c90, 1}, {0x0c92, 0x0ca8, 1}, {0x0caa, 0x0cb3, 1}, @@ -951,8 +976,9 @@ var _Lo = &RangeTable{ {0x0d05, 0x0d0c, 1}, {0x0d0e, 0x0d10, 1}, {0x0d12, 0x0d3a, 1}, - {0x0d3d, 0x0d5f, 17}, - {0x0d60, 0x0d61, 1}, + {0x0d3d, 0x0d4e, 17}, + {0x0d54, 0x0d56, 1}, + {0x0d5f, 0x0d61, 1}, {0x0d7a, 0x0d7f, 1}, {0x0d85, 0x0d96, 1}, {0x0d9a, 0x0db1, 1}, @@ -1022,7 +1048,8 @@ var _Lo = &RangeTable{ {0x17dc, 0x1820, 68}, {0x1821, 0x1842, 1}, {0x1844, 0x1877, 1}, - {0x1880, 0x18a8, 1}, + {0x1880, 0x1884, 1}, + {0x1887, 0x18a8, 1}, {0x18aa, 0x18b0, 6}, {0x18b1, 0x18f5, 1}, {0x1900, 0x191e, 1}, @@ -1211,6 +1238,8 @@ var _Lo = &RangeTable{ {0x11335, 0x11339, 1}, {0x1133d, 0x11350, 19}, {0x1135d, 0x11361, 1}, + {0x11400, 0x11434, 1}, + {0x11447, 0x1144a, 1}, {0x11480, 0x114af, 1}, {0x114c4, 0x114c5, 1}, {0x114c7, 0x11580, 185}, @@ -1222,6 +1251,10 @@ var _Lo = &RangeTable{ {0x11700, 0x11719, 1}, {0x118ff, 0x11ac0, 449}, {0x11ac1, 0x11af8, 1}, + {0x11c00, 0x11c08, 1}, + {0x11c0a, 0x11c2e, 1}, + {0x11c40, 0x11c72, 50}, + {0x11c73, 0x11c8f, 1}, {0x12000, 0x12399, 1}, {0x12480, 0x12543, 1}, {0x13000, 0x1342e, 1}, @@ -1233,9 +1266,11 @@ var _Lo = &RangeTable{ {0x16b63, 0x16b77, 1}, {0x16b7d, 0x16b8f, 1}, {0x16f00, 0x16f44, 1}, - {0x16f50, 0x1b000, 16560}, - {0x1b001, 0x1bc00, 3071}, - {0x1bc01, 0x1bc6a, 1}, + {0x16f50, 0x17000, 176}, + {0x17001, 0x187ec, 1}, + {0x18800, 0x18af2, 1}, + {0x1b000, 0x1b001, 1}, + {0x1bc00, 0x1bc6a, 1}, {0x1bc70, 0x1bc7c, 1}, {0x1bc80, 0x1bc88, 1}, {0x1bc90, 0x1bc99, 1}, @@ -1386,13 +1421,14 @@ var _Lu = &RangeTable{ {0xa78b, 0xa78d, 2}, {0xa790, 0xa792, 2}, {0xa796, 0xa7aa, 2}, - {0xa7ab, 0xa7ad, 1}, + {0xa7ab, 0xa7ae, 1}, {0xa7b0, 0xa7b4, 1}, {0xa7b6, 0xff21, 22379}, {0xff22, 0xff3a, 1}, }, R32: []Range32{ {0x10400, 0x10427, 1}, + {0x104b0, 0x104d3, 1}, {0x10c80, 0x10cb2, 1}, {0x118a0, 0x118bf, 1}, {0x1d400, 0x1d419, 1}, @@ -1424,7 +1460,8 @@ var _Lu = &RangeTable{ {0x1d71c, 0x1d734, 1}, {0x1d756, 0x1d76e, 1}, {0x1d790, 0x1d7a8, 1}, - {0x1d7ca, 0x1d7ca, 1}, + {0x1d7ca, 0x1e900, 4406}, + {0x1e901, 0x1e921, 1}, }, LatinOffset: 3, } @@ -1453,6 +1490,7 @@ var _M = &RangeTable{ {0x0825, 0x0827, 1}, {0x0829, 0x082d, 1}, {0x0859, 0x085b, 1}, + {0x08d4, 0x08e1, 1}, {0x08e3, 0x0903, 1}, {0x093a, 0x093c, 1}, {0x093e, 0x094f, 1}, @@ -1546,6 +1584,7 @@ var _M = &RangeTable{ {0x17b4, 0x17d3, 1}, {0x17dd, 0x180b, 46}, {0x180c, 0x180d, 1}, + {0x1885, 0x1886, 1}, {0x18a9, 0x1920, 119}, {0x1921, 0x192b, 1}, {0x1930, 0x193b, 1}, @@ -1567,7 +1606,7 @@ var _M = &RangeTable{ {0x1cf3, 0x1cf4, 1}, {0x1cf8, 0x1cf9, 1}, {0x1dc0, 0x1df5, 1}, - {0x1dfc, 0x1dff, 1}, + {0x1dfb, 0x1dff, 1}, {0x20d0, 0x20f0, 1}, {0x2cef, 0x2cf1, 1}, {0x2d7f, 0x2de0, 97}, @@ -1582,7 +1621,7 @@ var _M = &RangeTable{ {0xa80b, 0xa823, 24}, {0xa824, 0xa827, 1}, {0xa880, 0xa881, 1}, - {0xa8b4, 0xa8c4, 1}, + {0xa8b4, 0xa8c5, 1}, {0xa8e0, 0xa8f1, 1}, {0xa926, 0xa92d, 1}, {0xa947, 0xa953, 1}, @@ -1626,7 +1665,8 @@ var _M = &RangeTable{ {0x111b3, 0x111c0, 1}, {0x111ca, 0x111cc, 1}, {0x1122c, 0x11237, 1}, - {0x112df, 0x112ea, 1}, + {0x1123e, 0x112df, 161}, + {0x112e0, 0x112ea, 1}, {0x11300, 0x11303, 1}, {0x1133c, 0x1133e, 2}, {0x1133f, 0x11344, 1}, @@ -1636,6 +1676,7 @@ var _M = &RangeTable{ {0x11363, 0x11366, 3}, {0x11367, 0x1136c, 1}, {0x11370, 0x11374, 1}, + {0x11435, 0x11446, 1}, {0x114b0, 0x114c3, 1}, {0x115af, 0x115b5, 1}, {0x115b8, 0x115c0, 1}, @@ -1643,6 +1684,10 @@ var _M = &RangeTable{ {0x11630, 0x11640, 1}, {0x116ab, 0x116b7, 1}, {0x1171d, 0x1172b, 1}, + {0x11c2f, 0x11c36, 1}, + {0x11c38, 0x11c3f, 1}, + {0x11c92, 0x11ca7, 1}, + {0x11ca9, 0x11cb6, 1}, {0x16af0, 0x16af4, 1}, {0x16b30, 0x16b36, 1}, {0x16f51, 0x16f7e, 1}, @@ -1659,7 +1704,13 @@ var _M = &RangeTable{ {0x1da75, 0x1da84, 15}, {0x1da9b, 0x1da9f, 1}, {0x1daa1, 0x1daaf, 1}, + {0x1e000, 0x1e006, 1}, + {0x1e008, 0x1e018, 1}, + {0x1e01b, 0x1e021, 1}, + {0x1e023, 0x1e024, 1}, + {0x1e026, 0x1e02a, 1}, {0x1e8d0, 0x1e8d6, 1}, + {0x1e944, 0x1e94a, 1}, {0xe0100, 0xe01ef, 1}, }, } @@ -1781,7 +1832,10 @@ var _Mc = &RangeTable{ {0x11347, 0x11348, 1}, {0x1134b, 0x1134d, 1}, {0x11357, 0x11362, 11}, - {0x11363, 0x114b0, 333}, + {0x11363, 0x11435, 210}, + {0x11436, 0x11437, 1}, + {0x11440, 0x11441, 1}, + {0x11445, 0x114b0, 107}, {0x114b1, 0x114b2, 1}, {0x114b9, 0x114bb, 2}, {0x114bc, 0x114be, 1}, @@ -1795,7 +1849,10 @@ var _Mc = &RangeTable{ {0x116ae, 0x116af, 1}, {0x116b6, 0x11720, 106}, {0x11721, 0x11726, 5}, - {0x16f51, 0x16f7e, 1}, + {0x11c2f, 0x11c3e, 15}, + {0x11ca9, 0x11cb1, 8}, + {0x11cb4, 0x16f51, 21149}, + {0x16f52, 0x16f7e, 1}, {0x1d165, 0x1d166, 1}, {0x1d16d, 0x1d172, 1}, }, @@ -1835,6 +1892,7 @@ var _Mn = &RangeTable{ {0x0825, 0x0827, 1}, {0x0829, 0x082d, 1}, {0x0859, 0x085b, 1}, + {0x08d4, 0x08e1, 1}, {0x08e3, 0x0902, 1}, {0x093a, 0x093c, 2}, {0x0941, 0x0948, 1}, @@ -1913,6 +1971,7 @@ var _Mn = &RangeTable{ {0x17ca, 0x17d3, 1}, {0x17dd, 0x180b, 46}, {0x180c, 0x180d, 1}, + {0x1885, 0x1886, 1}, {0x18a9, 0x1920, 119}, {0x1921, 0x1922, 1}, {0x1927, 0x1928, 1}, @@ -1946,7 +2005,7 @@ var _Mn = &RangeTable{ {0x1ced, 0x1cf4, 7}, {0x1cf8, 0x1cf9, 1}, {0x1dc0, 0x1df5, 1}, - {0x1dfc, 0x1dff, 1}, + {0x1dfb, 0x1dff, 1}, {0x20d0, 0x20dc, 1}, {0x20e1, 0x20e5, 4}, {0x20e6, 0x20f0, 1}, @@ -1962,7 +2021,8 @@ var _Mn = &RangeTable{ {0xa802, 0xa806, 4}, {0xa80b, 0xa825, 26}, {0xa826, 0xa8c4, 158}, - {0xa8e0, 0xa8f1, 1}, + {0xa8c5, 0xa8e0, 27}, + {0xa8e1, 0xa8f1, 1}, {0xa926, 0xa92d, 1}, {0xa947, 0xa951, 1}, {0xa980, 0xa982, 1}, @@ -2006,13 +2066,17 @@ var _Mn = &RangeTable{ {0x111ca, 0x111cc, 1}, {0x1122f, 0x11231, 1}, {0x11234, 0x11236, 2}, - {0x11237, 0x112df, 168}, - {0x112e3, 0x112ea, 1}, + {0x11237, 0x1123e, 7}, + {0x112df, 0x112e3, 4}, + {0x112e4, 0x112ea, 1}, {0x11300, 0x11301, 1}, {0x1133c, 0x11340, 4}, {0x11366, 0x1136c, 1}, {0x11370, 0x11374, 1}, - {0x114b3, 0x114b8, 1}, + {0x11438, 0x1143f, 1}, + {0x11442, 0x11444, 1}, + {0x11446, 0x114b3, 109}, + {0x114b4, 0x114b8, 1}, {0x114ba, 0x114bf, 5}, {0x114c0, 0x114c2, 2}, {0x114c3, 0x115b2, 239}, @@ -2029,6 +2093,13 @@ var _Mn = &RangeTable{ {0x1171e, 0x1171f, 1}, {0x11722, 0x11725, 1}, {0x11727, 0x1172b, 1}, + {0x11c30, 0x11c36, 1}, + {0x11c38, 0x11c3d, 1}, + {0x11c3f, 0x11c92, 83}, + {0x11c93, 0x11ca7, 1}, + {0x11caa, 0x11cb0, 1}, + {0x11cb2, 0x11cb3, 1}, + {0x11cb5, 0x11cb6, 1}, {0x16af0, 0x16af4, 1}, {0x16b30, 0x16b36, 1}, {0x16f8f, 0x16f92, 1}, @@ -2043,7 +2114,13 @@ var _Mn = &RangeTable{ {0x1da75, 0x1da84, 15}, {0x1da9b, 0x1da9f, 1}, {0x1daa1, 0x1daaf, 1}, + {0x1e000, 0x1e006, 1}, + {0x1e008, 0x1e018, 1}, + {0x1e01b, 0x1e021, 1}, + {0x1e023, 0x1e024, 1}, + {0x1e026, 0x1e02a, 1}, {0x1e8d0, 0x1e8d6, 1}, + {0x1e944, 0x1e94a, 1}, {0xe0100, 0xe01ef, 1}, }, } @@ -2068,7 +2145,8 @@ var _N = &RangeTable{ {0x0c66, 0x0c6f, 1}, {0x0c78, 0x0c7e, 1}, {0x0ce6, 0x0cef, 1}, - {0x0d66, 0x0d75, 1}, + {0x0d58, 0x0d5e, 1}, + {0x0d66, 0x0d78, 1}, {0x0de6, 0x0def, 1}, {0x0e50, 0x0e59, 1}, {0x0ed0, 0x0ed9, 1}, @@ -2148,11 +2226,13 @@ var _N = &RangeTable{ {0x111d0, 0x111d9, 1}, {0x111e1, 0x111f4, 1}, {0x112f0, 0x112f9, 1}, + {0x11450, 0x11459, 1}, {0x114d0, 0x114d9, 1}, {0x11650, 0x11659, 1}, {0x116c0, 0x116c9, 1}, {0x11730, 0x1173b, 1}, {0x118e0, 0x118f2, 1}, + {0x11c50, 0x11c6c, 1}, {0x12400, 0x1246e, 1}, {0x16a60, 0x16a69, 1}, {0x16b50, 0x16b59, 1}, @@ -2160,6 +2240,7 @@ var _N = &RangeTable{ {0x1d360, 0x1d371, 1}, {0x1d7ce, 0x1d7ff, 1}, {0x1e8c7, 0x1e8cf, 1}, + {0x1e950, 0x1e959, 1}, {0x1f100, 0x1f10c, 1}, }, LatinOffset: 4, @@ -2212,14 +2293,17 @@ var _Nd = &RangeTable{ {0x11136, 0x1113f, 1}, {0x111d0, 0x111d9, 1}, {0x112f0, 0x112f9, 1}, + {0x11450, 0x11459, 1}, {0x114d0, 0x114d9, 1}, {0x11650, 0x11659, 1}, {0x116c0, 0x116c9, 1}, {0x11730, 0x11739, 1}, {0x118e0, 0x118e9, 1}, + {0x11c50, 0x11c59, 1}, {0x16a60, 0x16a69, 1}, {0x16b50, 0x16b59, 1}, {0x1d7ce, 0x1d7ff, 1}, + {0x1e950, 0x1e959, 1}, }, LatinOffset: 1, } @@ -2251,7 +2335,8 @@ var _No = &RangeTable{ {0x0b72, 0x0b77, 1}, {0x0bf0, 0x0bf2, 1}, {0x0c78, 0x0c7e, 1}, - {0x0d70, 0x0d75, 1}, + {0x0d58, 0x0d5e, 1}, + {0x0d70, 0x0d78, 1}, {0x0f2a, 0x0f33, 1}, {0x1369, 0x137c, 1}, {0x17f0, 0x17f9, 1}, @@ -2299,6 +2384,7 @@ var _No = &RangeTable{ {0x111e1, 0x111f4, 1}, {0x1173a, 0x1173b, 1}, {0x118ea, 0x118f2, 1}, + {0x11c5a, 0x11c6c, 1}, {0x16b5b, 0x16b61, 1}, {0x1d360, 0x1d371, 1}, {0x1e8c7, 0x1e8cf, 1}, @@ -2385,7 +2471,7 @@ var _P = &RangeTable{ {0x2cfe, 0x2cff, 1}, {0x2d70, 0x2e00, 144}, {0x2e01, 0x2e2e, 1}, - {0x2e30, 0x2e42, 1}, + {0x2e30, 0x2e44, 1}, {0x3001, 0x3003, 1}, {0x3008, 0x3011, 1}, {0x3014, 0x301f, 1}, @@ -2441,16 +2527,23 @@ var _P = &RangeTable{ {0x111cd, 0x111db, 14}, {0x111dd, 0x111df, 1}, {0x11238, 0x1123d, 1}, - {0x112a9, 0x114c6, 541}, - {0x115c1, 0x115d7, 1}, + {0x112a9, 0x1144b, 418}, + {0x1144c, 0x1144f, 1}, + {0x1145b, 0x1145d, 2}, + {0x114c6, 0x115c1, 251}, + {0x115c2, 0x115d7, 1}, {0x11641, 0x11643, 1}, + {0x11660, 0x1166c, 1}, {0x1173c, 0x1173e, 1}, + {0x11c41, 0x11c45, 1}, + {0x11c70, 0x11c71, 1}, {0x12470, 0x12474, 1}, {0x16a6e, 0x16a6f, 1}, {0x16af5, 0x16b37, 66}, {0x16b38, 0x16b3b, 1}, {0x16b44, 0x1bc9f, 20827}, {0x1da87, 0x1da8b, 1}, + {0x1e95e, 0x1e95f, 1}, }, LatinOffset: 11, } @@ -2605,7 +2698,8 @@ var _Po = &RangeTable{ {0x2e2b, 0x2e2e, 1}, {0x2e30, 0x2e39, 1}, {0x2e3c, 0x2e3f, 1}, - {0x2e41, 0x3001, 448}, + {0x2e41, 0x2e43, 2}, + {0x2e44, 0x3001, 445}, {0x3002, 0x3003, 1}, {0x303d, 0x30fb, 190}, {0xa4fe, 0xa4ff, 1}, @@ -2661,16 +2755,23 @@ var _Po = &RangeTable{ {0x111cd, 0x111db, 14}, {0x111dd, 0x111df, 1}, {0x11238, 0x1123d, 1}, - {0x112a9, 0x114c6, 541}, - {0x115c1, 0x115d7, 1}, + {0x112a9, 0x1144b, 418}, + {0x1144c, 0x1144f, 1}, + {0x1145b, 0x1145d, 2}, + {0x114c6, 0x115c1, 251}, + {0x115c2, 0x115d7, 1}, {0x11641, 0x11643, 1}, + {0x11660, 0x1166c, 1}, {0x1173c, 0x1173e, 1}, + {0x11c41, 0x11c45, 1}, + {0x11c70, 0x11c71, 1}, {0x12470, 0x12474, 1}, {0x16a6e, 0x16a6f, 1}, {0x16af5, 0x16b37, 66}, {0x16b38, 0x16b3b, 1}, {0x16b44, 0x1bc9f, 20827}, {0x1da87, 0x1da8b, 1}, + {0x1e95e, 0x1e95f, 1}, }, LatinOffset: 8, } @@ -2736,9 +2837,9 @@ var _S = &RangeTable{ {0x09fa, 0x09fb, 1}, {0x0af1, 0x0b70, 127}, {0x0bf3, 0x0bfa, 1}, - {0x0c7f, 0x0d79, 250}, - {0x0e3f, 0x0f01, 194}, - {0x0f02, 0x0f03, 1}, + {0x0c7f, 0x0d4f, 208}, + {0x0d79, 0x0e3f, 198}, + {0x0f01, 0x0f03, 1}, {0x0f13, 0x0f15, 2}, {0x0f16, 0x0f17, 1}, {0x0f1a, 0x0f1f, 1}, @@ -2778,7 +2879,7 @@ var _S = &RangeTable{ {0x218b, 0x2190, 5}, {0x2191, 0x2307, 1}, {0x230c, 0x2328, 1}, - {0x232b, 0x23fa, 1}, + {0x232b, 0x23fe, 1}, {0x2400, 0x2426, 1}, {0x2440, 0x244a, 1}, {0x249c, 0x24e9, 1}, @@ -2839,8 +2940,8 @@ var _S = &RangeTable{ R32: []Range32{ {0x10137, 0x1013f, 1}, {0x10179, 0x10189, 1}, - {0x1018c, 0x10190, 4}, - {0x10191, 0x1019b, 1}, + {0x1018c, 0x1018e, 1}, + {0x10190, 0x1019b, 1}, {0x101a0, 0x101d0, 48}, {0x101d1, 0x101fc, 1}, {0x10877, 0x10878, 1}, @@ -2876,16 +2977,14 @@ var _S = &RangeTable{ {0x1f0d1, 0x1f0f5, 1}, {0x1f110, 0x1f12e, 1}, {0x1f130, 0x1f16b, 1}, - {0x1f170, 0x1f19a, 1}, + {0x1f170, 0x1f1ac, 1}, {0x1f1e6, 0x1f202, 1}, - {0x1f210, 0x1f23a, 1}, + {0x1f210, 0x1f23b, 1}, {0x1f240, 0x1f248, 1}, {0x1f250, 0x1f251, 1}, - {0x1f300, 0x1f579, 1}, - {0x1f57b, 0x1f5a3, 1}, - {0x1f5a5, 0x1f6d0, 1}, + {0x1f300, 0x1f6d2, 1}, {0x1f6e0, 0x1f6ec, 1}, - {0x1f6f0, 0x1f6f3, 1}, + {0x1f6f0, 0x1f6f6, 1}, {0x1f700, 0x1f773, 1}, {0x1f780, 0x1f7d4, 1}, {0x1f800, 0x1f80b, 1}, @@ -2893,8 +2992,13 @@ var _S = &RangeTable{ {0x1f850, 0x1f859, 1}, {0x1f860, 0x1f887, 1}, {0x1f890, 0x1f8ad, 1}, - {0x1f910, 0x1f918, 1}, - {0x1f980, 0x1f984, 1}, + {0x1f910, 0x1f91e, 1}, + {0x1f920, 0x1f927, 1}, + {0x1f930, 0x1f933, 3}, + {0x1f934, 0x1f93e, 1}, + {0x1f940, 0x1f94b, 1}, + {0x1f950, 0x1f95e, 1}, + {0x1f980, 0x1f991, 1}, {0x1f9c0, 0x1f9c0, 1}, }, LatinOffset: 10, @@ -3020,8 +3124,8 @@ var _So = &RangeTable{ {0x09fa, 0x0b70, 374}, {0x0bf3, 0x0bf8, 1}, {0x0bfa, 0x0c7f, 133}, - {0x0d79, 0x0f01, 392}, - {0x0f02, 0x0f03, 1}, + {0x0d4f, 0x0d79, 42}, + {0x0f01, 0x0f03, 1}, {0x0f13, 0x0f15, 2}, {0x0f16, 0x0f17, 1}, {0x0f1a, 0x0f1f, 1}, @@ -3063,7 +3167,7 @@ var _So = &RangeTable{ {0x232b, 0x237b, 1}, {0x237d, 0x239a, 1}, {0x23b4, 0x23db, 1}, - {0x23e2, 0x23fa, 1}, + {0x23e2, 0x23fe, 1}, {0x2400, 0x2426, 1}, {0x2440, 0x244a, 1}, {0x249c, 0x24e9, 1}, @@ -3116,8 +3220,8 @@ var _So = &RangeTable{ {0x10137, 0x10137, 1}, {0x10138, 0x1013f, 1}, {0x10179, 0x10189, 1}, - {0x1018c, 0x10190, 4}, - {0x10191, 0x1019b, 1}, + {0x1018c, 0x1018e, 1}, + {0x10190, 0x1019b, 1}, {0x101a0, 0x101d0, 48}, {0x101d1, 0x101fc, 1}, {0x10877, 0x10878, 1}, @@ -3147,17 +3251,15 @@ var _So = &RangeTable{ {0x1f0d1, 0x1f0f5, 1}, {0x1f110, 0x1f12e, 1}, {0x1f130, 0x1f16b, 1}, - {0x1f170, 0x1f19a, 1}, + {0x1f170, 0x1f1ac, 1}, {0x1f1e6, 0x1f202, 1}, - {0x1f210, 0x1f23a, 1}, + {0x1f210, 0x1f23b, 1}, {0x1f240, 0x1f248, 1}, {0x1f250, 0x1f251, 1}, {0x1f300, 0x1f3fa, 1}, - {0x1f400, 0x1f579, 1}, - {0x1f57b, 0x1f5a3, 1}, - {0x1f5a5, 0x1f6d0, 1}, + {0x1f400, 0x1f6d2, 1}, {0x1f6e0, 0x1f6ec, 1}, - {0x1f6f0, 0x1f6f3, 1}, + {0x1f6f0, 0x1f6f6, 1}, {0x1f700, 0x1f773, 1}, {0x1f780, 0x1f7d4, 1}, {0x1f800, 0x1f80b, 1}, @@ -3165,8 +3267,13 @@ var _So = &RangeTable{ {0x1f850, 0x1f859, 1}, {0x1f860, 0x1f887, 1}, {0x1f890, 0x1f8ad, 1}, - {0x1f910, 0x1f918, 1}, - {0x1f980, 0x1f984, 1}, + {0x1f910, 0x1f91e, 1}, + {0x1f920, 0x1f927, 1}, + {0x1f930, 0x1f933, 3}, + {0x1f934, 0x1f93e, 1}, + {0x1f940, 0x1f94b, 1}, + {0x1f950, 0x1f95e, 1}, + {0x1f980, 0x1f991, 1}, {0x1f9c0, 0x1f9c0, 1}, }, LatinOffset: 2, @@ -3259,11 +3366,12 @@ var ( ) // Generated by running -// maketables --scripts=all --url=http://www.unicode.org/Public/8.0.0/ucd/ +// maketables --scripts=all --url=http://www.unicode.org/Public/9.0.0/ucd/ // DO NOT EDIT // Scripts is the set of Unicode script tables. var Scripts = map[string]*RangeTable{ + "Adlam": Adlam, "Ahom": Ahom, "Anatolian_Hieroglyphs": Anatolian_Hieroglyphs, "Arabic": Arabic, @@ -3274,6 +3382,7 @@ var Scripts = map[string]*RangeTable{ "Bassa_Vah": Bassa_Vah, "Batak": Batak, "Bengali": Bengali, + "Bhaiksuki": Bhaiksuki, "Bopomofo": Bopomofo, "Brahmi": Brahmi, "Braille": Braille, @@ -3335,6 +3444,7 @@ var Scripts = map[string]*RangeTable{ "Malayalam": Malayalam, "Mandaic": Mandaic, "Manichaean": Manichaean, + "Marchen": Marchen, "Meetei_Mayek": Meetei_Mayek, "Mende_Kikakui": Mende_Kikakui, "Meroitic_Cursive": Meroitic_Cursive, @@ -3347,6 +3457,7 @@ var Scripts = map[string]*RangeTable{ "Myanmar": Myanmar, "Nabataean": Nabataean, "New_Tai_Lue": New_Tai_Lue, + "Newa": Newa, "Nko": Nko, "Ogham": Ogham, "Ol_Chiki": Ol_Chiki, @@ -3358,6 +3469,7 @@ var Scripts = map[string]*RangeTable{ "Old_South_Arabian": Old_South_Arabian, "Old_Turkic": Old_Turkic, "Oriya": Oriya, + "Osage": Osage, "Osmanya": Osmanya, "Pahawh_Hmong": Pahawh_Hmong, "Palmyrene": Palmyrene, @@ -3385,6 +3497,7 @@ var Scripts = map[string]*RangeTable{ "Tai_Viet": Tai_Viet, "Takri": Takri, "Tamil": Tamil, + "Tangut": Tangut, "Telugu": Telugu, "Thaana": Thaana, "Thai": Thai, @@ -3397,6 +3510,15 @@ var Scripts = map[string]*RangeTable{ "Yi": Yi, } +var _Adlam = &RangeTable{ + R16: []Range16{}, + R32: []Range32{ + {0x1e900, 0x1e94a, 1}, + {0x1e950, 0x1e959, 1}, + {0x1e95e, 0x1e95f, 1}, + }, +} + var _Ahom = &RangeTable{ R16: []Range16{}, R32: []Range32{ @@ -3426,6 +3548,8 @@ var _Arabic = &RangeTable{ {0x06de, 0x06ff, 1}, {0x0750, 0x077f, 1}, {0x08a0, 0x08b4, 1}, + {0x08b6, 0x08bd, 1}, + {0x08d4, 0x08e1, 1}, {0x08e3, 0x08ff, 1}, {0xfb50, 0xfbc1, 1}, {0xfbd3, 0xfd3d, 1}, @@ -3543,6 +3667,16 @@ var _Bengali = &RangeTable{ }, } +var _Bhaiksuki = &RangeTable{ + R16: []Range16{}, + R32: []Range32{ + {0x11c00, 0x11c08, 1}, + {0x11c0a, 0x11c36, 1}, + {0x11c38, 0x11c45, 1}, + {0x11c50, 0x11c6c, 1}, + }, +} + var _Bopomofo = &RangeTable{ R16: []Range16{ {0x02ea, 0x02eb, 1}, @@ -3649,6 +3783,7 @@ var _Common = &RangeTable{ {0x061f, 0x061f, 1}, {0x0640, 0x0640, 1}, {0x06dd, 0x06dd, 1}, + {0x08e2, 0x08e2, 1}, {0x0964, 0x0965, 1}, {0x0e3f, 0x0e3f, 1}, {0x0fd5, 0x0fd8, 1}, @@ -3674,7 +3809,7 @@ var _Common = &RangeTable{ {0x2133, 0x214d, 1}, {0x214f, 0x215f, 1}, {0x2189, 0x218b, 1}, - {0x2190, 0x23fa, 1}, + {0x2190, 0x23fe, 1}, {0x2400, 0x2426, 1}, {0x2440, 0x244a, 1}, {0x2460, 0x27ff, 1}, @@ -3684,7 +3819,7 @@ var _Common = &RangeTable{ {0x2bbd, 0x2bc8, 1}, {0x2bca, 0x2bd1, 1}, {0x2bec, 0x2bef, 1}, - {0x2e00, 0x2e42, 1}, + {0x2e00, 0x2e44, 1}, {0x2ff0, 0x2ffb, 1}, {0x3000, 0x3004, 1}, {0x3006, 0x3006, 1}, @@ -3768,17 +3903,15 @@ var _Common = &RangeTable{ {0x1f100, 0x1f10c, 1}, {0x1f110, 0x1f12e, 1}, {0x1f130, 0x1f16b, 1}, - {0x1f170, 0x1f19a, 1}, + {0x1f170, 0x1f1ac, 1}, {0x1f1e6, 0x1f1ff, 1}, {0x1f201, 0x1f202, 1}, - {0x1f210, 0x1f23a, 1}, + {0x1f210, 0x1f23b, 1}, {0x1f240, 0x1f248, 1}, {0x1f250, 0x1f251, 1}, - {0x1f300, 0x1f579, 1}, - {0x1f57b, 0x1f5a3, 1}, - {0x1f5a5, 0x1f6d0, 1}, + {0x1f300, 0x1f6d2, 1}, {0x1f6e0, 0x1f6ec, 1}, - {0x1f6f0, 0x1f6f3, 1}, + {0x1f6f0, 0x1f6f6, 1}, {0x1f700, 0x1f773, 1}, {0x1f780, 0x1f7d4, 1}, {0x1f800, 0x1f80b, 1}, @@ -3786,8 +3919,13 @@ var _Common = &RangeTable{ {0x1f850, 0x1f859, 1}, {0x1f860, 0x1f887, 1}, {0x1f890, 0x1f8ad, 1}, - {0x1f910, 0x1f918, 1}, - {0x1f980, 0x1f984, 1}, + {0x1f910, 0x1f91e, 1}, + {0x1f920, 0x1f927, 1}, + {0x1f930, 0x1f930, 1}, + {0x1f933, 0x1f93e, 1}, + {0x1f940, 0x1f94b, 1}, + {0x1f950, 0x1f95e, 1}, + {0x1f980, 0x1f991, 1}, {0x1f9c0, 0x1f9c0, 1}, {0xe0001, 0xe0001, 1}, {0xe0020, 0xe007f, 1}, @@ -3829,6 +3967,7 @@ var _Cyrillic = &RangeTable{ R16: []Range16{ {0x0400, 0x0484, 1}, {0x0487, 0x052f, 1}, + {0x1c80, 0x1c88, 1}, {0x1d2b, 0x1d2b, 1}, {0x1d78, 0x1d78, 1}, {0x2de0, 0x2dff, 1}, @@ -3933,6 +4072,13 @@ var _Glagolitic = &RangeTable{ {0x2c00, 0x2c2e, 1}, {0x2c30, 0x2c5e, 1}, }, + R32: []Range32{ + {0x1e000, 0x1e006, 1}, + {0x1e008, 0x1e018, 1}, + {0x1e01b, 0x1e021, 1}, + {0x1e023, 0x1e024, 1}, + {0x1e026, 0x1e02a, 1}, + }, } var _Gothic = &RangeTable{ @@ -4000,7 +4146,7 @@ var _Greek = &RangeTable{ {0xab65, 0xab65, 1}, }, R32: []Range32{ - {0x10140, 0x1018c, 1}, + {0x10140, 0x1018e, 1}, {0x101a0, 0x101a0, 1}, {0x1d200, 0x1d245, 1}, }, @@ -4151,7 +4297,7 @@ var _Inherited = &RangeTable{ {0x1cf4, 0x1cf4, 1}, {0x1cf8, 0x1cf9, 1}, {0x1dc0, 0x1df5, 1}, - {0x1dfc, 0x1dff, 1}, + {0x1dfb, 0x1dff, 1}, {0x200c, 0x200d, 1}, {0x20d0, 0x20f0, 1}, {0x302a, 0x302d, 1}, @@ -4203,7 +4349,7 @@ var _Kaithi = &RangeTable{ var _Kannada = &RangeTable{ R16: []Range16{ - {0x0c81, 0x0c83, 1}, + {0x0c80, 0x0c83, 1}, {0x0c85, 0x0c8c, 1}, {0x0c8e, 0x0c90, 1}, {0x0c92, 0x0ca8, 1}, @@ -4269,7 +4415,7 @@ var _Khojki = &RangeTable{ R16: []Range16{}, R32: []Range32{ {0x11200, 0x11211, 1}, - {0x11213, 0x1123d, 1}, + {0x11213, 0x1123e, 1}, }, } @@ -4329,7 +4475,7 @@ var _Latin = &RangeTable{ {0x2160, 0x2188, 1}, {0x2c60, 0x2c7f, 1}, {0xa722, 0xa787, 1}, - {0xa78b, 0xa7ad, 1}, + {0xa78b, 0xa7ae, 1}, {0xa7b0, 0xa7b7, 1}, {0xa7f7, 0xa7ff, 1}, {0xab30, 0xab5a, 1}, @@ -4417,11 +4563,9 @@ var _Malayalam = &RangeTable{ {0x0d12, 0x0d3a, 1}, {0x0d3d, 0x0d44, 1}, {0x0d46, 0x0d48, 1}, - {0x0d4a, 0x0d4e, 1}, - {0x0d57, 0x0d57, 1}, - {0x0d5f, 0x0d63, 1}, - {0x0d66, 0x0d75, 1}, - {0x0d79, 0x0d7f, 1}, + {0x0d4a, 0x0d4f, 1}, + {0x0d54, 0x0d63, 1}, + {0x0d66, 0x0d7f, 1}, }, } @@ -4440,6 +4584,15 @@ var _Manichaean = &RangeTable{ }, } +var _Marchen = &RangeTable{ + R16: []Range16{}, + R32: []Range32{ + {0x11c70, 0x11c8f, 1}, + {0x11c92, 0x11ca7, 1}, + {0x11ca9, 0x11cb6, 1}, + }, +} + var _Meetei_Mayek = &RangeTable{ R16: []Range16{ {0xaae0, 0xaaf6, 1}, @@ -4498,6 +4651,9 @@ var _Mongolian = &RangeTable{ {0x1820, 0x1877, 1}, {0x1880, 0x18aa, 1}, }, + R32: []Range32{ + {0x11660, 0x1166c, 1}, + }, } var _Mro = &RangeTable{ @@ -4545,6 +4701,15 @@ var _New_Tai_Lue = &RangeTable{ }, } +var _Newa = &RangeTable{ + R16: []Range16{}, + R32: []Range32{ + {0x11400, 0x11459, 1}, + {0x1145b, 0x1145b, 1}, + {0x1145d, 0x1145d, 1}, + }, +} + var _Nko = &RangeTable{ R16: []Range16{ {0x07c0, 0x07fa, 1}, @@ -4634,6 +4799,14 @@ var _Oriya = &RangeTable{ }, } +var _Osage = &RangeTable{ + R16: []Range16{}, + R32: []Range32{ + {0x104b0, 0x104d3, 1}, + {0x104d8, 0x104fb, 1}, + }, +} + var _Osmanya = &RangeTable{ R16: []Range16{}, R32: []Range32{ @@ -4713,7 +4886,7 @@ var _Samaritan = &RangeTable{ var _Saurashtra = &RangeTable{ R16: []Range16{ - {0xa880, 0xa8c4, 1}, + {0xa880, 0xa8c5, 1}, {0xa8ce, 0xa8d9, 1}, }, } @@ -4867,6 +5040,15 @@ var _Tamil = &RangeTable{ }, } +var _Tangut = &RangeTable{ + R16: []Range16{}, + R32: []Range32{ + {0x16fe0, 0x16fe0, 1}, + {0x17000, 0x187ec, 1}, + {0x18800, 0x18af2, 1}, + }, +} + var _Telugu = &RangeTable{ R16: []Range16{ {0x0c00, 0x0c03, 1}, @@ -4957,6 +5139,7 @@ var _Yi = &RangeTable{ // These variables have type *RangeTable. var ( + Adlam = _Adlam // Adlam is the set of Unicode characters in script Adlam. Ahom = _Ahom // Ahom is the set of Unicode characters in script Ahom. Anatolian_Hieroglyphs = _Anatolian_Hieroglyphs // Anatolian_Hieroglyphs is the set of Unicode characters in script Anatolian_Hieroglyphs. Arabic = _Arabic // Arabic is the set of Unicode characters in script Arabic. @@ -4967,6 +5150,7 @@ var ( Bassa_Vah = _Bassa_Vah // Bassa_Vah is the set of Unicode characters in script Bassa_Vah. Batak = _Batak // Batak is the set of Unicode characters in script Batak. Bengali = _Bengali // Bengali is the set of Unicode characters in script Bengali. + Bhaiksuki = _Bhaiksuki // Bhaiksuki is the set of Unicode characters in script Bhaiksuki. Bopomofo = _Bopomofo // Bopomofo is the set of Unicode characters in script Bopomofo. Brahmi = _Brahmi // Brahmi is the set of Unicode characters in script Brahmi. Braille = _Braille // Braille is the set of Unicode characters in script Braille. @@ -5028,6 +5212,7 @@ var ( Malayalam = _Malayalam // Malayalam is the set of Unicode characters in script Malayalam. Mandaic = _Mandaic // Mandaic is the set of Unicode characters in script Mandaic. Manichaean = _Manichaean // Manichaean is the set of Unicode characters in script Manichaean. + Marchen = _Marchen // Marchen is the set of Unicode characters in script Marchen. Meetei_Mayek = _Meetei_Mayek // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek. Mende_Kikakui = _Mende_Kikakui // Mende_Kikakui is the set of Unicode characters in script Mende_Kikakui. Meroitic_Cursive = _Meroitic_Cursive // Meroitic_Cursive is the set of Unicode characters in script Meroitic_Cursive. @@ -5040,6 +5225,7 @@ var ( Myanmar = _Myanmar // Myanmar is the set of Unicode characters in script Myanmar. Nabataean = _Nabataean // Nabataean is the set of Unicode characters in script Nabataean. New_Tai_Lue = _New_Tai_Lue // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. + Newa = _Newa // Newa is the set of Unicode characters in script Newa. Nko = _Nko // Nko is the set of Unicode characters in script Nko. Ogham = _Ogham // Ogham is the set of Unicode characters in script Ogham. Ol_Chiki = _Ol_Chiki // Ol_Chiki is the set of Unicode characters in script Ol_Chiki. @@ -5051,6 +5237,7 @@ var ( Old_South_Arabian = _Old_South_Arabian // Old_South_Arabian is the set of Unicode characters in script Old_South_Arabian. Old_Turkic = _Old_Turkic // Old_Turkic is the set of Unicode characters in script Old_Turkic. Oriya = _Oriya // Oriya is the set of Unicode characters in script Oriya. + Osage = _Osage // Osage is the set of Unicode characters in script Osage. Osmanya = _Osmanya // Osmanya is the set of Unicode characters in script Osmanya. Pahawh_Hmong = _Pahawh_Hmong // Pahawh_Hmong is the set of Unicode characters in script Pahawh_Hmong. Palmyrene = _Palmyrene // Palmyrene is the set of Unicode characters in script Palmyrene. @@ -5078,6 +5265,7 @@ var ( Tai_Viet = _Tai_Viet // Tai_Viet is the set of Unicode characters in script Tai_Viet. Takri = _Takri // Takri is the set of Unicode characters in script Takri. Tamil = _Tamil // Tamil is the set of Unicode characters in script Tamil. + Tangut = _Tangut // Tangut is the set of Unicode characters in script Tangut. Telugu = _Telugu // Telugu is the set of Unicode characters in script Telugu. Thaana = _Thaana // Thaana is the set of Unicode characters in script Thaana. Thai = _Thai // Thai is the set of Unicode characters in script Thai. @@ -5091,7 +5279,7 @@ var ( ) // Generated by running -// maketables --props=all --url=http://www.unicode.org/Public/8.0.0/ucd/ +// maketables --props=all --url=http://www.unicode.org/Public/9.0.0/ucd/ // DO NOT EDIT // Properties is the set of Unicode property tables. @@ -5120,9 +5308,11 @@ var Properties = map[string]*RangeTable{ "Other_Uppercase": Other_Uppercase, "Pattern_Syntax": Pattern_Syntax, "Pattern_White_Space": Pattern_White_Space, + "Prepended_Concatenation_Mark": Prepended_Concatenation_Mark, "Quotation_Mark": Quotation_Mark, "Radical": Radical, - "STerm": STerm, + "Sentence_Terminal": Sentence_Terminal, + "STerm": Sentence_Terminal, "Soft_Dotted": Soft_Dotted, "Terminal_Punctuation": Terminal_Punctuation, "Unified_Ideograph": Unified_Ideograph, @@ -5187,7 +5377,6 @@ var _Deprecated = &RangeTable{ }, R32: []Range32{ {0xe0001, 0xe0001, 1}, - {0xe007f, 0xe007f, 1}, }, } @@ -5329,11 +5518,14 @@ var _Diacritic = &RangeTable{ {0x1134d, 0x1134d, 1}, {0x11366, 0x1136c, 1}, {0x11370, 0x11374, 1}, + {0x11442, 0x11442, 1}, + {0x11446, 0x11446, 1}, {0x114c2, 0x114c3, 1}, {0x115bf, 0x115c0, 1}, {0x1163f, 0x1163f, 1}, {0x116b6, 0x116b7, 1}, {0x1172b, 0x1172b, 1}, + {0x11c3f, 0x11c3f, 1}, {0x16af0, 0x16af4, 1}, {0x16f8f, 0x16f9f, 1}, {0x1d167, 0x1d169, 1}, @@ -5342,6 +5534,8 @@ var _Diacritic = &RangeTable{ {0x1d185, 0x1d18b, 1}, {0x1d1aa, 0x1d1ad, 1}, {0x1e8d0, 0x1e8d6, 1}, + {0x1e944, 0x1e946, 1}, + {0x1e948, 0x1e94a, 1}, }, LatinOffset: 6, } @@ -5376,6 +5570,8 @@ var _Extender = &RangeTable{ {0x1135d, 0x1135d, 1}, {0x115c6, 0x115c8, 1}, {0x16b42, 0x16b43, 1}, + {0x16fe0, 0x16fe0, 1}, + {0x1e944, 0x1e946, 1}, }, LatinOffset: 1, } @@ -5432,6 +5628,8 @@ var _Ideographic = &RangeTable{ {0xfa70, 0xfad9, 1}, }, R32: []Range32{ + {0x17000, 0x187ec, 1}, + {0x18800, 0x18af2, 1}, {0x20000, 0x2a6d6, 1}, {0x2a700, 0x2b734, 1}, {0x2b740, 0x2b81d, 1}, @@ -5506,6 +5704,7 @@ var _Other_Alphabetic = &RangeTable{ {0x081b, 0x0823, 1}, {0x0825, 0x0827, 1}, {0x0829, 0x082c, 1}, + {0x08d4, 0x08df, 1}, {0x08e3, 0x08e9, 1}, {0x08f0, 0x0903, 1}, {0x093a, 0x093b, 1}, @@ -5591,6 +5790,7 @@ var _Other_Alphabetic = &RangeTable{ {0x1752, 0x1753, 1}, {0x1772, 0x1773, 1}, {0x17b6, 0x17c8, 1}, + {0x1885, 0x1886, 1}, {0x18a9, 0x18a9, 1}, {0x1920, 0x192b, 1}, {0x1930, 0x1938, 1}, @@ -5613,6 +5813,7 @@ var _Other_Alphabetic = &RangeTable{ {0xa823, 0xa827, 1}, {0xa880, 0xa881, 1}, {0xa8b4, 0xa8c3, 1}, + {0xa8c5, 0xa8c5, 1}, {0xa926, 0xa92a, 1}, {0xa947, 0xa952, 1}, {0xa980, 0xa983, 1}, @@ -5644,6 +5845,7 @@ var _Other_Alphabetic = &RangeTable{ {0x111b3, 0x111bf, 1}, {0x1122c, 0x11234, 1}, {0x11237, 0x11237, 1}, + {0x1123e, 0x1123e, 1}, {0x112df, 0x112e8, 1}, {0x11300, 0x11303, 1}, {0x1133e, 0x11344, 1}, @@ -5651,6 +5853,8 @@ var _Other_Alphabetic = &RangeTable{ {0x1134b, 0x1134c, 1}, {0x11357, 0x11357, 1}, {0x11362, 0x11363, 1}, + {0x11435, 0x11441, 1}, + {0x11443, 0x11445, 1}, {0x114b0, 0x114c1, 1}, {0x115af, 0x115b5, 1}, {0x115b8, 0x115be, 1}, @@ -5659,9 +5863,19 @@ var _Other_Alphabetic = &RangeTable{ {0x11640, 0x11640, 1}, {0x116ab, 0x116b5, 1}, {0x1171d, 0x1172a, 1}, + {0x11c2f, 0x11c36, 1}, + {0x11c38, 0x11c3e, 1}, + {0x11c92, 0x11ca7, 1}, + {0x11ca9, 0x11cb6, 1}, {0x16b30, 0x16b36, 1}, {0x16f51, 0x16f7e, 1}, {0x1bc9e, 0x1bc9e, 1}, + {0x1e000, 0x1e006, 1}, + {0x1e008, 0x1e018, 1}, + {0x1e01b, 0x1e021, 1}, + {0x1e023, 0x1e024, 1}, + {0x1e026, 0x1e02a, 1}, + {0x1e947, 0x1e947, 1}, {0x1f130, 0x1f149, 1}, {0x1f150, 0x1f169, 1}, {0x1f170, 0x1f189, 1}, @@ -5700,7 +5914,7 @@ var _Other_Grapheme_Extend = &RangeTable{ {0x0d57, 0x0d57, 1}, {0x0dcf, 0x0dcf, 1}, {0x0ddf, 0x0ddf, 1}, - {0x200c, 0x200d, 1}, + {0x200c, 0x200c, 1}, {0x302e, 0x302f, 1}, {0xff9e, 0xff9f, 1}, }, @@ -5712,6 +5926,7 @@ var _Other_Grapheme_Extend = &RangeTable{ {0x115af, 0x115af, 1}, {0x1d165, 0x1d165, 1}, {0x1d16e, 0x1d172, 1}, + {0xe0020, 0xe007f, 1}, }, } @@ -5727,6 +5942,7 @@ var _Other_ID_Continue = &RangeTable{ var _Other_ID_Start = &RangeTable{ R16: []Range16{ + {0x1885, 0x1886, 1}, {0x2118, 0x2118, 1}, {0x212e, 0x212e, 1}, {0x309b, 0x309c, 1}, @@ -5958,6 +6174,18 @@ var _Pattern_White_Space = &RangeTable{ LatinOffset: 3, } +var _Prepended_Concatenation_Mark = &RangeTable{ + R16: []Range16{ + {0x0600, 0x0605, 1}, + {0x06dd, 0x06dd, 1}, + {0x070f, 0x070f, 1}, + {0x08e2, 0x08e2, 1}, + }, + R32: []Range32{ + {0x110bd, 0x110bd, 1}, + }, +} + var _Quotation_Mark = &RangeTable{ R16: []Range16{ {0x0022, 0x0022, 1}, @@ -5985,7 +6213,7 @@ var _Radical = &RangeTable{ }, } -var _STerm = &RangeTable{ +var _Sentence_Terminal = &RangeTable{ R16: []Range16{ {0x0021, 0x0021, 1}, {0x002e, 0x002e, 1}, @@ -6043,10 +6271,12 @@ var _STerm = &RangeTable{ {0x11238, 0x11239, 1}, {0x1123b, 0x1123c, 1}, {0x112a9, 0x112a9, 1}, + {0x1144b, 0x1144c, 1}, {0x115c2, 0x115c3, 1}, {0x115c9, 0x115d7, 1}, {0x11641, 0x11642, 1}, {0x1173c, 0x1173e, 1}, + {0x11c41, 0x11c42, 1}, {0x16a6e, 0x16a6f, 1}, {0x16af5, 0x16af5, 1}, {0x16b37, 0x16b38, 1}, @@ -6179,10 +6409,14 @@ var _Terminal_Punctuation = &RangeTable{ {0x111de, 0x111df, 1}, {0x11238, 0x1123c, 1}, {0x112a9, 0x112a9, 1}, + {0x1144b, 0x1144d, 1}, + {0x1145b, 0x1145b, 1}, {0x115c2, 0x115c5, 1}, {0x115c9, 0x115d7, 1}, {0x11641, 0x11642, 1}, {0x1173c, 0x1173e, 1}, + {0x11c41, 0x11c43, 1}, + {0x11c71, 0x11c71, 1}, {0x12470, 0x12474, 1}, {0x16a6e, 0x16a6f, 1}, {0x16af5, 0x16af5, 1}, @@ -6266,9 +6500,11 @@ var ( Other_Uppercase = _Other_Uppercase // Other_Uppercase is the set of Unicode characters with property Other_Uppercase. Pattern_Syntax = _Pattern_Syntax // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax. Pattern_White_Space = _Pattern_White_Space // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space. + Prepended_Concatenation_Mark = _Prepended_Concatenation_Mark // Prepended_Concatenation_Mark is the set of Unicode characters with property Prepended_Concatenation_Mark. Quotation_Mark = _Quotation_Mark // Quotation_Mark is the set of Unicode characters with property Quotation_Mark. Radical = _Radical // Radical is the set of Unicode characters with property Radical. - STerm = _STerm // STerm is the set of Unicode characters with property STerm. + STerm = _Sentence_Terminal // STerm is an alias for Sentence_Terminal. + Sentence_Terminal = _Sentence_Terminal // Sentence_Terminal is the set of Unicode characters with property Sentence_Terminal. Soft_Dotted = _Soft_Dotted // Soft_Dotted is the set of Unicode characters with property Soft_Dotted. Terminal_Punctuation = _Terminal_Punctuation // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation. Unified_Ideograph = _Unified_Ideograph // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph. @@ -6277,7 +6513,7 @@ var ( ) // Generated by running -// maketables --data=http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/8.0.0/ucd/CaseFolding.txt +// maketables --data=http://www.unicode.org/Public/9.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt // DO NOT EDIT // CaseRanges is the table describing case mappings for all letters with @@ -6383,6 +6619,7 @@ var _CaseRanges = []CaseRange{ {0x0266, 0x0266, d{42308, 0, 42308}}, {0x0268, 0x0268, d{-209, 0, -209}}, {0x0269, 0x0269, d{-211, 0, -211}}, + {0x026A, 0x026A, d{42308, 0, 42308}}, {0x026B, 0x026B, d{10743, 0, 10743}}, {0x026C, 0x026C, d{42305, 0, 42305}}, {0x026F, 0x026F, d{-211, 0, -211}}, @@ -6453,6 +6690,14 @@ var _CaseRanges = []CaseRange{ {0x13A0, 0x13EF, d{0, 38864, 0}}, {0x13F0, 0x13F5, d{0, 8, 0}}, {0x13F8, 0x13FD, d{-8, 0, -8}}, + {0x1C80, 0x1C80, d{-6254, 0, -6254}}, + {0x1C81, 0x1C81, d{-6253, 0, -6253}}, + {0x1C82, 0x1C82, d{-6244, 0, -6244}}, + {0x1C83, 0x1C84, d{-6242, 0, -6242}}, + {0x1C85, 0x1C85, d{-6243, 0, -6243}}, + {0x1C86, 0x1C86, d{-6236, 0, -6236}}, + {0x1C87, 0x1C87, d{-6181, 0, -6181}}, + {0x1C88, 0x1C88, d{35266, 0, 35266}}, {0x1D79, 0x1D79, d{35332, 0, 35332}}, {0x1D7D, 0x1D7D, d{3814, 0, 3814}}, {0x1E00, 0x1E95, d{UpperLower, UpperLower, UpperLower}}, @@ -6559,6 +6804,7 @@ var _CaseRanges = []CaseRange{ {0xA7AB, 0xA7AB, d{0, -42319, 0}}, {0xA7AC, 0xA7AC, d{0, -42315, 0}}, {0xA7AD, 0xA7AD, d{0, -42305, 0}}, + {0xA7AE, 0xA7AE, d{0, -42308, 0}}, {0xA7B0, 0xA7B0, d{0, -42258, 0}}, {0xA7B1, 0xA7B1, d{0, -42282, 0}}, {0xA7B2, 0xA7B2, d{0, -42261, 0}}, @@ -6570,10 +6816,14 @@ var _CaseRanges = []CaseRange{ {0xFF41, 0xFF5A, d{-32, 0, -32}}, {0x10400, 0x10427, d{0, 40, 0}}, {0x10428, 0x1044F, d{-40, 0, -40}}, + {0x104B0, 0x104D3, d{0, 40, 0}}, + {0x104D8, 0x104FB, d{-40, 0, -40}}, {0x10C80, 0x10CB2, d{0, 64, 0}}, {0x10CC0, 0x10CF2, d{-64, 0, -64}}, {0x118A0, 0x118BF, d{0, 32, 0}}, {0x118C0, 0x118DF, d{-32, 0, -32}}, + {0x1E900, 0x1E921, d{0, 34, 0}}, + {0x1E922, 0x1E943, d{-34, 0, -34}}, } var properties = [MaxLatin1 + 1]uint8{ 0x00: pC, // '\x00' @@ -6834,6 +7084,137 @@ var properties = [MaxLatin1 + 1]uint8{ 0xFF: pLl | pp, // 'ÿ' } +var asciiFold = [MaxASCII + 1]uint16{ + 0x0000, + 0x0001, + 0x0002, + 0x0003, + 0x0004, + 0x0005, + 0x0006, + 0x0007, + 0x0008, + 0x0009, + 0x000A, + 0x000B, + 0x000C, + 0x000D, + 0x000E, + 0x000F, + 0x0010, + 0x0011, + 0x0012, + 0x0013, + 0x0014, + 0x0015, + 0x0016, + 0x0017, + 0x0018, + 0x0019, + 0x001A, + 0x001B, + 0x001C, + 0x001D, + 0x001E, + 0x001F, + 0x0020, + 0x0021, + 0x0022, + 0x0023, + 0x0024, + 0x0025, + 0x0026, + 0x0027, + 0x0028, + 0x0029, + 0x002A, + 0x002B, + 0x002C, + 0x002D, + 0x002E, + 0x002F, + 0x0030, + 0x0031, + 0x0032, + 0x0033, + 0x0034, + 0x0035, + 0x0036, + 0x0037, + 0x0038, + 0x0039, + 0x003A, + 0x003B, + 0x003C, + 0x003D, + 0x003E, + 0x003F, + 0x0040, + 0x0061, + 0x0062, + 0x0063, + 0x0064, + 0x0065, + 0x0066, + 0x0067, + 0x0068, + 0x0069, + 0x006A, + 0x006B, + 0x006C, + 0x006D, + 0x006E, + 0x006F, + 0x0070, + 0x0071, + 0x0072, + 0x0073, + 0x0074, + 0x0075, + 0x0076, + 0x0077, + 0x0078, + 0x0079, + 0x007A, + 0x005B, + 0x005C, + 0x005D, + 0x005E, + 0x005F, + 0x0060, + 0x0041, + 0x0042, + 0x0043, + 0x0044, + 0x0045, + 0x0046, + 0x0047, + 0x0048, + 0x0049, + 0x004A, + 0x212A, + 0x004C, + 0x004D, + 0x004E, + 0x004F, + 0x0050, + 0x0051, + 0x0052, + 0x017F, + 0x0054, + 0x0055, + 0x0056, + 0x0057, + 0x0058, + 0x0059, + 0x005A, + 0x007B, + 0x007C, + 0x007D, + 0x007E, + 0x007F, +} + var caseOrbit = []foldPair{ {0x004B, 0x006B}, {0x0053, 0x0073}, @@ -6890,6 +7271,29 @@ var caseOrbit = []foldPair{ {0x03F1, 0x03A1}, {0x03F4, 0x0398}, {0x03F5, 0x0395}, + {0x0412, 0x0432}, + {0x0414, 0x0434}, + {0x041E, 0x043E}, + {0x0421, 0x0441}, + {0x0422, 0x0442}, + {0x042A, 0x044A}, + {0x0432, 0x1C80}, + {0x0434, 0x1C81}, + {0x043E, 0x1C82}, + {0x0441, 0x1C83}, + {0x0442, 0x1C84}, + {0x044A, 0x1C86}, + {0x0462, 0x0463}, + {0x0463, 0x1C87}, + {0x1C80, 0x0412}, + {0x1C81, 0x0414}, + {0x1C82, 0x041E}, + {0x1C83, 0x0421}, + {0x1C84, 0x1C85}, + {0x1C85, 0x0422}, + {0x1C86, 0x042A}, + {0x1C87, 0x0462}, + {0x1C88, 0xA64A}, {0x1E60, 0x1E61}, {0x1E61, 0x1E9B}, {0x1E9B, 0x1E60}, @@ -6898,6 +7302,8 @@ var caseOrbit = []foldPair{ {0x2126, 0x03A9}, {0x212A, 0x004B}, {0x212B, 0x00C5}, + {0xA64A, 0xA64B}, + {0xA64B, 0x1C88}, } // FoldCategory maps a category name to a table of @@ -7041,15 +7447,17 @@ var foldLl = &RangeTable{ {0xa78b, 0xa78d, 2}, {0xa790, 0xa792, 2}, {0xa796, 0xa7aa, 2}, - {0xa7ab, 0xa7ad, 1}, + {0xa7ab, 0xa7ae, 1}, {0xa7b0, 0xa7b4, 1}, {0xa7b6, 0xff21, 22379}, {0xff22, 0xff3a, 1}, }, R32: []Range32{ {0x10400, 0x10427, 1}, + {0x104b0, 0x104d3, 1}, {0x10c80, 0x10cb2, 1}, {0x118a0, 0x118bf, 1}, + {0x1e900, 0x1e921, 1}, }, LatinOffset: 3, } @@ -7108,11 +7516,10 @@ var foldLu = &RangeTable{ {0x025c, 0x0260, 4}, {0x0261, 0x0265, 2}, {0x0266, 0x0268, 2}, - {0x0269, 0x026b, 2}, - {0x026c, 0x026f, 3}, - {0x0271, 0x0272, 1}, - {0x0275, 0x027d, 8}, - {0x0280, 0x0283, 3}, + {0x0269, 0x026c, 1}, + {0x026f, 0x0271, 2}, + {0x0272, 0x0275, 3}, + {0x027d, 0x0283, 3}, {0x0287, 0x028c, 1}, {0x0292, 0x029d, 11}, {0x029e, 0x0345, 167}, @@ -7133,6 +7540,7 @@ var foldLu = &RangeTable{ {0x04cf, 0x052f, 2}, {0x0561, 0x0586, 1}, {0x13f8, 0x13fd, 1}, + {0x1c80, 0x1c88, 1}, {0x1d79, 0x1d7d, 4}, {0x1e01, 0x1e95, 2}, {0x1e9b, 0x1ea1, 6}, @@ -7175,8 +7583,10 @@ var foldLu = &RangeTable{ }, R32: []Range32{ {0x10428, 0x1044f, 1}, + {0x104d8, 0x104fb, 1}, {0x10cc0, 0x10cf2, 1}, {0x118c0, 0x118df, 1}, + {0x1e922, 0x1e943, 1}, }, LatinOffset: 4, } @@ -7201,7 +7611,7 @@ var foldMn = &RangeTable{ // If there is no entry for a script name, there are no such points. var FoldScript = map[string]*RangeTable{} -// Range entries: 3546 16-bit, 1306 32-bit, 4852 total. -// Range bytes: 21276 16-bit, 15672 32-bit, 36948 total. +// Range entries: 3576 16-bit, 1454 32-bit, 5030 total. +// Range bytes: 21456 16-bit, 17448 32-bit, 38904 total. -// Fold orbit bytes: 63 pairs, 252 bytes +// Fold orbit bytes: 88 pairs, 352 bytes diff --git a/libgo/go/unicode/utf16/export_test.go b/libgo/go/unicode/utf16/export_test.go index 306247e..e0c57f5 100644 --- a/libgo/go/unicode/utf16/export_test.go +++ b/libgo/go/unicode/utf16/export_test.go @@ -1,4 +1,4 @@ -// Copyright 2012 The Go Authors. All rights reserved. +// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/libgo/go/unicode/utf16/utf16.go b/libgo/go/unicode/utf16/utf16.go index b497500..1a881aa 100644 --- a/libgo/go/unicode/utf16/utf16.go +++ b/libgo/go/unicode/utf16/utf16.go @@ -1,4 +1,4 @@ -// Copyright 2010 The Go Authors. All rights reserved. +// Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -36,7 +36,7 @@ func IsSurrogate(r rune) bool { // the Unicode replacement code point U+FFFD. func DecodeRune(r1, r2 rune) rune { if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { - return (r1-surr1)<<10 | (r2 - surr2) + 0x10000 + return (r1-surr1)<<10 | (r2 - surr2) + surrSelf } return replacementChar } @@ -45,7 +45,7 @@ func DecodeRune(r1, r2 rune) rune { // If the rune is not a valid Unicode code point or does not need encoding, // EncodeRune returns U+FFFD, U+FFFD. func EncodeRune(r rune) (r1, r2 rune) { - if r < surrSelf || r > maxRune || IsSurrogate(r) { + if r < surrSelf || r > maxRune { return replacementChar, replacementChar } r -= surrSelf @@ -65,20 +65,22 @@ func Encode(s []rune) []uint16 { n = 0 for _, v := range s { switch { - case v < 0, surr1 <= v && v < surr3, v > maxRune: - v = replacementChar - fallthrough - case v < surrSelf: + case 0 <= v && v < surr1, surr3 <= v && v < surrSelf: + // normal rune a[n] = uint16(v) n++ - default: + case surrSelf <= v && v <= maxRune: + // needs surrogate sequence r1, r2 := EncodeRune(v) a[n] = uint16(r1) a[n+1] = uint16(r2) n += 2 + default: + a[n] = uint16(replacementChar) + n++ } } - return a[0:n] + return a[:n] } // Decode returns the Unicode code point sequence represented @@ -88,21 +90,19 @@ func Decode(s []uint16) []rune { n := 0 for i := 0; i < len(s); i++ { switch r := s[i]; { + case r < surr1, surr3 <= r: + // normal rune + a[n] = rune(r) case surr1 <= r && r < surr2 && i+1 < len(s) && surr2 <= s[i+1] && s[i+1] < surr3: // valid surrogate sequence a[n] = DecodeRune(rune(r), rune(s[i+1])) i++ - n++ - case surr1 <= r && r < surr3: + default: // invalid surrogate sequence a[n] = replacementChar - n++ - default: - // normal rune - a[n] = rune(r) - n++ } + n++ } - return a[0:n] + return a[:n] } diff --git a/libgo/go/unicode/utf16/utf16_test.go b/libgo/go/unicode/utf16/utf16_test.go index 3dca472..d258f0b 100644 --- a/libgo/go/unicode/utf16/utf16_test.go +++ b/libgo/go/unicode/utf16/utf16_test.go @@ -1,4 +1,4 @@ -// Copyright 2010 The Go Authors. All rights reserved. +// Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -147,3 +147,56 @@ func TestIsSurrogate(t *testing.T) { } } } + +func BenchmarkDecodeValidASCII(b *testing.B) { + // "hello world" + data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100} + for i := 0; i < b.N; i++ { + Decode(data) + } +} + +func BenchmarkDecodeValidJapaneseChars(b *testing.B) { + // "日本語日本語日本語" + data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486} + for i := 0; i < b.N; i++ { + Decode(data) + } +} + +func BenchmarkDecodeRune(b *testing.B) { + rs := make([]rune, 10) + // U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS + for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} { + rs[2*i], rs[2*i+1] = EncodeRune(u) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 5; j++ { + DecodeRune(rs[2*j], rs[2*j+1]) + } + } +} + +func BenchmarkEncodeValidASCII(b *testing.B) { + data := []rune{'h', 'e', 'l', 'l', 'o'} + for i := 0; i < b.N; i++ { + Encode(data) + } +} + +func BenchmarkEncodeValidJapaneseChars(b *testing.B) { + data := []rune{'日', '本', '語'} + for i := 0; i < b.N; i++ { + Encode(data) + } +} + +func BenchmarkEncodeRune(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} { + EncodeRune(u) + } + } +} diff --git a/libgo/go/unicode/utf8/utf8.go b/libgo/go/unicode/utf8/utf8.go index bbaf14a..9d35be6 100644 --- a/libgo/go/unicode/utf8/utf8.go +++ b/libgo/go/unicode/utf8/utf8.go @@ -341,7 +341,7 @@ func RuneLen(r rune) int { // EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. // It returns the number of bytes written. func EncodeRune(p []byte, r rune) int { - // Negative values are erroneous. Making it unsigned addresses the problem. + // Negative values are erroneous. Making it unsigned addresses the problem. switch i := uint32(r); { case i <= rune1Max: p[0] = byte(r) @@ -367,7 +367,7 @@ func EncodeRune(p []byte, r rune) int { } } -// RuneCount returns the number of runes in p. Erroneous and short +// RuneCount returns the number of runes in p. Erroneous and short // encodings are treated as single runes of width 1 byte. func RuneCount(p []byte) int { np := len(p) @@ -441,7 +441,7 @@ func RuneCountInString(s string) (n int) { } // RuneStart reports whether the byte could be the first byte of an encoded, -// possibly invalid rune. Second and subsequent bytes always have the top two +// possibly invalid rune. Second and subsequent bytes always have the top two // bits set to 10. func RuneStart(b byte) bool { return b&0xC0 != 0x80 } |