aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author K.Kosako <kkosako0@gmail.com> 2023-08-21 15:21:55 +0900
committer K.Kosako <kkosako0@gmail.com> 2023-08-21 15:21:55 +0900
commit 276d83c5dd1e6cae7db42aae87d8deeb63fcb639 (patch)
tree d63d85fb43c0b13720afb271b817c40b30a89432
parent e7e97ca3345561d6d9ed2ea0bd0b0da2b5bf24e0 (diff)
download oniguruma-276d83c5dd1e6cae7db42aae87d8deeb63fcb639.zip
oniguruma-276d83c5dd1e6cae7db42aae87d8deeb63fcb639.tar.gz
oniguruma-276d83c5dd1e6cae7db42aae87d8deeb63fcb639.tar.bz2
migration from python2 to python3
-rwxr-xr-x src/gperf_fold_key_conv.py 8
-rwxr-xr-x src/gperf_unfold_key_conv.py 8
-rwxr-xr-x src/make_unicode_egcb.sh 2
-rwxr-xr-x src/make_unicode_egcb_data.py 39
-rwxr-xr-x src/make_unicode_fold.sh 10
-rwxr-xr-x src/make_unicode_fold_data.py 74
-rwxr-xr-x src/make_unicode_property.sh 4
-rwxr-xr-x src/make_unicode_property_data.py 82
-rwxr-xr-x src/make_unicode_wb.sh 2
-rwxr-xr-x src/make_unicode_wb_data.py 39
10 files changed, 135 insertions, 133 deletions
diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py
index c633100..d943c3e 100755
--- a/src/gperf_fold_key_conv.py
+++ b/src/gperf_fold_key_conv.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# gperf_fold_key_conv.py
-# Copyright (c) 2016-2018 K.Kosako
+# Copyright (c) 2016-2023 K.Kosako
import sys
import re
@@ -52,7 +52,7 @@ def parse_line(s, key_len):
return s
def parse_file(f, key_len):
- print "/* This file was converted by gperf_fold_key_conv.py\n from gperf output file. */"
+ print("/* This file was converted by gperf_fold_key_conv.py\n from gperf output file. */")
while True:
line = f.readline()
@@ -60,7 +60,7 @@ def parse_file(f, key_len):
break
s = parse_line(line, key_len)
- print s
+ print(s)
# main
diff --git a/src/gperf_unfold_key_conv.py b/src/gperf_unfold_key_conv.py
index d999d4e..deda85d 100755
--- a/src/gperf_unfold_key_conv.py
+++ b/src/gperf_unfold_key_conv.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# gperf_unfold_key_conv.py
-# Copyright (c) 2016-2018 K.Kosako
+# Copyright (c) 2016-2023 K.Kosako
import sys
import re
@@ -44,12 +44,12 @@ def parse_line(s):
return s
def parse_file(f):
- print "/* This file was converted by gperf_unfold_key_conv.py\n from gperf output file. */"
+ print("/* This file was converted by gperf_unfold_key_conv.py\n from gperf output file. */")
line = f.readline()
while line:
s = parse_line(line)
- print s
+ print(s)
line = f.readline()
diff --git a/src/make_unicode_egcb.sh b/src/make_unicode_egcb.sh
index 1d0719a..be60d36 100755
--- a/src/make_unicode_egcb.sh
+++ b/src/make_unicode_egcb.sh
@@ -2,6 +2,6 @@
NAME=unicode_egcb_data
-python2 make_unicode_egcb_data.py > ${NAME}.c
+python3 make_unicode_egcb_data.py > ${NAME}.c
exit 0
diff --git a/src/make_unicode_egcb_data.py b/src/make_unicode_egcb_data.py
index 892f5ef..80db3f0 100755
--- a/src/make_unicode_egcb_data.py
+++ b/src/make_unicode_egcb_data.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# make_unicode_egcb_data.py
-# Copyright (c) 2017-2022 K.Kosako
+# Copyright (c) 2017-2023 K.Kosako
import sys
import re
@@ -29,10 +29,10 @@ def check_version_info(s):
def print_ranges(ranges):
for (start, end) in ranges:
- print "0x%06x, 0x%06x" % (start, end)
+ print("0x%06x, 0x%06x" % (start, end))
def print_prop_and_index(prop, i):
- print "%-35s %3d" % (prop + ',', i)
+ print("%-35s %3d" % (prop + ',', i))
PropIndex[prop] = i
def dic_find_by_value(dic, v):
@@ -52,7 +52,7 @@ def normalize_ranges(in_ranges, sort=False):
r = []
prev = None
for (start, end) in ranges:
- if prev >= start - 1:
+ if prev is not None and prev >= start - 1:
(pstart, pend) = r.pop()
end = max(pend, end)
start = pstart
@@ -192,10 +192,11 @@ merge_props(PROPS, props)
PROPS = sorted(PROPS)
-print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */'
+print('/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */')
+
COPYRIGHT = '''
/*-
- * Copyright (c) 2017-2022 K.Kosako
+ * Copyright (c) 2017-2023 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -221,13 +222,13 @@ COPYRIGHT = '''
*/
'''.strip()
-print COPYRIGHT
-print ''
+print(COPYRIGHT)
+print('')
if VERSION_INFO[0] < 0:
raise RuntimeError("Version is not found")
-print "#define GRAPHEME_BREAK_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
-print ''
+print("#define GRAPHEME_BREAK_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]))
+print('')
ranges = []
for prop in PROPS:
@@ -243,16 +244,16 @@ for (start, end, prop) in ranges:
raise ValueError("{2}:{0} - {1} range overlap prev value {3}".format(start, end, prop, prev))
-print '/*'
+print('/*')
for prop in PROPS:
- print "%s" % prop
-print '*/'
-print ''
+ print("%s" % prop)
+print('*/')
+print('')
num_ranges = len(ranges)
-print "static int EGCB_RANGE_NUM = %d;" % num_ranges
+print("static int EGCB_RANGE_NUM = %d;" % num_ranges)
-print 'static EGCB_RANGE_TYPE EGCB_RANGES[] = {'
+print('static EGCB_RANGE_TYPE EGCB_RANGES[] = {')
for i, (start, end, prop) in enumerate(ranges):
if i == num_ranges - 1:
comma = ''
@@ -260,8 +261,8 @@ for i, (start, end, prop) in enumerate(ranges):
comma = ','
type_name = 'EGCB_' + prop
- print " {0x%06x, 0x%06x, %s }%s" % (start, end, type_name, comma)
+ print(" {0x%06x, 0x%06x, %s }%s" % (start, end, type_name, comma))
-print '};'
+print('};')
sys.exit(0)
diff --git a/src/make_unicode_fold.sh b/src/make_unicode_fold.sh
index 968b339..625b3df 100755
--- a/src/make_unicode_fold.sh
+++ b/src/make_unicode_fold.sh
@@ -9,19 +9,19 @@ TMP3=gperf3.tmp
GPERF_OPT='-n -C -T -c -t -j1 -L ANSI-C '
-python2 make_unicode_fold_data.py > unicode_fold_data.c
+python3 make_unicode_fold_data.py > unicode_fold_data.c
${GPERF} ${GPERF_OPT} -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf > ${TMP0}
-python2 gperf_unfold_key_conv.py < ${TMP0} > unicode_unfold_key.c
+python3 gperf_unfold_key_conv.py < ${TMP0} > unicode_unfold_key.c
${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf > ${TMP1}
-python2 gperf_fold_key_conv.py 1 < ${TMP1} > unicode_fold1_key.c
+python3 gperf_fold_key_conv.py 1 < ${TMP1} > unicode_fold1_key.c
${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf > ${TMP2}
-python2 gperf_fold_key_conv.py 2 < ${TMP2} > unicode_fold2_key.c
+python3 gperf_fold_key_conv.py 2 < ${TMP2} > unicode_fold2_key.c
${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf > ${TMP3}
-python2 gperf_fold_key_conv.py 3 < ${TMP3} > unicode_fold3_key.c
+python3 gperf_fold_key_conv.py 3 < ${TMP3} > unicode_fold3_key.c
# remove redundant EOLs before EOF
perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold_data.c
diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py
index e04c12a..1600021 100755
--- a/src/make_unicode_fold_data.py
+++ b/src/make_unicode_fold_data.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# make_unicode_fold_data.py
-# Copyright (c) 2016-2022 K.Kosako
+# Copyright (c) 2016-2023 K.Kosako
import sys
import re
@@ -30,7 +30,7 @@ LOCALE_UNFOLDS = {}
COPYRIGHT = '''
/*-
- * Copyright (c) 2017-2022 K.Kosako
+ * Copyright (c) 2017-2023 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -182,12 +182,12 @@ typedef unsigned long OnigCodePoint;
def divide_by_fold_len(d):
l = d.items()
- l1 = filter(lambda (k,e):e.fold_len == 1, l)
- l2 = filter(lambda (k,e):e.fold_len == 2, l)
- l3 = filter(lambda (k,e):e.fold_len == 3, l)
- sl1 = sorted(l1, key=lambda (k,e):k)
- sl2 = sorted(l2, key=lambda (k,e):k)
- sl3 = sorted(l3, key=lambda (k,e):k)
+ l1 = filter(lambda x:x[1].fold_len == 1, l)
+ l2 = filter(lambda x:x[1].fold_len == 2, l)
+ l3 = filter(lambda x:x[1].fold_len == 3, l)
+ sl1 = sorted(l1, key=lambda x:x[0])
+ sl2 = sorted(l2, key=lambda x:x[0])
+ sl3 = sorted(l3, key=lambda x:x[0])
return (sl1, sl2, sl3)
def output_comment(f, s):
@@ -198,7 +198,7 @@ def output_data_n1(f, n, fn, c, out_comment):
e.index = c
if out_comment and n > 1 and e.comment is not None:
output_comment(f, e.comment)
- print >> f, ''
+ print('', file=f)
f.write(' ')
f.write("/*%4d*/ " % c)
@@ -226,25 +226,25 @@ def output_data_n1(f, n, fn, c, out_comment):
return c
def output_data_n(f, name, n, fn, lfn, out_comment):
- print >> f, "OnigCodePoint %s%d[] = {" % (name, n)
+ print("OnigCodePoint %s%d[] = {" % (name, n), file=f)
c = 0
c = output_data_n1(f, n, fn, c, out_comment)
- print >> f, "#define FOLDS%d_NORMAL_END_INDEX %d" % (n, c)
- print >> f, " /* ----- LOCALE ----- */"
+ print("#define FOLDS%d_NORMAL_END_INDEX %d" % (n, c), file=f)
+ print(" /* ----- LOCALE ----- */", file=f)
c = output_data_n1(f, n, lfn, c, out_comment)
- print >> f, "#define FOLDS%d_END_INDEX %d" % (n, c)
- print >> f, "};"
+ print("#define FOLDS%d_END_INDEX %d" % (n, c), file=f)
+ print("};", file=f)
def output_fold_data(f, name, out_comment):
f1, f2, f3 = divide_by_fold_len(FOLDS)
lf1, lf2, lf3 = divide_by_fold_len(LOCALE_FOLDS)
output_data_n(f, name, 1, f1, lf1, out_comment)
- print >> f, ''
+ print('', file=f)
output_data_n(f, name, 2, f2, lf2, out_comment)
- print >> f, ''
+ print('', file=f)
output_data_n(f, name, 3, f3, lf3, out_comment)
- print >> f, ''
+ print('', file=f)
def output_macros(f, name):
print >> f, "#define FOLDS1_FOLD(i) (%s1 + (i))" % name
@@ -264,18 +264,18 @@ def output_macros(f, name):
print >> f, "#define FOLDS3_NEXT_INDEX(i) ((i) + 4 + %s1[(i)+3])" % name
def output_fold_source(f, out_comment):
- print >> f, "/* This file was generated by make_unicode_fold_data.py. */"
- print >> f, COPYRIGHT
- print >> f, "\n"
- print >> f, '#include "regenc.h"'
- print >> f, ''
+ print("/* This file was generated by make_unicode_fold_data.py. */", file=f)
+ print(COPYRIGHT, file=f)
+ print("\n", file=f)
+ print('#include "regenc.h"', file=f)
+ print('', file=f)
if VERSION_INFO[0] < 0:
raise RuntimeError("Version is not found")
- print "#define UNICODE_CASEFOLD_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
- print ''
+ print("#define UNICODE_CASEFOLD_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]))
+ print('')
#output_macros(f, DataName)
- print >> f, ''
+ print('', file=f)
#output_typedef(f)
output_fold_data(f, DataName, out_comment)
@@ -296,12 +296,12 @@ struct ByUnfoldKey {
f.write(head)
UNFOLDS.update(LOCALE_UNFOLDS)
l = UNFOLDS.items()
- sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index))
+ sl = sorted(l, key=lambda x:(x[1].fold_len, x[1].index))
for k, e in sl:
f.write('"%s", /*0x%04x*/ %4d, %d\n' %
(form3bytes(k), k, e.index, e.fold_len))
- print >> f, '%%'
+ print('%%', file=f)
def output_gperf_fold_key(f, key_len):
head = "%{\n/* This gperf source file was generated by make_unicode_fold_data.py */\n\n" + COPYRIGHT + """\
@@ -314,13 +314,13 @@ short int
"""
f.write(head)
l = FOLDS.items()
- l = filter(lambda (k,e):e.fold_len == key_len, l)
- sl = sorted(l, key=lambda (k,e):e.index)
+ l = filter(lambda x:x[1].fold_len == key_len, l)
+ sl = sorted(l, key=lambda x:x[1].index)
for k, e in sl:
skey = ''.join(map(lambda i: form3bytes(i), e.fold))
f.write('"%s", %4d\n' % (skey, e.index))
- print >> f, '%%'
+ print('%%', file=f)
def output_gperf_source():
with open(GPERF_UNFOLD_KEY_FILE, 'w') as f:
@@ -334,7 +334,7 @@ def output_gperf_source():
def unfolds_byte_length_check(encode):
l = UNFOLDS.items()
- sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index))
+ sl = sorted(l, key=lambda x:(x[1].fold_len, x[1].index))
for unfold, e in sl:
key_len = enc_len(unfold, encode)
fold_len = sum(map(lambda c: enc_len(c, encode), e.fold))
@@ -345,7 +345,7 @@ def unfolds_byte_length_check(encode):
def double_fold_check():
l = UNFOLDS.items()
- sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index))
+ sl = sorted(l, key=lambda x:(x[1].fold_len, x[1].index))
for unfold, e in sl:
for f in e.fold:
#print >> sys.stderr, ("check 0x%06x" % f)
@@ -356,9 +356,9 @@ def double_fold_check():
def unfold_is_multi_code_folds_head_check():
l = UNFOLDS.items()
- l2 = filter(lambda (k,e):e.fold_len == 2, l)
- l3 = filter(lambda (k,e):e.fold_len == 3, l)
- sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index))
+ l2 = filter(lambda x:x[1].fold_len == 2, l)
+ l3 = filter(lambda x:x[1].fold_len == 3, l)
+ sl = sorted(l, key=lambda x:(x[1].fold_len, x[1].index))
for unfold, _ in sl:
for k, e in l2:
if e.fold[0] == unfold:
@@ -454,7 +454,7 @@ def get_all_folds_expansion_max_num():
one_folds = make_one_folds(l)
fold2_heads = make_foldn_heads(l, 2, one_folds)
fold3_heads = make_foldn_heads(l, 3, one_folds)
- sl = sorted(l, key=lambda (k,e):(e.fold_len, e.index))
+ sl = sorted(l, key=lambda x:(x[1].fold_len, x[1].index))
nmax = 0
max_unfold = None
for unfold, e in sl:
diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh
index 7503e7b..d36484a 100755
--- a/src/make_unicode_property.sh
+++ b/src/make_unicode_property.sh
@@ -13,8 +13,8 @@ POOL_CAST='s/\(int *\)\(size_t *\)&\(\(struct +unicode_prop_name_pool_t *\* *\)
ADD_STATIC='s/(const +struct +PoolPropertyNameCtype +\*)/static \1/'
ADD_CAST='s/unsigned +int +hval *= *len/unsigned int hval = (unsigned int )len/'
-python2 make_unicode_property_data.py > ${NAME}.gperf
-python2 make_unicode_property_data.py -posix > ${NAME}_posix.gperf
+python3 make_unicode_property_data.py > ${NAME}.gperf
+python3 make_unicode_property_data.py -posix > ${NAME}_posix.gperf
${GPERF} ${GPERF_OPT} -N unicode_lookup_property_name --output-file ${TMP1} ${NAME}.gperf
cat ${TMP1} | ${SED} -e 's/^#line.*$//g' | ${SED} -E "${POOL_CAST}" | ${SED} -E "${ADD_STATIC}" | ${SED} -E "${ADD_CAST}" > ${NAME}.c
diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py
index d30a92b..11c453b 100755
--- a/src/make_unicode_property_data.py
+++ b/src/make_unicode_property_data.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# make_unicode_property_data.py
-# Copyright (c) 2016-2022 K.Kosako
+# Copyright (c) 2016-2023 K.Kosako
import sys
import re
@@ -46,31 +46,31 @@ def fix_block_name(name):
def print_ranges(ranges):
for (start, end) in ranges:
- print "0x%06x, 0x%06x" % (start, end)
+ print("0x%06x, 0x%06x" % (start, end))
- print len(ranges)
+ print(len(ranges))
def print_prop_and_index(prop, i):
- print "%-35s %3d" % (prop + ',', i)
+ print("%-35s %3d" % (prop + ',', i))
PropIndex[prop] = i
PRINT_CACHE = { }
def print_property(prop, data, desc):
- print ''
- print "/* PROPERTY: '%s': %s */" % (prop, desc)
+ print('')
+ print("/* PROPERTY: '%s': %s */" % (prop, desc))
prev_prop = dic_find_by_value(PRINT_CACHE, data)
if prev_prop is not None:
- print "#define CR_%s CR_%s" % (prop, prev_prop)
+ print("#define CR_%s CR_%s" % (prop, prev_prop))
else:
PRINT_CACHE[prop] = data
- print "static const OnigCodePoint"
- print "CR_%s[] = { %d," % (prop, len(data))
+ print("static const OnigCodePoint")
+ print("CR_%s[] = { %d," % (prop, len(data)))
for (start, end) in data:
- print "0x%04x, 0x%04x," % (start, end)
+ print("0x%04x, 0x%04x," % (start, end))
- print "}; /* END of CR_%s */" % prop
+ print("}; /* END of CR_%s */" % prop)
def dic_find_by_value(dic, v):
@@ -100,7 +100,7 @@ def normalize_ranges(in_ranges, sort=False):
r = []
prev = None
for (start, end) in ranges:
- if prev >= start - 1:
+ if prev is not None and prev >= start - 1:
(pstart, pend) = r.pop()
end = max(pend, end)
start = pstart
@@ -175,14 +175,14 @@ def merge_dic(to_dic, from_dic):
from_keys = from_dic.keys()
common = list(set(to_keys) & set(from_keys))
if len(common) != 0:
- print >> sys.stderr, "merge_dic: collision: %s" % sorted(common)
+ print("merge_dic: collision: %s" % sorted(common), file=sys.stderr)
to_dic.update(from_dic)
def merge_props(to_props, from_props):
common = list(set(to_props) & set(from_props))
if len(common) != 0:
- print >> sys.stderr, "merge_props: collision: %s" % sorted(common)
+ print("merge_props: collision: %s" % sorted(common), file=sys.stderr)
to_props.extend(from_props)
@@ -406,7 +406,7 @@ def set_max_prop_name(name):
def entry_prop_name(name, index):
set_max_prop_name(name)
if OUTPUT_LIST_MODE and index >= len(POSIX_LIST):
- print >> UPF, "%s" % (name)
+ print("%s" % (name), file=UPF)
def entry_and_print_prop_and_index(name, index):
entry_prop_name(name, index)
@@ -426,7 +426,7 @@ argc = len(argv)
COPYRIGHT = '''
/*-
- * Copyright (c) 2016-2022 K.Kosako
+ * Copyright (c) 2016-2023 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -462,7 +462,7 @@ for i in range(1, argc):
elif arg == '-gc':
INCLUDE_GRAPHEME_CLUSTER_DATA = True
else:
- print >> sys.stderr, "Invalid argument: %s" % arg
+ print("Invalid argument: %s" % arg, file=sys.stderr)
OUTPUT_LIST_MODE = not(POSIX_ONLY)
@@ -519,9 +519,9 @@ PROPS = sorted(PROPS)
s = '''%{
/* Generated by make_unicode_property_data.py. */
'''
-print s
-print COPYRIGHT
-print ''
+print(s)
+print(COPYRIGHT)
+print('')
for prop in POSIX_LIST:
if prop == 'PosixPunct':
@@ -531,7 +531,7 @@ for prop in POSIX_LIST:
print_property(prop, DIC[prop], desc)
-print ''
+print('')
if not(POSIX_ONLY):
for prop in PROPS:
@@ -551,18 +551,18 @@ if not(POSIX_ONLY):
print_property(block, DIC[block], 'Block')
-print ''
-print "static const OnigCodePoint*\nconst CodeRanges[] = {"
+print('')
+print("static const OnigCodePoint*\nconst CodeRanges[] = {")
for prop in POSIX_LIST:
- print " CR_%s," % prop
+ print(" CR_%s," % prop)
if not(POSIX_ONLY):
for prop in PROPS:
- print " CR_%s," % prop
+ print(" CR_%s," % prop)
for prop in BLOCKS:
- print " CR_%s," % prop
+ print(" CR_%s," % prop)
s = '''};
@@ -585,8 +585,8 @@ if OUTPUT_LIST_MODE:
if EMOJI_VERSION_INFO[0] < 0:
raise RuntimeError("Emoji Version is not found")
- print >> UPF, "Unicode Properties (Unicode Version: %d.%d.%d, Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
- print >> UPF, ''
+ print("Unicode Properties (Unicode Version: %d.%d.%d, Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1]), file=UPF)
+ print('', file=UPF)
index = -1
for prop in POSIX_LIST:
@@ -598,16 +598,16 @@ if not(POSIX_ONLY):
index += 1
entry_and_print_prop_and_index(prop, index)
- NALIASES = map(lambda (k,v):(normalize_prop_name(k), k, v), ALIASES.items())
+ NALIASES = map(lambda x:(normalize_prop_name(x[0]), x[0], x[1]), ALIASES.items())
NALIASES = sorted(NALIASES)
for (nk, k, v) in NALIASES:
nv = normalize_prop_name(v)
if PropIndex.get(nk, None) is not None:
- print >> sys.stderr, "ALIASES: already exists: %s => %s" % (k, v)
+ print("ALIASES: already exists: %s => %s" % (k, v), file=sys.stderr)
continue
aindex = PropIndex.get(nv, None)
if aindex is None:
- #print >> sys.stderr, "ALIASES: value is not exist: %s => %s" % (k, v)
+ #print("ALIASES: value is not exist: %s => %s" % (k, v), file=sys.stderr)
continue
entry_prop_name(k, aindex)
@@ -617,26 +617,26 @@ if not(POSIX_ONLY):
index += 1
entry_and_print_prop_and_index(name, index)
-print '%%'
-print ''
+print('%%')
+print('')
if not(POSIX_ONLY):
if VERSION_INFO[0] < 0:
raise RuntimeError("Unicode Version is not found")
if EMOJI_VERSION_INFO[0] < 0:
raise RuntimeError("Emoji Version is not found")
- print "#define UNICODE_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
- print "#define UNICODE_EMOJI_VERSION %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
- print ''
+ print("#define UNICODE_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]))
+ print("#define UNICODE_EMOJI_VERSION %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1]))
+ print('')
-print "#define PROPERTY_NAME_MAX_SIZE %d" % (PROPERTY_NAME_MAX_LEN + 10)
-print "#define CODE_RANGES_NUM %d" % (index + 1)
+print("#define PROPERTY_NAME_MAX_SIZE %d" % (PROPERTY_NAME_MAX_LEN + 10))
+print("#define CODE_RANGES_NUM %d" % (index + 1))
index_props = make_reverse_dic(PropIndex)
-print ''
+print('')
for i in range(index + 1):
for p in index_props[i]:
- print "#define PROP_INDEX_%s %d" % (p.upper(), i)
+ print("#define PROP_INDEX_%s %d" % (p.upper(), i))
if OUTPUT_LIST_MODE:
UPF.close()
diff --git a/src/make_unicode_wb.sh b/src/make_unicode_wb.sh
index 0dabbd4..61fafe5 100755
--- a/src/make_unicode_wb.sh
+++ b/src/make_unicode_wb.sh
@@ -2,6 +2,6 @@
NAME=unicode_wb_data
-python2 make_unicode_wb_data.py > ${NAME}.c
+python3 make_unicode_wb_data.py > ${NAME}.c
exit 0
diff --git a/src/make_unicode_wb_data.py b/src/make_unicode_wb_data.py
index 4f6599e..fc7d93a 100755
--- a/src/make_unicode_wb_data.py
+++ b/src/make_unicode_wb_data.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# make_unicode_wb_data.py
-# Copyright (c) 2019-2022 K.Kosako
+# Copyright (c) 2019-2023 K.Kosako
import sys
import re
@@ -29,10 +29,10 @@ def check_version_info(s):
def print_ranges(ranges):
for (start, end) in ranges:
- print "0x%06x, 0x%06x" % (start, end)
+ print("0x%06x, 0x%06x" % (start, end))
def print_prop_and_index(prop, i):
- print "%-35s %3d" % (prop + ',', i)
+ print("%-35s %3d" % (prop + ',', i))
PropIndex[prop] = i
def dic_find_by_value(dic, v):
@@ -52,7 +52,7 @@ def normalize_ranges(in_ranges, sort=False):
r = []
prev = None
for (start, end) in ranges:
- if prev >= start - 1:
+ if prev is not None and prev >= start - 1:
(pstart, pend) = r.pop()
end = max(pend, end)
start = pstart
@@ -192,10 +192,11 @@ merge_props(PROPS, props)
PROPS = sorted(PROPS)
-print '/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */'
+print('/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */')
+
COPYRIGHT = '''
/*-
- * Copyright (c) 2019-2022 K.Kosako
+ * Copyright (c) 2019-2023 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -221,13 +222,13 @@ COPYRIGHT = '''
*/
'''.strip()
-print COPYRIGHT
-print ''
+print(COPYRIGHT)
+print('')
if VERSION_INFO[0] < 0:
raise RuntimeError("Version is not found.")
-print "#define WORD_BREAK_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
-print ''
+print("#define WORD_BREAK_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2]))
+print('')
ranges = []
for prop in PROPS:
@@ -243,16 +244,16 @@ for (start, end, prop) in ranges:
raise ValueError("{2}:{0} - {1} range overlap prev value {3}".format(start, end, prop, prev))
-print '/*'
+print('/*')
for prop in PROPS:
- print "%s" % prop
-print '*/'
-print ''
+ print("%s" % prop)
+print('*/')
+print('')
num_ranges = len(ranges)
-print "static int WB_RANGE_NUM = %d;" % num_ranges
+print("static int WB_RANGE_NUM = %d;" % num_ranges)
-print 'static WB_RANGE_TYPE WB_RANGES[] = {'
+print('static WB_RANGE_TYPE WB_RANGES[] = {')
for i, (start, end, prop) in enumerate(ranges):
if i == num_ranges - 1:
comma = ''
@@ -260,8 +261,8 @@ for i, (start, end, prop) in enumerate(ranges):
comma = ','
type_name = 'WB_' + prop
- print " {0x%06x, 0x%06x, %s }%s" % (start, end, type_name, comma)
+ print(" {0x%06x, 0x%06x, %s }%s" % (start, end, type_name, comma))
-print '};'
+print('};')
sys.exit(0)