diff options
Diffstat (limited to 'gcc/cobol')
| -rw-r--r-- | gcc/cobol/ChangeLog | 97 | ||||
| -rw-r--r-- | gcc/cobol/Make-lang.in | 2 | ||||
| -rw-r--r-- | gcc/cobol/cdf.y | 16 | ||||
| -rw-r--r-- | gcc/cobol/cobol1.cc | 1 | ||||
| -rw-r--r-- | gcc/cobol/genapi.cc | 768 | ||||
| -rw-r--r-- | gcc/cobol/genapi.h | 6 | ||||
| -rw-r--r-- | gcc/cobol/genmath.cc | 1 | ||||
| -rw-r--r-- | gcc/cobol/genutil.cc | 2 | ||||
| -rw-r--r-- | gcc/cobol/parse.y | 320 | ||||
| -rw-r--r-- | gcc/cobol/parse_ante.h | 142 | ||||
| -rw-r--r-- | gcc/cobol/scan.l | 4 | ||||
| -rw-r--r-- | gcc/cobol/show_parse.h | 8 | ||||
| -rw-r--r-- | gcc/cobol/symbols.cc | 292 | ||||
| -rw-r--r-- | gcc/cobol/symbols.h | 137 | ||||
| -rw-r--r-- | gcc/cobol/util.cc | 33 |
15 files changed, 1277 insertions, 552 deletions
diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog index d2cc68a..0c42078 100644 --- a/gcc/cobol/ChangeLog +++ b/gcc/cobol/ChangeLog @@ -1,3 +1,100 @@ +2025-11-06 Robert Dubner <rdubner@symas.com> + James K. Lowden <jklowden@cobolworx.com> + + * Make-lang.in: Repair documentation generation. + * cdf.y: Changes to tokens. + * cobol1.cc (cobol_langhook_handle_option): Add comment. + * genapi.cc (function_pointer_from_name): Use data.original() for + function name. + (parser_initialize_programs): Likewise. + (cobol_compare): Make sure encodings of comparands are the same. + (move_tree): Change name of DEFAULT_SOURCE_ENCODING macro. + (parser_enter_program): Typo. + (psa_FldLiteralN): Break out dirty_to_binary() support routine. + (dirty_to_binary): Likewise. + (parser_alphabet): Rename 'alphabet' to 'collation_sequence'. + (parser_allocate): Change wsclear() to be uint32_t instead of char. + (parser_label_label): Formatting. + (parser_label_goto): Likewise. + (get_the_filename): Breakout get_the_filename(), which handles + encoding. + (parser_file_open): Likewise. + (set_up_delete_file_label): Implement DELETE FILE (Format 2). + (parser_file_delete_file): Likewise. + (parser_file_delete_on_exception): Likewise. + (parser_file_delete_not_exception): Likewise. + (parser_file_delete_end): Likewise. + (parser_call): Use data.original(). + (parser_entry): Use data.original(). + (mh_source_is_literalN): Convert from + sourceref.field->codeset.encoding. + (binary_initial_from_float128): Change to "binary_initial". + (binary_initial): Calculate in FIXED_WIDE_INT(128) instead of + REAL_VALUE_TYPE. + (digits_from_int128): New routine uses binary_initial. + (digits_from_float128): Removed. Kept as comment for reference. + (initial_from_initial): Use binary_initial. + (actually_create_the_static_field): Use correct encoding. + (parser_symbol_add): Likewise. + * genapi.h (parser_file_delete_file): Implement FILE DELETE. + (parser_file_delete_on_exception): Implement FILE DELETE. + (parser_file_delete_not_exception): Implement FILE DELETE. + (parser_file_delete_end): Implement FILE DELETE. + * genmath.cc: Include charmaps.h. + * genutil.cc (get_literal_string): Change name of + DEFAULT_SOURCE_ENCODING macro. + * parse.y: Token changes; numerous changes in support of encoding; + support for DELETE FILE. + * parse_ante.h (name_of): Use data.original(). + (class prog_descr_t): Support of locales. + (current_options): Formatting. + (current_encoding): Formatting. + (current_program_index): Formatting. + (current_section): Formatting. + (current_paragraph): Formatting. + (is_integer_literal): Use correct encoding. + (value_encoding_check): Handle encoding changes. + (alphabet_add): Likewise. + (data_division_ready): Likewise. + * scan.l: Use data.original(). + * show_parse.h: Use correct encoding. + * symbols.cc (elementize): Likewise. + (symbol_elem_cmp): Handle locale. + (struct symbol_elem_t): Likewise. + (symbol_locale): Likewise. + (field_str): Change DEFAULT_SOURCE_ENCODING macro name. + (symbols_alphabet_set): Formatting. + (symbols_update): Modify consistency checks. + (symbol_locale_add): Locale support. + (cbl_locale_t::cbl_locale_t): Locale support. + (cbl_alphabet_t::cbl_alphabet_t): New structure. + (cbl_alphabet_t::reencode): Formatting. + (cbl_alphabet_t::assign): Change name of collation_sequence. + (cbl_alphabet_t::also): Likewise. + (new_literal_add): Anticipate the need for four-byte characters. + (guess_encoding): Eliminate. + (cbl_field_t::internalize): Refine conversion of data.initial to + specified encoding. + * symbols.h (enum symbol_type_t): Add SymLocale. + (struct cbl_field_data_t): Incorporate data.orig. + (struct cbl_field_t): Likewise. + (struct cbl_delete_file_t): New structure. + (struct cbl_label_t): Incorporate cbl_delete_file_t. + (struct cbl_locale_t): Support for locale. + (hex_decode): Comment. + (struct cbl_alphabet_t): Incorporate locale; change variable name + to collation_sequence. + (struct symbol_elem_t): Incorporate locale. + (cbl_locale_of): Likewise. + (cbl_alphabet_of): Likewise. + (symbol_locale_add): Likewise. + (wsclear): Type is now uint32_t instead of char. + * util.cc (symbol_type_str): Incorporate locale. + (cbl_field_t::report_invalid_initial_value): Change test so that + pure PIC A() variables are limited to [a-zA-Z] and space. + (valid_move): Use DEFAULT_SOURCE_ENCODING macro. + (cobol_filename): Formatting. + 2025-10-26 Eric Botcazou <ebotcazou@adacore.com> * Make-lang.in ($(srcdir)/cobol/token_names.h): Silence recipe. diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in index ed6b588..9f28752 100644 --- a/gcc/cobol/Make-lang.in +++ b/gcc/cobol/Make-lang.in @@ -330,7 +330,7 @@ cobol.srcpdf: gcobol.pdf gcobol-io.pdf ln $^ $(srcdir)/cobol/ gcobol.pdf: $(srcdir)/cobol/gcobol.1 - groff -mdoc -T pdf $^ > $@~ + groff -mdoc -t -T pdf $^ > $@~ @mv $@~ $@ gcobol-io.pdf: $(srcdir)/cobol/gcobol.3 groff -mdoc -T pdf $^ > $@~ diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y index f72ed77..2d3f819 100644 --- a/gcc/cobol/cdf.y +++ b/gcc/cobol/cdf.y @@ -244,21 +244,21 @@ apply_cdf_turn( const exception_turn_t& turn ) { %type <boolean> DEFINED %token OTHER 699 PARAMETER_kw 369 "PARAMETER" %token OFF 688 OVERRIDE 370 -%token THRU 949 -%token TRUE_kw 814 "True" +%token THRU 950 +%token TRUE_kw 815 "True" %token CALL_COBOL 393 "CALL" %token CALL_VERBATIM 394 "CALL (as C)" -%token TURN 816 CHECKING 497 LOCATION 650 ON 690 WITH 843 +%token TURN 817 CHECKING 497 LOCATION 650 ON 690 WITH 844 -%left OR 950 -%left AND 951 -%right NOT 952 -%left '<' '>' '=' NE 953 LE 954 GE 955 +%left OR 951 +%left AND 952 +%right NOT 953 +%left '<' '>' '=' NE 954 LE 955 GE 956 %left '-' '+' %left '*' '/' -%right NEG 957 +%right NEG 958 %define api.prefix {ydf} %define api.token.prefix{YDF_} diff --git a/gcc/cobol/cobol1.cc b/gcc/cobol/cobol1.cc index 3146da5..77c457d 100644 --- a/gcc/cobol/cobol1.cc +++ b/gcc/cobol/cobol1.cc @@ -365,6 +365,7 @@ cobol_langhook_handle_option (size_t scode, return true; case OPT_fdefaultbyte: + // cobol_default_byte is an unsigned ing wsclear(cobol_default_byte); return true; diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index 9d30dde..8c5f28a 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -863,8 +863,12 @@ function_pointer_from_name(const cbl_refer_t &name, NULL); // And, hence, no types // Fetch the FUNCTION_DECL for that FUNCTION_TYPE - tree function_decl = gg_build_fn_decl(name.field->data.initial, + char *tname = static_cast<char *>(xmalloc(name.field->data.capacity+1)); + memcpy(tname, name.field->data.original(), name.field->data.capacity); + tname[name.field->data.capacity] = '\0'; + tree function_decl = gg_build_fn_decl(tname, fndecl_type); + free(tname); // Take the address of the function decl: tree address_of_function = gg_get_address_of(function_decl); gg_assign(function_pointer, address_of_function); @@ -877,11 +881,11 @@ function_pointer_from_name(const cbl_refer_t &name, gg_assign(function_pointer, gg_cast(build_pointer_type(function_type), gg_call_expr( VOID_P, - "__gg__function_handle_from_literal", - build_int_cst_type(INT, - current_function->our_symbol_table_index), - gg_string_literal(name.field->data.initial), - NULL_TREE))); + "__gg__function_handle_from_literal", + build_int_cst_type(INT, + current_function->our_symbol_table_index), + gg_string_literal(name.field->data.original()), + NULL_TREE))); } else { @@ -919,7 +923,7 @@ parser_initialize_programs( size_t nprogs, if( progs[i].field->type == FldLiteralA ) { SHOW_PARSE_TEXT("\"") - SHOW_PARSE_TEXT(progs[i].field->data.initial) + SHOW_PARSE_TEXT(progs[i].field->data.original()) SHOW_PARSE_TEXT("\"") } else @@ -2246,21 +2250,19 @@ cobol_compare( tree return_int, { // Comparing a FldLiteralN to an alphanumeric - // CONVERSION ALERT. lefty->field->data.initial is an ASCII - // string. We want to convert it to the same encoding as the - // right side. - - cbl_encoding_t enc_left = DEFAULT_CHARMAP_SOURCE; - cbl_encoding_t enc_right = - static_cast<cbl_encoding_t>(righty->field->codeset.encoding); - + // This next conversion may be overkill. But just in case + // the encodings of the two variables are different, we are + // going to convert left-side text to the right-side encoding + cbl_encoding_t enc_left = lefty->field->codeset.encoding; + cbl_encoding_t enc_right = righty->field->codeset.encoding; size_t outlength; - char *converted = __gg__iconverter(enc_left, - enc_right, - lefty->field->data.initial, - strlen(lefty->field->data.initial)+1, - &outlength ); - + size_t inlength = strlen(lefty->field->data.initial); + char *converted = __gg__iconverter( + enc_left, + enc_right, + lefty->field->data.initial, + inlength, + &outlength ); gg_assign( return_int, gg_call_expr( INT, "__gg__literaln_alpha_compare", @@ -2458,7 +2460,7 @@ move_tree( cbl_field_t *dest, gg_call(VOID, "__gg__string_to_alpha_edited", location, - build_int_cst_type(INT, DEFAULT_CHARMAP_SOURCE), + build_int_cst_type(INT, DEFAULT_SOURCE_ENCODING), psz_source, min_length, member(dest->var_decl_node, "picture"), @@ -3956,7 +3958,7 @@ parser_enter_program( const char *funcname_, if( strcmp(funcname_, "main") == 0 && this_module_has_main ) { - // setting 'retval' to 1 let's the caller know that we are being told + // Setting 'retval' to 1 lets the caller know that we are being told // both to synthesize a main() entry point to duplicate GCC's default // behavior, and to create an explicit entry point named "main". This will // eventually result in a link error (because of the duplicated entry @@ -4164,178 +4166,197 @@ parser_init_list() gg_call(VOID, "__gg__variables_to_init", gg_get_address_of(array), - wsclear() ? gg_string_literal(wsclear()) : null_pointer_node, + wsclear() ? build_string_literal(1, (const char *)wsclear()) + : null_pointer_node, NULL_TREE); } -static void -psa_FldLiteralN(struct cbl_field_t *field ) +static +FIXED_WIDE_INT(128) +dirty_to_binary(const char *instring, + uint32_t &capacity, + uint32_t &digits, + int32_t &rdigits, + uint64_t &attr) { - Analyze(); - SHOW_PARSE - { - SHOW_PARSE_HEADER - SHOW_PARSE_FIELD(" ", field) - SHOW_PARSE_END - } - // We are constructing a completely static constant structure, based on the - // text string in .initial - - CHECK_FIELD(field); + digits = 0; + rdigits = 0; + attr = 0; FIXED_WIDE_INT(128) value = 0; - do + // We need to convert data.initial to an FIXED_WIDE_INT(128) value + const char *p = instring; + int sign = 1; + if( *p == '-' ) { - // This is a false do{}while, to isolate the variables: + attr |= signable_e; + sign = -1; + p += 1; + } + else if( *p == '+' ) + { + // We set it signable so that the instruction DISPLAY +1 + // actually outputs "+1" + attr |= signable_e; + p += 1; + } - // We need to convert data.initial to an FIXED_WIDE_INT(128) value - char *p = const_cast<char *>(field->data.initial); - int sign = 1; - if( *p == '-' ) - { - field->attr |= signable_e; - sign = -1; - p += 1; - } - else if( *p == '+' ) - { - // We set it signable so that the instruction DISPLAY +1 - // actually outputs "+1" - field->attr |= signable_e; - p += 1; - } + // We need to be able to handle + // 123 + // 123.456 + // 123E<exp> + // 123.456E<exp> + // where <exp> can be N, +N and -N + // + // Oh, yeah, and we're talking handling up to 32 digits, or more, so using + // library routines is off the table. + + int rdigit_delta = 0; + int exponent = 0; + const char *exp = strchr(p, 'E'); + if( !exp ) + { + exp = strchr(p, 'e'); + } + if(exp) + { + exponent = atoi(exp+1); + } - // We need to be able to handle - // 123 - // 123.456 - // 123E<exp> - // 123.456E<exp> - // where <exp> can be N, +N and -N - // - // Oh, yeah, and we're talking handling up to 32 digits, or more, so using - // library routines is off the table. + // We can now calculate the value, and the number of digits and rdigits. - int digits = 0; - int rdigits = 0; - int rdigit_delta = 0; - int exponent = 0; + // We count up leading zeroes as part of the attr->digits calculation. + // It turns out that certain comparisons need to know the number of digits, + // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true. So, + // we need to count up leading zeroes. - const char *exp = strchr(p, 'E'); - if( !exp ) + for(;;) + { + char ch = *p++; + if( ch == symbol_decimal_point() ) { - exp = strchr(p, 'e'); + rdigit_delta = 1; + continue; } - if(exp) + if( ch < '0' || ch > '9' ) { - exponent = atoi(exp+1); + break; } + digits += 1; + rdigits += rdigit_delta; + value *= 10; + value += ch - '0'; + } - // We can now calculate the value, and the number of digits and rdigits. - - // We count up leading zeroes as part of the attr->digits calculation. - // It turns out that certain comparisons need to know the number of digits, - // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true. So, - // we need to count up leading zeroes. - - for(;;) + if( exponent < 0 ) + { + rdigits += -exponent; + } + else + { + while(exponent--) { - char ch = *p++; - if( ch == symbol_decimal_point() ) + if(rdigits) { - rdigit_delta = 1; - continue; + rdigits -= 1; } - if( ch < '0' || ch > '9' ) + else { - break; + digits += 1; + value *= 10; } - digits += 1; - rdigits += rdigit_delta; - value *= 10; - value += ch - '0'; } + } - if( exponent < 0 ) - { - rdigits += -exponent; - } - else - { - while(exponent--) - { - if(rdigits) - { - rdigits -= 1; - } - else - { - digits += 1; - value *= 10; - } - } - } + if( (int32_t)digits < rdigits ) + { + digits = rdigits; + } - if(digits < rdigits) - { - digits = rdigits; - } - field->data.digits = digits; - field->data.rdigits = rdigits; + // We now need to calculate the capacity. - // We now need to calculate the capacity. + unsigned int min_prec = wi::min_precision(value, UNSIGNED); + if( min_prec > 64 ) + { + // Bytes 15 through 8 are non-zero + capacity = 16; + } + else if( min_prec > 32 ) + { + // Bytes 7 through 4 are non-zero + capacity = 8; + } + else if( min_prec > 16 ) + { + // Bytes 3 and 2 + capacity = 4; + } + else if( min_prec > 8 ) + { + // Byte 1 is non-zero + capacity = 2; + } + else + { + // The value is zero through 0xFF + capacity = 1; + } - unsigned int min_prec = wi::min_precision(value, UNSIGNED); - int capacity; - if( min_prec > 64 ) - { - // Bytes 15 through 8 are non-zero - capacity = 16; - } - else if( min_prec > 32 ) - { - // Bytes 7 through 4 are non-zero - capacity = 8; - } - else if( min_prec > 16 ) - { - // Bytes 3 and 2 - capacity = 4; - } - else if( min_prec > 8 ) + value *= sign; + + // One last adjustment. The number is signable, so the binary value + // is going to be treated as twos complement. That means that the highest + // bit has to be 1 for negative signable numbers, and 0 for positive. If + // necessary, adjust capacity up by one byte so that the variable fits: + + if( capacity < 16 && (attr & signable_e) ) + { + FIXED_WIDE_INT(128) mask + = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1); + if( wi::neg_p (value) && (value & mask) == 0 ) { - // Byte 1 is non-zero - capacity = 2; + capacity *= 2; } - else + else if( !wi::neg_p (value) && (value & mask) != 0 ) { - // The value is zero through 0xFF - capacity = 1; + capacity *= 2; } + } - value *= sign; + return value; + } - // One last adjustment. The number is signable, so the binary value - // is going to be treated as twos complement. That means that the highest - // bit has to be 1 for negative signable numbers, and 0 for positive. If - // necessary, adjust capacity up by one byte so that the variable fits: +static void +psa_FldLiteralN(struct cbl_field_t *field ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", field) + SHOW_PARSE_END + } + // We are constructing a completely static constant structure, based on the + // text string in .initial - if( capacity < 16 && (field->attr & signable_e) ) - { - FIXED_WIDE_INT(128) mask - = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1); - if( wi::neg_p (value) && (value & mask) == 0 ) - { - capacity *= 2; - } - else if( !wi::neg_p (value) && (value & mask) != 0 ) - { - capacity *= 2; - } - } - field->data.capacity = capacity; + CHECK_FIELD(field); - }while(0); + uint32_t capacity; + uint32_t digits; + int32_t rdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value = dirty_to_binary(field->data.original(), + capacity, + digits, + rdigits, + attr); + // This is a rare occurrence of a parser_xxx call changing the entry + // in the symbol table. + field->data.capacity = capacity; + field->data.digits = digits; + field->data.rdigits = rdigits; + field->attr |= attr; char base_name[257]; char id_string[32] = ""; @@ -5136,9 +5157,9 @@ parser_alphabet( const cbl_alphabet_t& alphabet ) // character i has the ordinal alphabet[i] unsigned char ch = i; - ach[ch] = (alphabet.alphabet[i]); + ach[ch] = (alphabet.collation_sequence[i]); gg_assign( gg_array_value(table256, ch), - build_int_cst_type(UCHAR, (alphabet.alphabet[i])) ); + build_int_cst_type(UCHAR, (alphabet.collation_sequence[i])) ); } unsigned int low_char = alphabet.low_char; @@ -6811,7 +6832,7 @@ parser_allocate(cbl_refer_t size_or_based, cbl_field_t *f_working = current_options().initial_working(); cbl_field_t *f_local = current_options().initial_local(); - int default_byte = wsclear() ? *wsclear() : -1; + unsigned int default_byte = wsclear() ? *wsclear() : (uint32_t)(-1); gg_call(VOID, "__gg__allocate", @@ -8201,7 +8222,7 @@ parser_label_label(struct cbl_label_t *label) } CHECK_LABEL(label); - + #if 1 // At the present time, label_verify.lay is returning true, so I edited // out the if( !... ) to quiet cppcheck @@ -8252,7 +8273,7 @@ parser_label_goto(struct cbl_label_t *label) } CHECK_LABEL(label); - + label_verify.go_to(label); label_verify.go_to(label); @@ -9933,6 +9954,44 @@ parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char ) } } +static +tree get_the_filename(bool "ed_name, const cbl_file_t *file) + { + // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric. + // The runtime has a (char *)filename, so we need to + // do a runtime conversion. + + tree psz; // This is going to be either the name of the file, or the + // possible run-time environment variable that will contain + // the name of the file. + + cbl_field_t *field_of_name = symbol_field_forward(file->filename); + quoted_name = false; + if( field_of_name->type == FldForward ) + { + // The target of ASSIGN TO was unquoted, but didn't resolve to a + // cbl_field_t. This means that the name of the field is an + // environment variable that will hold the file name + psz = gg_define_char_star(); + gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name))); + } + else + { + // The name is coming from a presumably FldAlphaNumeric variable + psz = get_string_from(field_of_name); + gg_call( CHAR_P, + "__gg__convert_encoding", + psz, + build_int_cst_type(INT, + field_of_name->codeset.encoding), + build_int_cst_type(INT, + DEFAULT_SOURCE_ENCODING), + NULL_TREE); + quoted_name = true; + } + return psz; + } + void parser_file_open( struct cbl_file_t *file, int mode_char ) { @@ -9985,45 +10044,15 @@ parser_file_open( struct cbl_file_t *file, int mode_char ) TRACE1_END } - // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric. - // The runtime has a (char *)filename, so we need to - // do a runtime conversion. - - tree psz; // This is going to be either the name of the file, or the - // possible run-time environment variable that will contain - // the name of the file. - - cbl_field_t *field_of_name = symbol_field_forward(file->filename); - bool quoted_name = false; - if( field_of_name->type == FldForward ) - { - // The target of ASSIGN TO was unquoted, but didn't resolve to a - // cbl_field_t. This means that the name of the field is an - // environment variable that will hold the file name - psz = gg_define_char_star(); - gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name))); - } - else - { - // The name is coming from a presumably FldAlphaNumeric variable - psz = get_string_from(field_of_name); - gg_call( CHAR_P, - "__gg__convert_encoding", - psz, - build_int_cst_type(INT, - field_of_name->codeset.encoding), - build_int_cst_type(INT, - DEFAULT_CHARMAP_SOURCE), - NULL_TREE); - quoted_name = true; - } + bool quoted_name; + tree pszFilename = get_the_filename(quoted_name, file); sv_is_i_o = true; store_location_stuff("OPEN"); gg_call(VOID, "__gg__file_open", gg_get_address_of(file->var_decl_node), - psz, + pszFilename, build_int_cst_type(INT, mode_char), quoted_name ? integer_one_node : integer_zero_node, NULL_TREE); @@ -10384,6 +10413,121 @@ parser_file_delete( struct cbl_file_t *file, bool /*sequentially*/ ) } } +static void +set_up_delete_file_label(cbl_label_t *delete_file_label) + { + if( delete_file_label ) + { + if( !delete_file_label->structs.delete_file ) + { + delete_file_label->structs.delete_file + = static_cast<cbl_delete_file_t *> + (xmalloc(sizeof(struct cbl_delete_file_t))); + // Set up the address pairs for this clause + gg_create_goto_pair( + &delete_file_label->structs.delete_file->over.go_to, + &delete_file_label->structs.delete_file->over.label); + gg_create_goto_pair( + &delete_file_label->structs.delete_file->exception.go_to, + &delete_file_label->structs.delete_file->exception.label); + gg_create_goto_pair( + &delete_file_label->structs.delete_file->no_exception.go_to, + &delete_file_label->structs.delete_file->no_exception.label); + gg_create_goto_pair( + &delete_file_label->structs.delete_file->bottom.go_to, + &delete_file_label->structs.delete_file->bottom.label); + } + } + } + +void +parser_file_delete_file( cbl_label_t *name, + std::vector<cbl_file_t*> filenames ) + { + // This removes a file from the file system. It is distinct from the + // FILE DELETE statement, which deletes a record from a file. + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + for(size_t i=0; i<filenames.size(); i++) + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(filenames[i]->name) + } + SHOW_PARSE_END + } + set_up_delete_file_label(name); + tree there_was_an_error = gg_define_int(0); + for(size_t i=0; i<filenames.size(); i++) + { + bool quoted_name; + tree pszFilename = get_the_filename(quoted_name, filenames[i]); + gg_assign(there_was_an_error, + gg_bitwise_or(there_was_an_error, + gg_call_expr( + INT, + "__gg__file_remove", + gg_get_address_of(filenames[i]->var_decl_node), + pszFilename, + quoted_name ? integer_one_node : integer_zero_node, + NULL_TREE))); + set_user_status(filenames[i]); + } + IF( there_was_an_error, eq_op, integer_zero_node ) + { + // There was no error detected. + gg_append_statement(name->structs.delete_file->no_exception.go_to); + } + ELSE + { + // There was an error detected. + gg_append_statement(name->structs.delete_file->exception.go_to); + } + } + +void +parser_file_delete_on_exception( cbl_label_t *name ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + SHOW_PARSE_END + } + gg_append_statement(name->structs.delete_file->bottom.go_to); + gg_append_statement(name->structs.delete_file->exception.label); + } + +void +parser_file_delete_not_exception( cbl_label_t *name ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + SHOW_PARSE_END + } + gg_append_statement(name->structs.delete_file->bottom.go_to); + gg_append_statement(name->structs.delete_file->no_exception.label); + } + +void +parser_file_delete_end( cbl_label_t *name ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(name->name); + SHOW_PARSE_END + } + gg_append_statement(name->structs.delete_file->bottom.label); + } + void parser_file_rewrite(cbl_file_t *file, cbl_field_t *record_area, @@ -13639,7 +13783,7 @@ parser_call( cbl_refer_t name, create_and_call(narg, args, NULL_TREE, - name.field->data.initial, + name.field->data.original(), returned_value_type, returned, not_except); @@ -13747,7 +13891,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args ) { SHOW_PARSE_HEADER SHOW_PARSE_TEXT(" ") - SHOW_PARSE_TEXT(name->data.initial) + SHOW_PARSE_TEXT(name->data.original()) SHOW_PARSE_END } @@ -13756,7 +13900,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args ) // Get the name of the ENTRY point. // cppcheck-suppress nullPointerRedundantCheck - char *psz = cobol_name_mangler(name->data.initial); + char *psz = cobol_name_mangler(name->data.original()); // Create a goto/label pair. The label will be set up here; the goto will // be used when we re-enter the containing function: @@ -14642,13 +14786,12 @@ mh_source_is_literalN(cbl_refer_t &destref, SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move") } - // We know that the encoding of the literal::initial is in ASCII - // We need the data sent to __gg__psz_to_alpha_move to be in the // encoding of the destination size_t charsout; - const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + const char *converted = __gg__iconverter( + sourceref.field->codeset.encoding, destref.field->codeset.encoding, sourceref.field->data.initial, strlen(sourceref.field->data.initial), @@ -16086,54 +16229,50 @@ real_powi10 (uint32_t x) return pow10; } +static char * -binary_initial_from_float128(cbl_field_t *field, int rdigits, - REAL_VALUE_TYPE value) +binary_initial(cbl_field_t *field) { // This routine returns an xmalloced buffer designed to replace the // data.initial member of the incoming field char *retval = NULL; - // We need to adjust value so that it has no decimal places - if( rdigits ) + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(), + capacity, + ddigits, + drdigits, + attr); + int scaled_rdigits = get_scaled_rdigits(field); + + int i = field->data.rdigits; + while( i<0 ) { - REAL_VALUE_TYPE pow10 = real_powi10 (rdigits); - real_arithmetic (&value, MULT_EXPR, &value, &pow10); - real_convert (&value, TYPE_MODE (float128_type_node), &value); + value128 = value128/10; + i += 1; } - // We need to make sure that the resulting string will fit into - // a number with 'digits' digits - // Keep in mind that pure binary types, like BINARY-CHAR, have no digits - if( field->data.digits ) + // We take the digits of value128, and put them into ach. We line up + // the rdigits, and we truncate the string after desired_digits + while(drdigits < scaled_rdigits) { - REAL_VALUE_TYPE pow10 = real_powi10 (field->data.digits); - mpfr_t m0, m1; - - mpfr_inits2 (REAL_MODE_FORMAT (TYPE_MODE (float128_type_node))->p, - m0, m1, NULL); - mpfr_from_real (m0, &value, MPFR_RNDN); - mpfr_from_real (m1, &pow10, MPFR_RNDN); - mpfr_clear_flags (); - mpfr_fmod (m0, m0, m1, MPFR_RNDN); - real_from_mpfr (&value, m0, - REAL_MODE_FORMAT (TYPE_MODE (float128_type_node)), - MPFR_RNDN); - real_convert (&value, TYPE_MODE (float128_type_node), &value); - mpfr_clears (m0, m1, NULL); + value128 *= 10; + drdigits += 1; + } + while(drdigits > scaled_rdigits) + { + value128 = value128 / 10; + drdigits -= 1; } - - real_roundeven (&value, TYPE_MODE (float128_type_node), &value); - - bool fail = false; - FIXED_WIDE_INT(128) i - = FIXED_WIDE_INT(128)::from (real_to_integer (&value, &fail, 128), SIGNED); retval = static_cast<char *>(xmalloc(field->data.capacity)); gcc_assert(retval); switch(field->data.capacity) { - tree type; + tree type; case 1: case 2: case 4: @@ -16141,12 +16280,12 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits, case 16: type = build_nonstandard_integer_type ( field->data.capacity * BITS_PER_UNIT, 0); - native_encode_wide_int (type, i, PTRCAST(unsigned char, retval), + native_encode_wide_int (type, value128, PTRCAST(unsigned char, retval), field->data.capacity); break; default: fprintf(stderr, - "Trouble in binary_initial_from_float128 at %s() %s:%d\n", + "Trouble in binary_initial at %s() %s:%d\n", __func__, __FILE__, __LINE__); @@ -16157,6 +16296,60 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits, return retval; } +static void +digits_from_int128( char *ach, + cbl_field_t *field, + uint32_t desired_digits, + FIXED_WIDE_INT(128) value128, // cppcheck-suppress unknownMacro + int32_t rdigits) + { + if( value128 < 0 ) + { + value128 = -value128; + } + + // 'rdigits' are the number of rdigits in value128. + + int scaled_rdigits = get_scaled_rdigits(field); + + int i = field->data.rdigits; + while( i<0 ) + { + value128 = value128/10; + i += 1; + } + + // We take the digits of value128, and put them into ach. We line up + // the rdigits, and we truncate the string after desired_digits + while(rdigits < scaled_rdigits) + { + value128 *= 10; + rdigits += 1; + } + while(rdigits > scaled_rdigits) + { + value128 = value128 / 10; + rdigits -= 1; + } + char conv[128]; + print_dec (value128, conv, SIGNED); + size_t len = strlen(conv); + + if( len<desired_digits ) + { + memset(ach, ascii_0, desired_digits - len); + strcpy(ach+desired_digits - len, conv); + } + else + { + strcpy(ach, conv + len-desired_digits); + } + } + +#if 0 +// This routine was replaced with digits_from_int1289. However, I am choosing +// to keep it around for a while, because it is a master class in manipulating +// REAL_VALUE_TYPE and FIXED_WIDE_INT static void digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits, REAL_VALUE_TYPE value) @@ -16194,8 +16387,6 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits // We convert it to a integer string of digits: print_dec (i, ach, SIGNED); - //fprintf(stderr, "digits_from_float128() %s %f %s ", field->name, (double)value, ach); - gcc_assert( strlen(ach) <= field->data.digits ); if( strlen(ach) < width ) { @@ -16203,6 +16394,7 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits } strcpy(retval + (width-strlen(ach)), ach); } +#endif static char * initial_from_initial(cbl_field_t *field) @@ -16211,10 +16403,9 @@ initial_from_initial(cbl_field_t *field) // This routine returns an xmalloced buffer that is intended to replace the // data.initial member of the incoming field. - //fprintf(stderr, "initial_from_initial %s\n", field->name); + //fprintf(stderr, " %s\n", field->name); char *retval = NULL; - int rdigits; // Let's handle the possibility of a figurative constant cbl_figconst_t figconst = cbl_figconst_of(field->data.initial); @@ -16253,6 +16444,8 @@ initial_from_initial(cbl_field_t *field) if( field->data.etc_type == cbl_field_data_t::value_e ) value = TREE_REAL_CST (field->data.value_of ()); +#if 0 + int rdigits; // There is always the infuriating possibility of a P-scaled number if( field->attr & scaled_e ) { @@ -16288,17 +16481,18 @@ initial_from_initial(cbl_field_t *field) // Not P-scaled rdigits = field->data.rdigits; } +#endif switch(field->type) { case FldNumericBin5: case FldIndex: - retval = binary_initial_from_float128(field, rdigits, value); + retval = binary_initial(field); break; case FldNumericBinary: { - retval = binary_initial_from_float128(field, rdigits, value); + retval = binary_initial(field); size_t left = 0; size_t right = field->data.capacity - 1; while(left < right) @@ -16328,7 +16522,17 @@ initial_from_initial(cbl_field_t *field) negative = false; } - digits_from_float128(ach, field, field->data.digits, rdigits, value); + // Convert the data.initial to a __int128 + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial, + capacity, + ddigits, + drdigits, + attr); + digits_from_int128(ach, field, field->data.digits, value128, drdigits); const char *digits = ach; if( (field->attr & signable_e) @@ -16404,7 +16608,16 @@ initial_from_initial(cbl_field_t *field) size_t ndigits = (field->attr & separate_e) ? field->data.capacity * 2 : field->data.capacity * 2 - 1; - digits_from_float128(ach, field, ndigits, rdigits, value); + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial, + capacity, + ddigits, + drdigits, + attr); + digits_from_int128(ach, field, ndigits, value128, drdigits); const char *digits = ach; for(size_t i=0; i<ndigits; i++) @@ -16517,13 +16730,31 @@ initial_from_initial(cbl_field_t *field) else { size_t ndigits = field->data.capacity; - digits_from_float128(ach, field, ndigits, rdigits, value); - /* ??? This resides in libgcobol valconv.cc. */ + uint32_t capacity; + uint32_t ddigits; + int32_t drdigits; + uint64_t attr; + FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(), + capacity, + ddigits, + drdigits, + attr); + digits_from_int128(ach, field, ndigits, value128, drdigits); + + // __gg__string_to_numeric_edited operates in ASCII space: __gg__string_to_numeric_edited( retval, ach, field->data.rdigits, negative, field->data.picture); + // So now we convert it to the target encoding: + size_t nbytes; + const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING, + field->codeset.encoding, + retval, + strlen(retval), + &nbytes); + strcpy(retval, converted); } } break; @@ -16556,10 +16787,32 @@ initial_from_initial(cbl_field_t *field) case FldLiteralN: { -//// retval = static_cast<char *>(xmalloc(field->data.capacity+1)); -//// gcc_assert(retval); -//// memcpy(retval, field->data.initial, field->data.capacity); -//// retval[field->data.capacity] = '\0'; + // This requires annotation. + + // The compiler originally used ASCII for field->data.initial. Later we + // expanded the field with the addition of the codeset.encoding + // For consistency in the parser processing, the FldLiteralN is arriving + // with the Object-Computer's character encoding, and field->data.initial + // is showing up encoded. + + // But on the run-time side, if the initial string is needed, it is + // invariably more useful in ASCII. Consider converting that string to + // a floating-point value, for example. + + // So, we are going to convert the data.initial string back to ASCII + // here. Later on, when we establish the run-time encoding, we will + // check for FldLiteralN and set that to ASCII as well. See + // actually_create_the_static_field(). + + size_t nbytes; + const char *converted = __gg__iconverter(field->codeset.encoding, + DEFAULT_SOURCE_ENCODING, + field->data.initial, + strlen(field->data.initial), + &nbytes); + retval = static_cast<char *>(xmalloc(strlen(field->data.initial)+1)); + gcc_assert(retval); + strcpy(retval, converted); break; } @@ -16716,9 +16969,14 @@ actually_create_the_static_field( cbl_field_t *new_var, next_field = TREE_CHAIN(next_field); // INT, "encoding", + // For FldLiteralN we force the encoding to be ASCII. + // See initial_from_initial() for an explanation. CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr), next_field, - build_int_cst_type(INT, new_var->codeset.encoding)); + build_int_cst_type(INT, + new_var->type == FldLiteralN ? + DEFAULT_SOURCE_ENCODING + : new_var->codeset.encoding)); next_field = TREE_CHAIN(next_field); // INT, "alphabet", @@ -17643,6 +17901,10 @@ parser_symbol_add(struct cbl_field_t *new_var ) length_of_initial_string = new_var->data.capacity+1; break; + case FldLiteralN: + length_of_initial_string = strlen(new_initial)+1; + break; + default: length_of_initial_string = new_var->data.capacity; break; diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h index 6582d2e..802bba7 100644 --- a/gcc/cobol/genapi.h +++ b/gcc/cobol/genapi.h @@ -400,6 +400,12 @@ parser_file_rewrite( cbl_file_t *file, cbl_field_t *field, void parser_file_delete( cbl_file_t *file, bool sequentially ); +void parser_file_delete_file( cbl_label_t *name, + std::vector<cbl_file_t*> filenames ); +void parser_file_delete_on_exception( cbl_label_t *name ); +void parser_file_delete_not_exception( cbl_label_t *name ); +void parser_file_delete_end( cbl_label_t *name ); + #if condition_lists struct cbl_conditional_t { cbl_field_t *tgt; diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc index 320e6bf..7d6ae8c 100644 --- a/gcc/cobol/genmath.cc +++ b/gcc/cobol/genmath.cc @@ -42,6 +42,7 @@ #include "gengen.h" #include "structs.h" #include "../../libgcobol/gcobolio.h" +#include "../../libgcobol/charmaps.h" #include "show_parse.h" void diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index 56b6b83..63f37f6 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -1744,7 +1744,7 @@ get_literal_string(cbl_field_t *field) char *buffer = static_cast<char *>(xcalloc(1, buffer_length)); size_t charsout; - const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE, + const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING, field->codeset.encoding, field->data.initial, field->data.capacity, diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index 9187a59..d54a686 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -51,7 +51,7 @@ accept_envar_e, }; - struct collating_an_t { + struct coll_alphanat_t { const char *alpha, *national; }; @@ -575,7 +575,7 @@ class locale_tgt_t { RD RECORD RECORDING RECORDS RECURSIVE REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS - REPOSITORY RERUN RESERVE RESTRICTED RESUME + REPOSITORY RERUN RESERVE RESTRICTED RESUME RETRY REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN SAME SCREEN SD @@ -702,8 +702,8 @@ class locale_tgt_t { %type <number> open_io alphabet_etc %type <special_type> device_name %type <string> numed context_word ctx_name locale_spec -%type <collating_sequences> collating_sequences collating_ans -%type <collating_name> collating_an +%type <char_class_locales> char_class_locales coll_alphanats +%type <collating_name> coll_alphanat %type <literal> namestr alphabet_lit program_as repo_as %type <field> perform_cond kind_of_name %type <refer> alloc_ret @@ -738,6 +738,9 @@ class locale_tgt_t { relative_key_clause reserve_clause sharing_clause %type <file> filename read_body write_body delete_body +%type <label> delete_file_body +%type <error> delete_error delete_except delete_excepts + %type <file> start_impl start_cond start_body %type <rewrite_t> rewrite_body %type <min_max> record_vary rec_contains from_to record_desc @@ -833,6 +836,7 @@ class locale_tgt_t { global is_global anycase backward end_display exh_changed exh_named + override %type <number> mistake globally first_last %type <io_mode> io_mode @@ -874,6 +878,7 @@ class locale_tgt_t { %type <opt_init_sect> opt_init_sect %type <number> opt_init_value %type <number> locale_current loc_category user_default +%type <string> locale_name %type <token_list> loc_categories locale_tgt %type <opt_round> rounded round_between rounded_type rounded_mode %type <opt_arith> opt_arith_type @@ -901,7 +906,7 @@ class locale_tgt_t { struct { YYLTYPE loc; int token; literal_t name; } prog_end; struct { int token; special_name_t id; } special_type; struct { char locale_type; const char * name; } locale_phrase; - collating_an_t collating_sequences; + coll_alphanat_t char_class_locales; struct collating_name_t { int token; const char *name; } collating_name; struct { size_t isym; cbl_encoding_t encoding; } codeset; struct { cbl_field_type_t type; @@ -2371,6 +2376,23 @@ config_paragraphs: config_paragraph config_paragraph: SPECIAL_NAMES '.' | SPECIAL_NAMES '.' special_names '.' + { + std::reverse_iterator<symbol_elem_t *> + p(symbols_end()), + pend(symbols_begin(PROGRAM)); + for( ++p; p != pend; p++ ) { + if( p->type == SymAlphabet ) { + const auto& alphabet = *cbl_alphabet_of(&*p); + if( alphabet.encoding == no_encoding_e ) { + assert(alphabet.locale != 0 ); + const auto& missing = *cbl_locale_of(symbol_at(alphabet.locale)); + error_msg(alphabet.loc, + "ALPHABET %qs references LOCALE %qs, which is not defined", + alphabet.name, missing.name); + } + } + } + } | SOURCE_COMPUTER '.' | SOURCE_COMPUTER '.' NAME '.' | SOURCE_COMPUTER '.' NAME with_debug '.' @@ -2507,19 +2529,36 @@ with_debug: with DEBUGGING MODE { ; collations: %empty - | collation_classification - | collation_sequence - | collation_classification collation_sequence - | collation_sequence collation_classification + | char_classification + | collating_sequence + | char_classification collating_sequence + | collating_sequence char_classification ; -collation_classification: - character CLASSIFICATION collating_sequences[seq] +char_classification: + character CLASSIFICATION char_class_locales[seq] { - warn_msg(@seq, "CHARACTER CLASSIFICATION ignored"); + if( $seq.alpha ) { + auto e = symbol_locale(PROGRAM, $seq.alpha); + if( !e ) { + error_msg(@seq, "no LOCALE defined as %qs", $seq.alpha); + } else { + auto& encoding = cbl_locale_of(e)->encoding; + current.alpha_encoding(symbol_index(e), encoding); + } + } + if( $seq.national ) { + auto e = symbol_locale(PROGRAM, $seq.national); + if( !e ) { + error_msg(@seq, "no LOCALE defined as %qs", $seq.national); + } else { + auto& encoding = cbl_locale_of(e)->encoding; + current.national_encoding(symbol_index(e), encoding); + } + } } ; -collation_sequence: - program_kw collating SEQUENCE collating_sequences[seq] +collating_sequence: + program_kw collating SEQUENCE char_class_locales[seq] { if( !current.collating_sequence($seq.alpha) ) { error_msg(@seq, "collating sequence already defined as '%s'", @@ -2529,20 +2568,20 @@ collation_sequence: } ; -collating_sequences: +char_class_locales: is NAME[name] { $$.alpha = $name; $$.national = nullptr; } - | collating_ans { $$ = $1; } + | coll_alphanats { $$ = $1; } ; -collating_ans: collating_an[encoding] { - $$ = collating_an_t(); +coll_alphanats: coll_alphanat[encoding] { + $$ = coll_alphanat_t(); const char **pname = $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national; *pname = $encoding.name; } - | collating_ans collating_an[encoding] + | coll_alphanats coll_alphanat[encoding] { const char **pname = $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national; @@ -2553,7 +2592,7 @@ collating_ans: collating_an[encoding] { *pname = $encoding.name; } ; -collating_an: for alphanational is locale_phrase[locale] { +coll_alphanat: for alphanational is locale_phrase[locale] { $$.token = $alphanational; $$.name = $locale.name; if( ! $locale.name ) { @@ -2568,7 +2607,6 @@ collating_an: for alphanational is locale_phrase[locale] { keyword_str($$.token), locale_name); } - warn_msg(@locale, "LOCALE phrase ignored"); } ; @@ -2643,9 +2681,20 @@ special_name: dev_mnemonic { symbol_decimal_point_set(','); } - | LOCALE NAME is locale_spec[spec] { - current.locale($NAME, $spec); - cbl_unimplementedw("sorry, unimplemented: LOCALE %qs", $spec); + | LOCALE NAME is locale_spec[spec] + { + cbl_locale_t locale($NAME, $spec); + if( locale.encoding == no_encoding_e ) { + error_msg(@NAME, "invalid iconv LOCALE name %qs", $spec); + YYERROR; + } + if( locale.encoding == UTF8_e ) { + cbl_unimplemented("UTF-8"); + YYERROR; + } + if( ! current.locale_add(locale) ) { + error_msg(@NAME, "%qs already defined as LOCALE name", $NAME); + } } ; | upsi @@ -2655,6 +2704,8 @@ special_name: dev_mnemonic } ; locale_spec: NAME { $$ = $1; } + | UTF_8 { static char s[] ="UTF-8"; $$ = s; } + | UTF_16 { static char s[] ="UTF-16"; $$ = s; } | LITERAL { $$ = string_of($1); } ; @@ -2746,14 +2797,16 @@ device_name: SYSIN { $$.token = SYSIN; $$.id = SYSIN_e; } alphabet_name: STANDARD_ALPHABET { $$ = alphabet_add(@1, CP1252_e); } | NATIVE { $$ = alphabet_add(@1, EBCDIC_e); } | EBCDIC { $$ = alphabet_add(@1, EBCDIC_e); } - | LOCALE ctx_name + | LOCALE locale_name[name] { - auto e = symbol_alphabet(PROGRAM, $ctx_name); + auto e = symbol_locale(PROGRAM, $name); if( !e ) { - error_msg(@ctx_name, "no such ALPHABET %qs", $ctx_name); - YYERROR; - } - $$ = cbl_alphabet_of(e); + dbgmsg("no such LOCALE yet %s", $name); + cbl_locale_t locale($name); // locale is named but not defined + e = symbol_locale_add(PROGRAM, &locale); + } + cbl_alphabet_t alphabet( @name, symbol_index(e), $name); + $$ = alphabet_add(alphabet); } | alphabet_seqs { @@ -3592,7 +3645,7 @@ const_value: cce_expr value78: literalism { - cbl_field_data_t data = {}; + cbl_field_data_t data; data.capacity = capacity_cast(strlen($1.data)); data.initial = $1.data; $$.encoding = $1.encoding; @@ -3600,13 +3653,15 @@ value78: literalism } | const_value { - cbl_field_data_t data = {}; + cbl_field_data_t data; data = build_real (float128_type_node, $1); + $$.encoding = current_encoding('A'); $$.data = new cbl_field_data_t(data); } | reserved_value[value] { const auto field = constant_of(constant_index($value)); + $$.encoding = current_encoding('A'); $$.data = new cbl_field_data_t(field->data); } @@ -3638,6 +3693,7 @@ data_descr1: level_name field.type = FldLiteralN; field.data = build_real (float128_type_node, $const_value); field.data.initial = string_of($const_value); + field.codeset.set(); if( !cdf_value(field.name, cdfval_t($const_value)) ) { error_msg(@1, "%s was defined by CDF", field.name); @@ -3674,13 +3730,12 @@ data_descr1: level_name if( !cdf_value(field.name, $lit.data) ) { error_msg(@1, "%s was defined by CDF", field.name); } - if( ! field.codeset.valid() ) { - if( ! field.codeset.set(field.codeset.standard_internal.type) ) { - error_msg(@lit, "CONSTANT inconsistent with encoding %s", - cbl_alphabet_t::encoding_str(field.codeset.encoding)); - } + if( ! field.codeset.set() ) { + error_msg(@lit, "CONSTANT inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field.codeset.encoding)); } - value_encoding_check(@lit, $1, $lit.encoding); + + value_encoding_check(@lit, $1); } | level_name CONSTANT is_global FROM NAME { @@ -3718,6 +3773,7 @@ data_descr1: level_name } else { field.type = FldLiteralN; field.data.initial = string_of(field.data.value_of()); + field.codeset.set($data.encoding); if( !cdf_value(field.name, field.as_integer()) ) { yywarn("%s was defined by CDF", field.name); } @@ -3975,6 +4031,15 @@ data_descr1: level_name // Verify VALUE $field->report_invalid_initial_value(@data_clauses); + bool numerical = + $field->type == FldNumericDisplay || is_numeric($field); + + if( $field->data.initial && ! numerical ) { + if( normal_value_e == cbl_figconst_of($field->data.initial) ) { + value_encoding_check(@data_clauses, $field); + } + } + // verify REDEFINES const auto parent = parent_of($field); if( parent && $field->level == parent->level ) { @@ -4287,14 +4352,16 @@ picture_clause: PIC signed nps[fore] nines nps[aft] if( field->data.initial != NULL ) { if( 0 < field->data.capacity && field->data.capacity < uint32_t($size) ) { - auto p = blank_pad_initial( field->data.initial, - field->data.capacity, $size ); + auto p = blank_pad_initial(field->data.initial, + field->data.capacity, $size ); if( !p ) YYERROR; field->data.initial = p; } } - field->data.capacity = $size; + charmap_t *charmap = + __gg__get_charmap(field->codeset.encoding); + field->data.capacity = $size * charmap->stride(); field->data.picture = NULL; if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s", @@ -4708,14 +4775,23 @@ usage_clause1: usage BIT value_clause: VALUE all LITERAL[lit] { cbl_field_t *field = current_field(); - if( ! field->codeset.set($lit.encoding) ) { - error_msg(@lit, "VALUE inconsistent with encoding %s", - cbl_alphabet_t::encoding_str(field->codeset.encoding)); + + if( $lit.prefix[0] ) { // not the default encoding + if( ! field->codeset.set($lit.encoding) ) { + error_msg(@lit, "VALUE inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field->codeset.encoding)); + } + } else { + field->codeset.set(); } + + if( field->codeset.encoding != $lit.encoding ) { + error_msg(@lit, "PICTURE inconsistent with VALUE %s'%s'", + $lit.prefix, $lit.data); + } + field->data.initial = $lit.data; field->attr |= literal_attr($lit.prefix); - // The __gg__initialize_data routine needs to know that VALUE is a - // quoted literal. This is critical for NumericEdited variables field->attr |= quoted_e; if( field->data.capacity == 0 ) { @@ -4732,7 +4808,6 @@ value_clause: VALUE all LITERAL[lit] { } } } - value_encoding_check(@lit, field, $lit.encoding); } | VALUE all cce_expr[value] { cbl_field_t *field = current_field(); @@ -4761,11 +4836,9 @@ value_clause: VALUE all LITERAL[lit] { | VALUE all reserved_value[value] { cbl_field_t *field = current_field(); - if( ! field->codeset.valid() ) { - if( ! field->codeset.set(field->codeset.standard_internal.type) ) { - error_msg(@value, "VALUE inconsistent with encoding %s", - cbl_alphabet_t::encoding_str(field->codeset.encoding)); - } + if( ! field->codeset.set() ) { + error_msg(@value, "VALUE inconsistent with encoding %s", + cbl_alphabet_t::encoding_str(field->codeset.encoding)); } if( $value != NULLS ) { auto fig = constant_of(constant_index($value)); @@ -5017,6 +5090,7 @@ typedef_clause: is TYPEDEF strong error_msg(@2, "%s %s IS TYPEDEF must be level 01", field->level_str(), field->name); } + field->codeset.set(); field->attr |= typedef_e; if( $strong ) field->attr |= strongdef_e; if( ! current.typedef_add(field) ) { @@ -7007,6 +7081,8 @@ context_word: APPLY { static char s[] ="APPLY"; $$ = s; } // LOCK MODE clause | MULTIPLE { static char s[] ="MULTIPLE"; $$ = s; } // LOCK ON phrase + | NAT { static char s[] ="NAT"; + $$ = s; } // CONVERT function | NEAREST_AWAY_FROM_ZERO { static char s[] ="NEAREST-AWAY-FROM-ZERO"; $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase | NEAREST_EVEN { static char s[] ="NEAREST-EVEN"; @@ -8544,7 +8620,7 @@ advance_by: scalar lines { $$ = $1; } /* BUG: should accept reference */ * number of lines is negative. So, we use the * negative Number Of The Beast as a PAGE flag. */ - $$ = new_reference( new_literal("-666") ); + $$ = new_reference( new_literal(xstrdup("-666")) ); } | device_name { $$ = new_reference(literally_one); } ; @@ -8601,7 +8677,33 @@ io_invalid: INVALID key { delete: delete_impl end_delete | delete_cond end_delete + | delete_file end_delete ; +delete_file: DELETE delete_file_body[stmt] delete_error[err] { + if( ! $err.on_error ) parser_file_delete_on_exception($stmt); + if( ! $err.not_error ) parser_file_delete_not_exception($stmt); + parser_file_delete_end($stmt); + current.declaratives_evaluate(); + } +delete_file_body: + FILE_KW override filenames retry_phrase { + $$ = label_add(@$, LblXml, uniq_label("xfile")); + xml_statements.push($$); + statement_begin(@$, DELETE); + std::vector<cbl_file_t*> + filenames($filenames->files.begin(), + $filenames->files.end() ); + parser_file_delete_file( $$, filenames); + } + ; +retry_phrase: %empty + | RETRY expr TIMES + | FOR expr SECONDS + | FOREVER { + cbl_unimplemented("DELETE FILE RETRY"); + } + ; + delete_impl: DELETE delete_body[file] { file_delete_args.call_parser_file_delete(true); @@ -8634,6 +8736,63 @@ delete_body: filename[file] record $$ = $file; } ; + +delete_error: %empty %prec DELETE { + $$.on_error = $$.not_error = nullptr; + } + | delete_excepts %prec DELETE + ; +delete_excepts: delete_except[a] statements %prec DELETE + { + assert( $a.on_error || $a.not_error ); + assert( ! ($a.on_error && $a.not_error) ); + $$ = $a; + } + | delete_excepts[a] delete_except[b] statements %prec DELETE + { + if( $a.on_error && $a.not_error ) { + error_msg(@1, "too many ON ERROR clauses"); + YYERROR; + } + // "ON" and "NOT ON" could be reversed, but not duplicated. + if( $a.on_error && $b.on_error ) { + error_msg(@1, "duplicate ON ERROR clauses"); + YYERROR; + } + if( $a.not_error && $b.not_error ) { + error_msg(@1, "duplicate NOT ON ERROR clauses"); + YYERROR; + } + $$ = $a; + if( $$.on_error ) { + assert($b.not_error); + $$.not_error = $b.not_error; + } else { + assert($b.on_error); + $$.on_error = $b.on_error; + } + } + ; +delete_except: EXCEPTION + { + auto xml_stmt = xml_statements.top(); + // The value of the pointer no longer matters, only NULL or not. + $$.on_error = $$.not_error = nullptr; + switch($1) { + case EXCEPTION: + $$.on_error = xml_stmt; + parser_file_delete_on_exception(xml_stmt); + break; + case NOT: + $$.not_error = xml_stmt; + parser_file_delete_not_exception(xml_stmt); + break; + default: + gcc_unreachable(); + } + } + ; + end_delete: %empty %prec DELETE | END_DELETE ; @@ -10536,7 +10695,9 @@ function_udf: FUNCTION_UDF '(' arg_list[args] ')' { cbl_ffi_arg_t actual(param.crv, ar); return actual; } ); - auto name = new_literal(strlen(L->name), L->name, quoted_e); + // Pretend hex-encoded because that means use verbatim. + auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e); + auto name = new_literal(strlen(L->name), L->name, attr); ast_call( @1, name, $$, args.size(), args.data(), NULL, NULL, true ); } | FUNCTION_UDF_0 { @@ -10547,8 +10708,11 @@ function_udf: FUNCTION_UDF '(' arg_list[args] ')' { const auto returning = cbl_field_of(symbol_at(L->returning)); $$ = new_temporary_clone(returning); $$->data.initial = returning->name; // user's name for the field - - auto name = new_literal(strlen(L->name), L->name, quoted_e); + cbl_field_attr_t call_attr + = (cbl_field_attr_t)(quoted_e|hex_encoded_e); + cbl_field_t *name = new_literal(strlen(L->name), + L->name, + call_attr); ast_call( @1, name, $$, narg, args, NULL, NULL, true ); } ; @@ -11135,6 +11299,18 @@ subst_input: anycase first_last varg[v1] varg[v2] { } ; +locale_name: NAME + { + auto e = symbol_locale(PROGRAM, $NAME); + if( !e ) { + error_msg(@NAME, "no such SPECIAL-NAMES LOCALE: %qs", $NAME); + YYERROR; + } + $$ = const_cast<char*>( + __gg__encoding_iconv_name(cbl_locale_of(e)->encoding) ); + } + ; + intrinsic_locale: LOCALE_COMPARE '(' varg[r1] varg[r2] ')' { @@ -11143,11 +11319,12 @@ intrinsic_locale: cbl_refer_t dummy = {}; if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &dummy) ) YYERROR; } - | LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')' + | LOCALE_COMPARE '(' varg[r1] varg[r2] locale_name ')' { location_set(@1); $$ = new_alphanumeric(); - if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, $r3) ) YYERROR; + cbl_refer_t locale(new_literal($locale_name)); + if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &locale) ) YYERROR; } | LOCALE_DATE '(' varg[r1] ')' @@ -11453,6 +11630,10 @@ optional: %empty { $$ = false; } | OPTIONAL { $$ = true; } ; +override: %empty { $$ = false; } + | OVERRIDE { $$ = true; } + ; + program_kw: %empty | PROGRAM_kw ; @@ -11900,6 +12081,7 @@ void ast_call( const YYLTYPE& loc, cbl_refer_t name, const cbl_refer_t& returnin if( is_literal(name.field) ) { cbl_field_t called = { FldLiteralA, quoted_e | constant_e, name.field->data, 77 }; + called.attr |= name.field->attr; snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial); name.field = cbl_field_of(symbol_field_add(PROGRAM, &called)); symbol_field_location(field_index(name.field), loc); @@ -13030,13 +13212,13 @@ struct expand_group : public std::list<cbl_refer_t> { }; -static const char * initial_default_value; - const char * wsclear() { return initial_default_value; } +static const uint32_t * initial_default_value; + const uint32_t * wsclear() { return initial_default_value; } void -wsclear( char ch ) { - static char byte = ch; - initial_default_value = &byte; +wsclear( uint32_t i ) { + static uint32_t init_val = i; + initial_default_value = &init_val; current.program_needs_initial(); } @@ -13558,16 +13740,16 @@ literal_t::set( const cbl_field_t * field ) { literal_t& literal_t::set_prefix( const char *input, size_t len ) { - encoding = current_encoding('A'); + encoding = current_encoding(display_encoding_e); assert(len < sizeof(prefix)); std::fill(prefix, prefix + sizeof(prefix), '\0'); std::transform(input, input + len, prefix, toupper); switch(prefix[0]) { case '\0': case 'Z': - encoding = current_encoding('A'); + encoding = current_encoding(display_encoding_e); break; case 'N': - encoding = current_encoding('N'); + encoding = current_encoding(national_encoding_e); if( 'X' == prefix[1] ) { cbl_unimplemented("NX literals"); } @@ -13583,7 +13765,7 @@ literal_t::set_prefix( const char *input, size_t len ) { default: gcc_unreachable(); } - assert(encoding <= iconv_YU_e); + assert(valid_encoding(encoding)); return *this; } @@ -13608,8 +13790,8 @@ literal_attr( const char prefix[] ) { case 'X': switch(prefix[0]) { case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e); - case 'N': - case 'U': cbl_unimplemented("National"); return none_e; + case 'N': cbl_unimplemented("Hexadecimal National"); return none_e; + case 'U': cbl_unimplemented("Hexadecimal Unicode"); return none_e; } break; } diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h index 1fbc8f5..99c9cef 100644 --- a/gcc/cobol/parse_ante.h +++ b/gcc/cobol/parse_ante.h @@ -273,38 +273,11 @@ static inline char * dequote( char input[] ) { static const char * name_of( cbl_field_t *field ) { assert(field); - // Because this can be called after .initial has been converted to the - // field->codeset.encoding, we have to undo that. There may be some danger - // associated with returning a static. I don't actually know. -- RJD. - static size_t static_length = 0; - static char * static_buffer = nullptr; - - if( field->data.initial == nullptr ) return field->name; - - if( field->name[0] == '_' ) - { - // Make a copy of .initial - if( static_length < field->data.capacity+1 ) - { - static_length = field->data.capacity+1; - static_buffer = static_cast<char *>(xrealloc(static_buffer, - static_length)); - memcpy(static_buffer, field->data.initial, field->data.capacity); - static_buffer[field->data.capacity] = '\0'; - } - // Convert it from ->encoding to DEFAULT_CHARMAP_SOURCE - size_t charsout; - char *converted = __gg__iconverter(field->codeset.encoding, - DEFAULT_CHARMAP_SOURCE, - field->data.initial, - field->data.capacity, - &charsout ); - memcpy(static_buffer, converted, charsout); - static_buffer[charsout] = '\0'; - } - + if( field->data.initial == nullptr ) { + return field->name; + } return field->name[0] == '_' && field->data.initial? - static_buffer : field->name; + field->data.original() : field->name; } static const char * @@ -1337,6 +1310,7 @@ std::map<std::string, std::list<std::string>> class prog_descr_t { std::set<std::string> call_targets, subprograms; + std::set<cbl_locale_t> locales; public: std::set<function_descr_t> function_repository; size_t program_index; @@ -1361,17 +1335,14 @@ public: } alpha, national; encoding_t() : national(EBCDIC_e) {} } alphabet; - struct locale_t { - cbl_name_t name; const char *os_name; - locale_t() : name(""), os_name(nullptr) {} - locale_t(const cbl_name_t name, const char *os_name) - : name(""), os_name(os_name) { - if( name ) { - bool ok = namcpy(YYLTYPE(), this->name, name); - gcc_assert(ok); - } - } - } locale; + + bool locale_add( const cbl_locale_t& locale ) { + auto e = symbol_locale_add(program_index, &locale); + assert(e); + auto p = locales.insert(locale); + return p.second; + } + cbl_options_t options; explicit prog_descr_t( size_t isymbol ) @@ -1904,7 +1875,14 @@ static class current_t { return program.alphabet.alpha.encoding; } cbl_encoding_t national_encoding() const { - if( programs.empty() ) return EBCDIC_e; + cbl_encoding_t when_empty = EBCDIC_e; + char *alternate = getenv("NATIONAL"); + if( alternate ) + { + when_empty = __gg__encoding_iconv_type(alternate); + gcc_assert(when_empty); + } + if( programs.empty() ) return when_empty; const prog_descr_t& program = programs.top(); return program.alphabet.national.encoding; } @@ -1929,23 +1907,8 @@ static class current_t { return programs.top().options.default_round = mode; } - const char * - locale() { - return programs.empty()? NULL : programs.top().locale.os_name; - } - const char * - locale( const cbl_name_t name ) { - if( programs.empty() ) return NULL; - const prog_descr_t::locale_t& locale = programs.top().locale; - return 0 == strcmp(name, locale.name)? locale.name : NULL; - } - const prog_descr_t::locale_t& - locale( const cbl_name_t name, const char os_name[] ) { - if( programs.empty() ) { - static prog_descr_t::locale_t empty; - return empty; - } - return programs.top().locale = prog_descr_t::locale_t(name, os_name); + bool locale_add( const cbl_locale_t& locale ) { + return programs.top().locale_add(locale); } bool new_program ( const YYLTYPE& loc, cbl_label_type_t type, @@ -2296,11 +2259,13 @@ add_debugging_declarative( const cbl_label_t * label ) { } } -cbl_options_t current_options() { +cbl_options_t +current_options() { return current.options_paragraph; } -cbl_encoding_t current_encoding( char a_or_n ) { +cbl_encoding_t +current_encoding( char a_or_n ) { cbl_encoding_t retval; switch(a_or_n) { case 'A': @@ -2316,14 +2281,17 @@ cbl_encoding_t current_encoding( char a_or_n ) { return retval; } -size_t current_program_index() { +size_t +current_program_index() { return current.program()? current.program_index() : 0; } -cbl_label_t * current_section() { +cbl_label_t * +current_section() { return current.section(); } -cbl_label_t * current_paragraph() { +cbl_label_t * +current_paragraph() { return current.paragraph(); } @@ -2402,8 +2370,13 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ); static bool is_integer_literal( const cbl_field_t *field ) { if( field->type == FldLiteralN ) { - const char *initial = field->data.initial; - + size_t nchar; + const char *initial = __gg__iconverter(field->codeset.encoding, + DEFAULT_SOURCE_ENCODING, + field->data.initial, + strlen(field->data.initial), + &nchar); + assert(strlen(initial) == nchar); switch( *initial ) { case '-': case '+': ++initial; } @@ -2982,16 +2955,28 @@ blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) { return p; } +/* + * When cbl_field_t::internalize is called, its data.initial value has been + * set, but nothing has been done to it. It is encoded according to the source + * code. internalize() converts data.initial to the field's encoding. + * + * If syntax used was was PIC VALUE, in that order, then PIC set the field's + * encoding, and the VALUE clause can verify that its encoding matches. If the + * order was VALUE PIC, the value leaves the encoding uninitialized unless the + * value string bore an encoding prefix. When PIC is processed, codeset_t::set + * allows it to set the encoding only if it's either uninitialized, or the PIC + * encoding matches the existing one set by VALUE. In no event does one + * override the other; they must agree. + * + * internalize() fails if data.initial cannot be converted to the field's + * encoding. + */ static void -value_encoding_check( const YYLTYPE& loc, cbl_field_t *field, cbl_encoding_t encoding ) { +value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) { if( ! field->internalize() ) { error_msg(loc, "inconsistent string literal encoding for '%s'", field->data.initial); } - if( encoding != field->codeset.encoding ) { - warn_msg(loc, "VALUE encoded as %qs for data item encoded as %qs", - __gg__encoding_iconv_name(encoding), field->codeset.name()); - } } #pragma GCC diagnostic push @@ -3046,12 +3031,16 @@ file_add( YYLTYPE loc, cbl_file_t *file ) { static cbl_alphabet_t * -alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) { - cbl_alphabet_t alphabet(loc, encoding); +alphabet_add( const cbl_alphabet_t& alphabet ) { symbol_elem_t *e = symbol_alphabet_add(PROGRAM, &alphabet); assert(e); return cbl_alphabet_of(e); } +static cbl_alphabet_t * +alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) { + cbl_alphabet_t alphabet(loc, encoding); + return alphabet_add(alphabet); +} // The current field always exists in the symbol table, even if it's incomplete. static cbl_field_t * @@ -3302,8 +3291,9 @@ data_division_ready() { static size_t nsymbol = 0; if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) { if( ! literally_one ) { - literally_one = new_literal("1"); - literally_zero = new_literal("0"); + // Use strdup so cbl_field_t::internalize can free them if need be. + literally_one = new_literal(xstrdup("1")); + literally_zero = new_literal(xstrdup("0")); } } diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l index 07aa76d..643d099 100644 --- a/gcc/cobol/scan.l +++ b/gcc/cobol/scan.l @@ -1801,8 +1801,8 @@ B-SHIFT-RC if( elem->type == SymField ) { auto f = cbl_field_of(elem); if( f->type == FldLiteralA && f->has_attr(constant_e) ) { - type = date_time_fmt(f->data.initial); - yylval.string = xstrdup(f->data.initial); + type = date_time_fmt(f->data.original()); + yylval.string = xstrdup(f->data.original()); } } else { yylval.string = xstrdup(yytext); diff --git a/gcc/cobol/show_parse.h b/gcc/cobol/show_parse.h index e1a8cb2..7945e90 100644 --- a/gcc/cobol/show_parse.h +++ b/gcc/cobol/show_parse.h @@ -140,7 +140,13 @@ extern bool cursor_at_sol; fprintf(stderr, "%s", (b).field->name); \ if( (b).field->type == FldLiteralA || (b).field->type == FldLiteralN ) \ { \ - fprintf(stderr, " \"%s\"", (b).field->data.initial); \ + size_t nbytes; \ + const char *literal = __gg__iconverter((b).field->codeset.encoding, \ + DEFAULT_SOURCE_ENCODING, \ + (b).field->data.initial, \ + strlen((b).field->data.initial), \ + &nbytes); \ + fprintf(stderr, " \"%s\"", literal); \ } \ else \ { \ diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index 2a299ce..07dc0e6 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -293,7 +293,7 @@ elementize( const cbl_field_t& field ) { // Dubner did the following because he didn't feel like creating yet another // cbl_field_t constructor that included the hardcoded encoding for the // global special registers. - sym.elem.field.codeset.encoding = iconv_CP1252_e; + sym.elem.field.codeset.set(); return sym; } @@ -511,6 +511,9 @@ symbol_elem_cmp( const void *K, const void *E ) case SymSpecial: return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1; break; + case SymLocale: + return strcasecmp(k->elem.locale.name, e->elem.locale.name); + break; case SymAlphabet: return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name); break; @@ -677,6 +680,22 @@ symbol_special( size_t program, const char name[] ) } struct symbol_elem_t * +symbol_locale( size_t program, const char name[] ) +{ + cbl_locale_t locale(name); + assert(strlen(name) < sizeof locale.name); + strcpy(locale.name, name); + + struct symbol_elem_t key(SymLocale, program), *e; + key.elem.locale = locale; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e; +} + +struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] ) { cbl_alphabet_t alphabet(YYLTYPE(), custom_encoding_e); // cppcheck-suppress syntaxError @@ -1510,11 +1529,11 @@ field_str( const cbl_field_t *field ) { { // Apparently we need to trace back the meaning of data.literal for // field::type == FldNumericDisplay - enc_from = DEFAULT_CHARMAP_SOURCE; + enc_from = DEFAULT_SOURCE_ENCODING; } init = __gg__iconverter(enc_from, - DEFAULT_CHARMAP_SOURCE, + DEFAULT_SOURCE_ENCODING, false_data, field->data.capacity, &charsout); @@ -1522,12 +1541,12 @@ field_str( const cbl_field_t *field ) { auto eoinit = init + strlen(init); char *s = xasprintf("'%s'", init); - // No NUL within the initial data. + // No NUL within the initial data. auto ok = std::none_of( init, eoinit, []( char ch ) { return ch == '\0'; } ); assert(ok); - // If any of the init are unprintable, provide a hex version. + // If any of the init are unprintable, provide a hex version. if( ! std::all_of(init, eoinit, fisprint) ) { if( is_elementary(field->type) && field->type != FldPointer ) { const size_t len = strlen(s) + 8 + 2 * field->data.capacity; @@ -1663,7 +1682,7 @@ symbols_alphabet_set( size_t program, const char name[]) { //// // Define alphabets for codegen. //// const cbl_alphabet_t *alphabet = nullptr; //// bool supported = true; -//// +//// //// std::for_each( symbols_begin(program), symbols_end(), //// [&alphabet, &supported]( const auto& sym ) { //// if( sym.type == SymAlphabet ) { @@ -1679,7 +1698,7 @@ symbols_alphabet_set( size_t program, const char name[]) { //// cbl_unimplemented("alphabet %qs (as %qs)", alphabet->name, encoding); //// return false; //// } -//// +//// //// // Set collation sequence before parser_symbol_add.` //// if( name ) { //// symbol_elem_t *e = symbol_alphabet(program, name); @@ -1906,38 +1925,46 @@ symbols_update( size_t first, bool parsed_ok ) { } } - if( ! field->codeset.valid() ) { - switch(field->type) { - case FldForward: - case FldInvalid: - gcc_unreachable(); - case FldAlphaEdited: - case FldAlphanumeric: - case FldClass: - case FldDisplay: - case FldGroup: - case FldLiteralA: - case FldNumericDisplay: - case FldNumericEdited: + if( ! field->codeset.consistent() ) { + if( ! field->codeset.valid() ) { + switch(field->type) { + case FldForward: + case FldInvalid: + gcc_unreachable(); + case FldAlphaEdited: + case FldAlphanumeric: + case FldClass: + case FldDisplay: + case FldGroup: + case FldLiteralA: + case FldLiteralN: + case FldNumericDisplay: + case FldNumericEdited: + if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) { + error_msg(symbol_field_location(field_index(field)), + "internal: %qs encoding not defined", field->name); + } + break; + case FldConditional: + case FldFloat: + case FldIndex: + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldPointer: + case FldSwitch: + break; + } + } else { if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) { error_msg(symbol_field_location(field_index(field)), - "internal: %qs encoding not defined", field->name); + "internal: %qs encoding %qs inconsistent", + field->name, + cbl_alphabet_t::encoding_str(field->codeset.encoding) ); } - break; - case FldConditional: - case FldFloat: - case FldIndex: - case FldLiteralN: - case FldNumericBin5: - case FldNumericBinary: - case FldPacked: - case FldPointer: - case FldSwitch: - break; } } - assert( ! field->is_typedef() ); if( parsed_ok ) parser_symbol_add(field); @@ -2542,6 +2569,13 @@ symbol_file_add( size_t program, cbl_file_t *file ) { } symbol_elem_t * +symbol_locale_add( size_t program, const cbl_locale_t *locale ) { + symbol_elem_t sym{ SymLocale, program }; + sym.elem.locale = *locale; + return symbol_add(&sym); +} + +symbol_elem_t * symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ) { symbol_elem_t sym{ SymAlphabet, program }; sym.elem.alphabet = *alphabet; @@ -3202,19 +3236,56 @@ constant_of( size_t isym ) return field; } +cbl_locale_t::cbl_locale_t( const cbl_name_t name, const char iconv_name[] ) { + gcc_assert(strlen(name) < sizeof this->name); + strcpy(this->name, name); + + if( iconv_name ) { + encoding = __gg__encoding_iconv_type(iconv_name); + + strcpy(collation, "C"); + // If the iconv_name is prefixed by langauge_COUNTRY (e.g. en_US), capture that. + auto pend = iconv_name + strlen(iconv_name); + auto p = std::find(iconv_name, pend, '.'); + if( p < pend ) { + auto pend2 = std::copy(iconv_name, p, collation); + std::fill(pend2, collation + sizeof(collation), '\0'); + iconv_name = ++p; + } + encoding = __gg__encoding_iconv_type(iconv_name); + } +} + +cbl_alphabet_t::cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name ) + : loc(loc) + , locale(locale) + , low_index(0) + , high_index(255) + , last_index(0) +{ + if( locale > 0 ) { + encoding = cbl_locale_of(symbol_at(locale))->encoding; + } + memset(collation_sequence, 0xFF, sizeof(collation_sequence)); + if( name ) { // from Special-Names collation_sequence + assert(strlen(name) < sizeof(cbl_name_t)); + strcpy(this->name, name); + } +} + /* * As parsed, the alphabet reflects the encoding of the source code. If the * program uses a different encoding for alphanumeric, convert the alphabet to - * that. - * + * that. + * * Because a custom alphabet is rare and occurs at most only once per program, * we don't attempt to avoid re-encoding. "Conversion" of ASCII to ASCII is at - * most 256 calls to iconv(3). + * most 256 calls to iconv(3). */ void cbl_alphabet_t::reencode() { - const unsigned char * const pend = alphabet + sizeof(alphabet); + const unsigned char * const pend = collation_sequence + sizeof(collation_sequence); std::vector<char> tgt(256, (char)0xFF); /* Keep copies of low_index and last_index for use in run-time as LOW-VALUE @@ -3230,13 +3301,14 @@ cbl_alphabet_t::reencode() { * a custom alphabet are from NIST, which of course are ASCII. */ const char *fromcode = __gg__encoding_iconv_name(CP1252_e); - const char *tocode = __gg__encoding_iconv_name(current_encoding('A')); + const char *tocode = + __gg__encoding_iconv_name(current_encoding(display_encoding_e)); iconv_t cd = iconv_open(tocode, fromcode); - + #if optimal_reencode if( fromcode == tocode ) { // semantically tgt.resize(0); - return tgt; // Return empty vector; caller copies zero bytes. + return tgt; // Return empty vector; caller copies zero bytes. } #endif @@ -3247,14 +3319,14 @@ cbl_alphabet_t::reencode() { * that letter in the alphanumeric encoding, and set its collation position * in that alphabet. */ - for( const unsigned char *p = alphabet; p < pend; p++ ) { + for( const unsigned char *p = collation_sequence; p < pend; p++ ) { if( *p == 0xFF ) continue; - unsigned char ch = p - alphabet; + unsigned char ch = p - collation_sequence; unsigned char pos[8] = {}; size_t inbytesleft = 1, outbytesleft = sizeof(pos); char *inbuf = reinterpret_cast<char*>(&ch), *outbuf = reinterpret_cast<char*>(pos); - + size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if( n == size_t(-1) ) { @@ -3273,7 +3345,7 @@ cbl_alphabet_t::reencode() { fromcode, ch, ch, n, tocode); continue; } - + if( ch == low_index ) { low_index = pos[0]; } @@ -3283,21 +3355,21 @@ cbl_alphabet_t::reencode() { if( ch == high_index ) { high_index = pos[0]; } - + tgt.at(pos[0]) = *p; } - - std::copy(tgt.begin(), tgt.end(), alphabet); + + std::copy(tgt.begin(), tgt.end(), collation_sequence); } bool cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) { - if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) { - alphabet[ch] = high_value; + if( collation_sequence[ch] == 0xFF || collation_sequence[ch] == high_value) { + collation_sequence[ch] = high_value; last_index = ch; return true; } - auto taken = alphabet[ch]; + auto taken = collation_sequence[ch]; error_msg(loc, "ALPHABET %s, character %<%c%> (X%'%x%') " "in position %d already defined at position %d", name, @@ -3310,7 +3382,7 @@ cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high void cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) { if( ch < 256 ) { - alphabet[ch] = alphabet[last_index]; + collation_sequence[ch] = collation_sequence[last_index]; if( ch == high_index ) high_index--; return; } // else it's a figurative constant ... @@ -3323,20 +3395,20 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) { // last_index is already set; use it as the "last value before ALSO" if( attr & low_value_e ) { - alphabet[0] = alphabet[last_index]; + collation_sequence[0] = collation_sequence[last_index]; return; } if( attr & high_value_e ) { - alphabet[high_index--] = alphabet[last_index]; + collation_sequence[high_index--] = collation_sequence[last_index]; return; } if( attr & (space_value_e|quote_value_e) ) { ch = field->data.initial[0]; - alphabet[ch] = alphabet[last_index]; + collation_sequence[ch] = collation_sequence[last_index]; return; } if( attr & (zero_value_e) ) { - alphabet[0] = alphabet[last_index]; + collation_sequence[0] = collation_sequence[last_index]; error_msg(loc, "ALSO value '%s' is unknown", field->name); return; } @@ -3448,18 +3520,33 @@ new_literal_add( const char initial[], uint32_t len, } else { - static char empty[2] = "\0"; field = new_temporary_impl(FldLiteralA); field->attr |= attr; - field->data.initial = len > 0? initial : empty; + + if(len == 0) + { + // This will cover UTF-32, should that arise. + size_t nbytes = 4; + char *init = static_cast<char *>(xmalloc(nbytes)); + memset(init, 0, nbytes); + field->data.initial = init; + } + if(len) + { + char *init = static_cast<char *>(xmalloc(len+4)); + memcpy(init, initial, len); + memset(init+len, 0, 4); + field->data.initial = init; + } field->data.capacity = len; } if( ! field->has_attr(hex_encoded_e) ) { - field->codeset.set(encoding); - if( ! field->internalize() ) { - ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial); + // If the literal bore a prefix, set the encoding, + if( encoding != cbl_field_t::codeset_t::source_encoding->type ) { + field->codeset.set(encoding); } + field->internalize(); } static size_t literal_count = 1; @@ -3595,6 +3682,14 @@ new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) { extern os_locale_t os_locale; +const encodings_t cbl_field_t::codeset_t::source_encodings[2] = { + { false, iconv_UTF_8_e, "UTF-8" }, + { true, iconv_CP1252_e, "CP1252" }, +}; +const encodings_t * cbl_field_t::codeset_t::source_encoding = { + cbl_field_t::codeset_t::source_encodings +}; + const encodings_t cbl_field_t::codeset_t::standard_internal = { true, iconv_CP1252_e, "CP1252" }; @@ -3603,7 +3698,7 @@ const encodings_t cbl_field_t::codeset_t::standard_internal = { cbl_field_t * new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) { const bool force_unsigned = type == FldNumericBin5 && ! is_signed; - + if( ! initial && ! force_unsigned ) { assert( ! is_literal(type) ); // Literal type must have literal value. return temporaries.acquire(type, initial); @@ -3719,29 +3814,26 @@ cbl_field_t::is_ascii() const { * never reverts. */ -static const char * -guess_encoding() { - static const char *fromcode; - - if( ! fromcode ) { - return fromcode = os_locale.assumed; - } - - if( fromcode == os_locale.assumed ) { - fromcode = os_locale.codeset; - if( 0 != strcmp(fromcode, "C") ) { // anything but that - return fromcode; - } - } - - return standard_internal.name; -} - const char * cbl_field_t::internalize() { - static const char *fromcode = guess_encoding(); + /* The purpose of this routine is to return a nul-terminated string which + is data.initial converted from the source-code characters to the + codeset.encoding characters. + + The contract between this routine and the routines that call it is that + for alphanumeric types, data.initial shall have the same number of + characters as will be needed to fill data.capacity. + + Be aware that for PIC X(32) Z"foo", there are the characters "foo", + followed by a NUL, and then 28 spaces to fill it out. It turns out that + iconv, given a character count of 32, converts all 32, including the + embedded NUL. So, that case works even through strlen(initial) is + smaller than the length of initial, which is the same as capacity. + */ + + static const char *fromcode = codeset.source_encodings[0].name; static const size_t noconv = size_t(-1); - static std::map<std::string, iconv_t> tocodes; + static std::unordered_map<std::string, iconv_t> tocodes; if( ! codeset.valid() ) { dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial); @@ -3769,20 +3861,33 @@ cbl_field_t::internalize() { assert(0 == strlen(data.initial)); return data.initial; } - if( holds_ascii() && is_ascii() ) return data.initial; + if( holds_ascii() && is_ascii() ) { + if( type != FldNumericEdited ) { + if( ! data.initial_within_capacity() ) { + ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u", + cbl_field_t::level_str(level), name, data.initial, + strlen(data.initial), data.capacity ); + } + } + return data.initial; + } assert(data.capacity > 0); // The final 2 bytes of the output are "!\0". It's a debugging sentinel. size_t n; size_t inbytesleft = data.capacity; size_t outbytesleft = inbytesleft; - char *in = const_cast<char*>(data.initial); - char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out; if( !is_literal(this) && inbytesleft < strlen(data.initial) ) { inbytesleft = strlen(data.initial); } + if( type == FldNumericEdited ) { + outbytesleft = inbytesleft; + } const unsigned int in_len = inbytesleft; + char *in = const_cast<char*>(data.initial); + char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out; + assert(fromcode != tocode); /* @@ -3799,8 +3904,9 @@ cbl_field_t::internalize() { do { if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) { - if( fromcode == os_locale.assumed ) { - fromcode = standard_internal.name; + if( fromcode == codeset.source_encodings[0].name ) { + codeset.source_encoding = &codeset.source_encodings[1]; + fromcode = codeset.source_encoding->name; tocodes.clear(); cd = tocodes[toname] = iconv_open(tocode, fromcode); dbgmsg("%s: trying input encoding %s", __func__, fromcode); @@ -3813,7 +3919,7 @@ cbl_field_t::internalize() { if( n == noconv ) { size_t i = in_len - inbytesleft; - yywarn("failed to encode %s %qs as %s (%zu of %u bytes left)", + yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)", fromcode, data.initial + i, tocode, inbytesleft, in_len); if( false ) return NULL; return data.initial; @@ -3821,7 +3927,7 @@ cbl_field_t::internalize() { if( 0 < inbytesleft ) { // data.capacity + inbytesleft is not correct if the remaining portion has - // multibyte characters. But the fact reamins that the VALUE is too big. + // multibyte characters. But the fact remains that the VALUE is too big. ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u", cbl_field_t::level_str(level), name, data.initial, data.capacity + inbytesleft, data.capacity ); @@ -3829,7 +3935,7 @@ cbl_field_t::internalize() { // Replace data.initial only if iconv output differs. if( 0 != memcmp(data.initial, output, out - output) ) { - assert(out <= output + data.capacity); + assert(out <= output + data.capacity || type == FldNumericEdited); dbgmsg("%s: converted '%.*s' to %s", __func__, data.capacity, data.initial, tocode); struct localspace_t { @@ -3858,14 +3964,16 @@ cbl_field_t::internalize() { data.capacity = out - output; // trailing '!' will be overwritten } // Pad with trailing blanks, tacking a '!' on the end. - for( const char *eout = output + data.capacity; + for( const char *eout = output + data.capacity; out < eout; out += spc.len ) { memcpy(out, spc.space, spc.len); } - out[0] = '!'; + // Numeric literal strings may have leading zeros, making their length + // longer than their capacity. + out[0] = type == FldLiteralN? '\0' : '!'; assert(out[1] == '\0'); - free(const_cast<char*>(data.initial)); + data.orig = data.initial; data.initial = output; } else { free(output); diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h index 66fb2fd..6d29d06 100644 --- a/gcc/cobol/symbols.h +++ b/gcc/cobol/symbols.h @@ -224,6 +224,7 @@ enum symbol_type_t { SymAlphabet, SymFile, SymDataSection, + SymLocale, }; // The ISO specification says alphanumeric literals have a maximum length of @@ -237,7 +238,7 @@ struct cbl_field_data_t { uint32_t capacity, // allocated space digits; // magnitude: total digits (or characters) int32_t rdigits; // digits to the right - const char *initial, *picture; + const char *orig, *initial, *picture; enum etc_type_t { val88_e, upsi_e, value_e } etc_type; const char * @@ -268,6 +269,7 @@ struct cbl_field_data_t { , capacity(0) , digits(0) , rdigits(0) + , orig(0) , initial(0) , picture(0) , etc_type(value_e) @@ -279,6 +281,7 @@ struct cbl_field_data_t { , capacity(capacity) , digits(0) , rdigits(0) + , orig(0) , initial(0) , picture(0) , etc_type(value_e) @@ -293,6 +296,7 @@ struct cbl_field_data_t { , capacity(capacity) , digits(digits) , rdigits(rdigits) + , orig(0) , initial(initial) , picture(picture) , etc_type(value_e) @@ -387,6 +391,12 @@ struct cbl_field_data_t { return valify(); } + bool initial_within_capacity() const { + return initial[capacity] == '\0' + || initial[capacity] == '!'; + } + const char *original() const { return orig? orig : initial; } + protected: cbl_field_data_t& copy_self( const cbl_field_data_t& that ) { memsize = that.memsize; @@ -531,7 +541,7 @@ struct cbl_field_t { uint32_t level; cbl_occurs_t occurs; struct codeset_t { - static const encodings_t standard_internal; + static const encodings_t standard_internal, source_encodings[2], *source_encoding; cbl_encoding_t encoding; size_t alphabet; // unlikely explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e, @@ -544,22 +554,26 @@ struct cbl_field_t { || (alphabet != 0 && encoding == custom_encoding_e); } + bool consistent() const { + return valid() && ( encoding == current_encoding('A') + || + encoding == current_encoding('N') + || + encoding == UTF8_e ); + } bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) { - assert(encoding <= iconv_YU_e); + assert(valid_encoding(encoding)); if( ! valid() ) { // setting first time this->encoding = encoding; this->alphabet = alphabet; return valid(); } - // DUBNER override. Encoding has to change when - // 01 FOO VALUE ZERO. Just 0 is okay; ZERO is not. - this->encoding = encoding; return this->encoding == encoding && this->alphabet == alphabet; } bool set( const char picture_fragment[] = nullptr) { if( ! picture_fragment ) { - cbl_encoding_t currenc = current_encoding('A'); - bool retval = set(currenc); + cbl_encoding_t enc = current_encoding('A'); + bool retval = set(enc); return retval; } size_t len = strlen(picture_fragment); @@ -568,14 +582,15 @@ struct cbl_field_t { frag.begin(), ftoupper); switch(frag[0]) { case 'A': case 'X': case '9': - return set(current_encoding('A')); + return set(current_encoding(display_encoding_e)); case 'N': case 'U': if( std::all_of(frag.begin(), frag.end(), [first = frag[0]]( char ch ) { return first == ch; } ) ) { // All N's indicates National; all U's indicates UTF-8. - auto enc = frag[0] == 'N'? current_encoding('N') : UTF8_e; + auto enc = frag[0] == 'N' ? current_encoding(national_encoding_e) + : UTF8_e; return set(enc); } return false; // They all must be the same. @@ -739,7 +754,7 @@ struct cbl_field_t { uint32_t size() const; // table capacity or capacity const char * pretty_name() const { - if( name[0] == '_' && data.initial ) return data.initial; + if( name[0] == '_' && data.original() ) return data.original(); return name; } static const char * level_str(uint32_t level ); @@ -1185,6 +1200,13 @@ struct cbl_arith_error_t { cbl_label_addresses_t bottom; }; +struct cbl_delete_file_t { + cbl_label_addresses_t over; + cbl_label_addresses_t exception; + cbl_label_addresses_t no_exception; + cbl_label_addresses_t bottom; +}; + struct cbl_compute_error_t { // This is an int. The value is a cbl_compute_error_code_t tree compute_error_code; @@ -1232,7 +1254,10 @@ struct cbl_label_t { // for parse_xml processing: struct cbl_xml_parse_t *xml_parse; - + + // For parser_file_delete_file + struct cbl_delete_file_t *delete_file; + } structs; bool is_function() const { return type == LblFunction; } @@ -1525,6 +1550,19 @@ struct cbl_section_t { } }; +struct cbl_locale_t { + cbl_name_t name; + cbl_encoding_t encoding; + cbl_name_t collation; + + explicit cbl_locale_t(const cbl_name_t name, + const char iconv_name[] = nullptr ); + + bool operator<( const cbl_locale_t& that ) const { + return strcmp(name, that.name) < 0; + } +}; + struct cbl_special_name_t { int token; enum special_name_t id; @@ -1536,22 +1574,35 @@ struct cbl_special_name_t { char * hex_decode( const char text[] ); /* - * For a custom alphabet of single-byte encoding, cbl_alphabet_t::alphabet + * An alphabet may just name an encoding, which implies binary collation. + * + * An alphabet may reference a Special-Names LOCALE, which defines an encoding + * and a collation (perhaps by default). + * + * During Special-Names parsing, an Alphabet may reference an as-yet undefined + * LOCALE with an as-yet unknown encoding. As a placeholder it inserts a named, + * undefined cbl_locale_t symbol, which the Alphabet references. If that + * locale is never defined, the encoding remains unknown, resulting in an error + * diagnostic at the end of Special-Names. + * + * For a custom alphabet of single-byte encoding, cbl_alphabet_t::collation_sequence * holds the collation position of each encoded value. - * If 'A' sorts first (after LOW-VALUE), then alphabet['A'] == 1. - * If the encoding is ASCII, then 'A' is 65 and alphabet[ 65] == 1. - * If the encoding is EBCDIC CP1140, then 'A' is 193 and alphabet[193] == 1. + * If 'A' sorts first (after LOW-VALUE), then collation_sequence['A'] == 1. + * If the encoding is ASCII, then 'A' is 65 and collation_sequence[ 65] == 1. + * If the encoding is EBCDIC CP1140, then 'A' is 193 and collation_sequence[193] == 1. */ struct cbl_alphabet_t { YYLTYPE loc; cbl_name_t name; cbl_encoding_t encoding; - unsigned char low_index, high_index, last_index, alphabet[256]; + size_t locale; // index to cbl_locale_t symbol + unsigned char low_index, high_index, last_index, collation_sequence[256]; unsigned char low_char, high_char; cbl_alphabet_t() : loc { 1,1, 1,1 } , encoding(ASCII_e) + , locale(0) , low_index(0) , high_index(255) , last_index(0) @@ -1559,12 +1610,13 @@ struct cbl_alphabet_t { , high_char(0) { memset(name, '\0', sizeof(name)); - memset(alphabet, 0xFF, sizeof(alphabet)); + memset(collation_sequence, 0xFF, sizeof(collation_sequence)); } cbl_alphabet_t(const YYLTYPE& loc, cbl_encoding_t enc) : loc(loc) , encoding(enc) + , locale(0) , low_index(0) , high_index(255) , last_index(0) @@ -1572,14 +1624,17 @@ struct cbl_alphabet_t { , high_char(0) { memset(name, '\0', sizeof(name)); - memset(alphabet, 0xFF, sizeof(alphabet)); + memset(collation_sequence, 0xFF, sizeof(collation_sequence)); } + cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name ); + cbl_alphabet_t( const YYLTYPE& loc, const cbl_name_t name, unsigned char low_index, unsigned char high_index, - unsigned char alphabet[] ) + unsigned char collation_sequence[] ) : loc(loc) , encoding(custom_encoding_e) + , locale(0) , low_index(low_index), high_index(high_index) , last_index(high_index) , low_char(low_index) @@ -1587,21 +1642,23 @@ struct cbl_alphabet_t { { assert(strlen(name) < sizeof(this->name)); strcpy(this->name, name); - std::copy(alphabet, alphabet + sizeof(this->alphabet), this->alphabet); + std::copy(collation_sequence, + collation_sequence + sizeof(this->collation_sequence), + this->collation_sequence); } unsigned char low_value() const { - return alphabet[low_index]; + return collation_sequence[low_index]; } unsigned char high_value() const { - return alphabet[high_index]; + return collation_sequence[high_index]; } void add_sequence( const YYLTYPE& loc, const unsigned char seq[] ) { if( low_index == 0 ) low_index = seq[0]; - unsigned char last = last_index > 0? alphabet[last_index] + 1 : 0; + unsigned char last = last_index > 0? collation_sequence[last_index] + 1 : 0; for( const unsigned char *p = seq; !end_of_string(p); p++ ) { assign(loc, *p, last++); @@ -1612,7 +1669,7 @@ struct cbl_alphabet_t { add_interval( const YYLTYPE& loc, unsigned char low, unsigned char high ) { if( low_index == 0 ) low_index = low; - unsigned char last = alphabet[last_index]; + unsigned char last = collation_sequence[last_index]; for( unsigned char ch = low; ch < high; ch++ ) { assign(loc, ch, last++); @@ -1649,8 +1706,11 @@ struct cbl_alphabet_t { " 0 1 2 3 4 5 6 7" " 8 9 A B C C E F"); unsigned int row = 0; - for( auto p = alphabet; p < alphabet + sizeof(alphabet); p++ ) { - if( (p - alphabet) % 16 == 0 ) fprintf(stderr, "\n%4X\t", row++); + for( auto p = collation_sequence; + p < collation_sequence + sizeof(collation_sequence); p++ ) { + if( (p - collation_sequence) % 16 == 0 ) { + fprintf(stderr, "\n%4X\t", row++); + } fprintf(stderr, "%3u ", *p); } fprintf(stderr, "\n"); @@ -1870,6 +1930,7 @@ struct symbol_elem_t { cbl_field_t field; cbl_label_t label; cbl_special_name_t special; + cbl_locale_t locale; cbl_alphabet_t alphabet; cbl_file_t file; cbl_section_t section; @@ -1927,6 +1988,9 @@ struct symbol_elem_t { case SymSpecial: elem.special = that.elem.special; break; + case SymLocale: + elem.locale = that.elem.locale; + break; case SymAlphabet: elem.alphabet = that.elem.alphabet; break; @@ -2092,6 +2156,18 @@ cbl_special_name_of( symbol_elem_t *e ) { return &e->elem.special; } +static inline cbl_locale_t * +cbl_locale_of( symbol_elem_t *e ) { + assert(e && e->type == SymLocale); + return &e->elem.locale; +} + +static inline const cbl_locale_t * +cbl_locale_of( const symbol_elem_t *e ) { + assert(e && e->type == SymLocale); + return &e->elem.locale; +} + static inline cbl_alphabet_t * cbl_alphabet_of( symbol_elem_t *e ) { assert(e && e->type == SymAlphabet); @@ -2104,6 +2180,7 @@ cbl_alphabet_of( const symbol_elem_t *e ) { return &e->elem.alphabet; } + static inline cbl_file_t * cbl_file_of( symbol_elem_t *e ) { assert(e && e->type == SymFile); @@ -2477,6 +2554,7 @@ struct symbol_elem_t * symbol_literalA( size_t program, const char name[] ); struct cbl_special_name_t * symbol_special( special_name_t id ); struct symbol_elem_t * symbol_special( size_t program, const char name[] ); +struct symbol_elem_t * symbol_locale( size_t program, const char name[] ); struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] ); struct symbol_elem_t * symbol_file( size_t program, const char name[] ); @@ -2524,6 +2602,7 @@ cbl_label_t * symbol_label_add( size_t program, cbl_label_t * symbol_program_add( size_t program, cbl_label_t *input ); symbol_elem_t * symbol_special_add( size_t program, cbl_special_name_t *special ); +symbol_elem_t * symbol_locale_add( size_t program, const cbl_locale_t *locale ); symbol_elem_t * symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ); symbol_elem_t * symbol_file_add( size_t program, @@ -2548,8 +2627,8 @@ static inline size_t upsi_register() { return symbol_index(symbol_field(0,0,"UPSI-0")); } -void wsclear( char ch); -const char *wsclear(); +void wsclear( uint32_t ch); +const uint32_t *wsclear(); enum cbl_call_convention_t { cbl_call_verbatim_e = 'V', diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc index 9615987..0e6ec8c 100644 --- a/gcc/cobol/util.cc +++ b/gcc/cobol/util.cc @@ -271,6 +271,8 @@ symbol_type_str( enum symbol_type_t type ) return "SymLabel"; case SymSpecial: return "SymSpecial"; + case SymLocale: + return "SymLocale"; case SymAlphabet: return "SymAlphabet"; case SymFile: @@ -1094,28 +1096,18 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { if( has_attr(all_alpha_e) ) { bool alpha_value = fig != zero_value_e; - // In order to check for all alphabetic characters, we have to convert - // data.initial back to ASCII: - - size_t outchars; - char *initial = __gg__iconverter(codeset.encoding, - DEFAULT_CHARMAP_SOURCE, - data.initial, - data.capacity, - &outchars); - if( fig == normal_value_e ) { - alpha_value = std::all_of( initial, - initial + - data.capacity, - []( char ch ) { - return ISSPACE(ch) || - ISPUNCT(ch) || - ISALPHA(ch); } ); + alpha_value = std::none_of( data.initial, + data.initial + + data.capacity, + []( char ch ) { + return + ISPUNCT(ch) || + ISDIGIT(ch); } ); } if( ! alpha_value ) { error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data", - name, fig == zero_value_e? cbl_figconst_str(fig) : initial); + name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial); } } @@ -1315,7 +1307,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) size_t outcount; char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity)); const char *in_asciip = __gg__iconverter( src->codeset.encoding, - DEFAULT_CHARMAP_SOURCE, + DEFAULT_SOURCE_ENCODING, src->data.initial, src->data.capacity, &outcount ); @@ -2078,7 +2070,8 @@ cobol_lineno() { const char * cobol_filename() { - return input_filenames.empty()? input_filename_vestige : input_filenames.top().name; + return input_filenames.empty()? + input_filename_vestige : input_filenames.top().name; } void |
