diff options
Diffstat (limited to 'gcc/cobol/symbols.cc')
| -rw-r--r-- | gcc/cobol/symbols.cc | 1549 |
1 files changed, 902 insertions, 647 deletions
diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index a4fc82c..a177fcd 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -28,6 +28,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +// cppcheck-suppress-file duplicateBreak + +#include "config.h" #include <fstream> // Before cobol-system because it uses poisoned functions #include "cobol-system.h" @@ -44,6 +47,7 @@ #include "inspect.h" #include "../../libgcobol/io.h" #include "genapi.h" +#include "../../libgcobol/charmaps.h" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" @@ -55,7 +59,7 @@ class symbol_pair_t { const symbol_elem_t *first, *last; public: - symbol_pair_t( const symbol_elem_t * first, const symbol_elem_t * end = NULL ) + explicit symbol_pair_t( const symbol_elem_t * first, const symbol_elem_t * end = NULL ) : first(first), last(end) {} @@ -135,11 +139,9 @@ static struct symbol_table_t { static symbol_table_t& symbol_table_extend() { - static FILE *mapped; if( symbols.nelem == 0 ) { // first time: create file & set initial capacity - assert(mapped == NULL && symbols.fd == -1); - + FILE *mapped; if( (mapped = tmpfile()) == NULL ) { cbl_err( "could not create temporary file for symbol table"); } @@ -159,8 +161,8 @@ symbol_table_extend() { off_t len = symbols.size(); if( 0 != ftruncate(symbols.fd, len) ) { - cbl_err( "%s:%d:could not extend symbol table to %zu elements", - __func__, __LINE__, symbols.capacity); + cbl_err( "%s:%d: could not extend symbol table to %lu elements", + __func__, __LINE__, gb4(symbols.capacity)); } /* @@ -228,6 +230,12 @@ cbl_span_t::from_field() { assert(from); return from->field; } cbl_field_t * cbl_span_t::len_field() { assert(len); return len->field; } +cbl_ffi_arg_t::cbl_ffi_arg_t() + : optional(false) + , crv(by_reference_e) + , attr(none_of_e) +{} + cbl_ffi_arg_t:: cbl_ffi_arg_t( cbl_refer_t* refer, cbl_ffi_arg_attr_t attr ) : optional(false) @@ -256,43 +264,6 @@ cbl_ffi_arg_t( cbl_ffi_crv_t crv, } while(0) -cbl_field_t * -symbol_valid_udf_args( size_t function, std::list<cbl_refer_t> args ) { - auto L = cbl_label_of(symbol_at(function)); - if( ! L->returning ) { - dbgmsg("logic error: %s does not define RETURNING", L->name); - return NULL; - } - auto e = std::find_if( symbol_at(function), symbols_end(), - []( auto symbol ) { - if( symbol.type == SymDataSection ) { - auto section(symbol.elem.section); - return section.type == linkage_sect_e; - } - return false; - } ); - for( auto arg : args ) { - size_t iarg(1); - e++; // skip over linkage_sect_e, which appears after the function - if( e->type != SymField ) { - ERROR_FIELD(arg.field, - "FUNCTION %s has no defined parameter matching arg %zu, '%s'", - L->name, iarg, arg.field->name ); - return NULL; - } - - auto tgt = cbl_field_of(e); - - if( ! valid_move(tgt, arg.field) ) { - ERROR_FIELD(tgt, "FUNCTION %s arg %zu, '%s' cannot be passed to %s, type %s", - L->name, iarg, arg.field->pretty_name(), - tgt->pretty_name(), 3 + cbl_field_type_str(tgt->type) ); - return NULL; - } - } - return cbl_field_of(symbol_at(L->returning)); -} - static const struct cbl_occurs_t nonarray = cbl_occurs_t(); #if 0 @@ -316,9 +287,13 @@ class group_size_t { enum { constq = constant_e | quoted_e }; static symbol_elem_t -elementize( cbl_field_t& field ) { +elementize( const cbl_field_t& field ) { symbol_elem_t sym (SymField); sym.elem.field = field; + // Dubner did the following because he didn't feel like creating yet another + // cbl_field_t constructor that included the hardcoded encoding for the + // global special registers. + sym.elem.field.codeset.set(); return sym; } @@ -369,12 +344,6 @@ special_pair_cmp( const cbl_special_name_t& key, const cbl_special_name_t& elem ) { const bool matched = key.id == elem.id || 0 == strcasecmp(key.name, elem.name); - if( getenv(__func__) ) { - dbgmsg("%s:%d: key: id=%2d, %s", __func__, __LINE__, key.id, key.name); - dbgmsg("%s:%d: elem: id=%2d, %s => %s", __func__, __LINE__, - elem.id, elem.name, matched? "match" : "no match"); - } - return matched; } @@ -492,9 +461,6 @@ symbol_elem_cmp( const void *K, const void *E ) case SymDataSection: return k->elem.section.type == e->elem.section.type ? 0 : 1; break; - case SymFunction: - return strcmp(k->elem.function.name, e->elem.function.name); - break; case SymField: if( has_parent(k) && cbl_field_of(k)->parent != cbl_field_of(e)->parent ) { return 1; @@ -545,6 +511,9 @@ symbol_elem_cmp( const void *K, const void *E ) case SymSpecial: return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1; break; + case SymLocale: + return strcasecmp(k->elem.locale.name, e->elem.locale.name); + break; case SymAlphabet: return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name); break; @@ -711,9 +680,25 @@ symbol_special( size_t program, const char name[] ) } struct symbol_elem_t * +symbol_locale( size_t program, const char name[] ) +{ + cbl_locale_t locale(name); + assert(strlen(name) < sizeof locale.name); + strcpy(locale.name, name); + + struct symbol_elem_t key(SymLocale, program), *e; + key.elem.locale = locale; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e; +} + +struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] ) { - cbl_alphabet_t alphabet(YYLTYPE(), custom_encoding_e); + cbl_alphabet_t alphabet(YYLTYPE(), custom_encoding_e); // cppcheck-suppress syntaxError assert(strlen(name) < sizeof alphabet.name); strcpy(alphabet.name, name); @@ -780,7 +765,7 @@ cbl_field_attr_str( cbl_field_attr_t attr ) { case function_e: return "function"; case quoted_e: return "quoted"; case filler_e: return "filler"; - case _spare_e: return "temporary"; + case register_e: return "register"; case intermediate_e: return "intermediate"; case embiggened_e: return "embiggened"; case all_alpha_e: return "all_alpha"; @@ -799,7 +784,7 @@ cbl_field_attr_str( cbl_field_attr_t attr ) { case leading_e: return "leading"; case separate_e: return "separate"; case envar_e: return "envar"; - case dnu_1_e: return "dnu_1"; + case encoded_e: return "encoded"; case bool_encoded_e: return "bool"; case hex_encoded_e: return "hex"; case depends_on_e: return "depends_on"; @@ -820,24 +805,24 @@ cbl_field_t::size() const { return field_size(this); } -size_t +uint64_t cbl_field_t::set_attr( cbl_field_attr_t attr ) { if( attr == signable_e ) { if( ! has_attr(attr) && this->var_decl_node != NULL ) { parser_field_attr_set(this, attr); } } - return this->attr |= size_t(attr); + return this->attr |= uint64_t(attr); } -size_t +uint64_t cbl_field_t::clear_attr( cbl_field_attr_t attr ) { if( attr == signable_e ) { if( this->var_decl_node != nullptr && has_attr(attr) ) { parser_field_attr_set(this, attr, false); } } - return this->attr &= ~size_t(attr); + return this->attr &= ~uint64_t(attr); } static uint32_t @@ -893,13 +878,6 @@ update_block_offsets( struct symbol_elem_t *block) uint32_t offset = cbl_field_of(block)->offset; const uint32_t block_level = cbl_field_of(block)->level; - if( getenv(__func__) ) { - cbl_field_t *field = cbl_field_of(block); - dbgmsg( "%s: offset is %3zu for %2u %-30s #%3zu P%zu", - __func__, field->offset, field->level, field->name, - symbol_index(block), field->parent ); - } - struct symbol_elem_t *e = block; for( ++e; e < symbols_end(); e++ ) { if( e->type != SymField ) { @@ -929,12 +907,6 @@ update_block_offsets( struct symbol_elem_t *block) offset += field_memsize(field); } - if( getenv(__func__) ) { - dbgmsg( "%s: offset is %3zu for %2u %-30s #%3zu P%zu", - __func__, field->offset, field->level, field->name, - symbol_index(e), field->parent ); - } - if( field->type == FldGroup ) { e = update_block_offsets(e) - 1; } @@ -965,7 +937,7 @@ end_of_group( const cbl_field_t *group, const cbl_field_t *field ) { class eog_t { const cbl_field_t * group; public: - eog_t( const symbol_elem_t *e ) : group(cbl_field_of(e)) {} + explicit eog_t( const symbol_elem_t *e ) : group(cbl_field_of(e)) {} bool operator()( symbol_elem_t& e ) { return e.type == SymField && end_of_group(group, cbl_field_of(&e)); @@ -985,7 +957,7 @@ end_of_group( size_t igroup ) { if( e->program != group->program ) return isym; if( e->type == SymLabel ) return isym; // end of data division if( e->type == SymField ) { - auto f = cbl_field_of(e); + const cbl_field_t * f = cbl_field_of(e); if( f->level == LEVEL77 || f->level == 66 ) return isym; if( f->level == 1 && f->parent != igroup ) { return isym; @@ -996,7 +968,7 @@ end_of_group( size_t igroup ) { } eog_t eog(symbol_at(igroup)); - symbol_elem_t *e = std::find_if( symbols_begin(++igroup), symbols_end(), eog ); + const symbol_elem_t *e = std::find_if( symbols_begin(++igroup), symbols_end(), eog ); return e - symbols_begin(); } @@ -1020,8 +992,8 @@ symbol_field_capacity( const cbl_field_t *field ) { size_t size = std::accumulate( symbol_at(bog), symbol_at_impl(eog), 0, sym_field_size::capacity ); - if(true) dbgmsg("%s: %02u %s.data.capacity was computed as %zu", __func__, - field->level, field->name, size); + if(true) dbgmsg("%s: %02u %s.data.capacity was computed as " HOST_SIZE_T_PRINT_UNSIGNED, + __func__, field->level, field->name, (fmt_size_t)size); return size; } @@ -1035,14 +1007,15 @@ has_odo( const symbol_elem_t& e ) { struct cbl_field_t * symbol_find_odo_debug( cbl_field_t * field ) { size_t bog = field_index(field), eog = end_of_group(bog); - dbgmsg("%s: %s is #%zu - #%zu of %zu, ends at %s", __func__, - field->name, bog, eog, symbols.nelem, + dbgmsg("%s: %s is #" HOST_SIZE_T_PRINT_UNSIGNED " - #" HOST_SIZE_T_PRINT_UNSIGNED + " of " HOST_SIZE_T_PRINT_UNSIGNED ", ends at %s", __func__, + field->name, (fmt_size_t)bog, (fmt_size_t)eog, (fmt_size_t)symbols.nelem, eog == symbols.nelem? "[end]" : cbl_field_of(symbol_at(eog))->name ); auto e = std::find_if( symbol_at(bog), symbol_at_impl(eog, true), has_odo ); if( e != symbol_at_impl(eog, true) ) { - dbgmsg("%s: %s has ODO at #%zu (return '%s')", __func__, - field->name, symbol_index(e), + dbgmsg("%s: %s has ODO at #" HOST_SIZE_T_PRINT_UNSIGNED " (return '%s')", __func__, + field->name, (fmt_size_t)symbol_index(e), cbl_field_of(e)->name ); } return e == symbol_at_impl(eog, true)? NULL : cbl_field_of(e); @@ -1050,8 +1023,7 @@ symbol_find_odo_debug( cbl_field_t * field ) { // Return OCCURS DEPENDING ON table subordinate to field, if any. struct cbl_field_t * -symbol_find_odo( cbl_field_t * field ) { - if( getenv(__func__) ) return symbol_find_odo_debug(field); +symbol_find_odo( const cbl_field_t * field ) { size_t bog = field_index(field), eog = end_of_group(bog); auto e = std::find_if( symbol_at(bog), symbol_at_impl(eog, true), has_odo ); return e == symbol_at_impl(eog, true)? NULL : cbl_field_of(e); @@ -1067,8 +1039,8 @@ symbols_dump( size_t first, bool header ) { if( !yydebug ) return 0; if( header ) { - fprintf(stderr, "Symbol Table has %zu elements\n", - symbols_end() - symbols_begin()); + fprintf(stderr, "Symbol Table has " HOST_SIZE_T_PRINT_UNSIGNED " elements\n", + (fmt_size_t)(symbols_end() - symbols_begin())); } for( struct symbol_elem_t *e = symbols_begin(first); e < symbols_end(); e++ ) { @@ -1076,25 +1048,22 @@ symbols_dump( size_t first, bool header ) { switch(e->type) { case SymFilename: - s = xasprintf("%4zu %-18s %s", e->program, + s = xasprintf("%4" GCC_PRISZ "u %-18s %s", (fmt_size_t)e->program, "Filename", e->elem.filename); break; case SymDataSection: - s = xasprintf("%4zu %-18s line %d", e->program, + s = xasprintf("%4" GCC_PRISZ "u %-18s line %d", (fmt_size_t)e->program, cbl_section_of(e)->name(), cbl_section_of(e)->line); break; - case SymFunction: - s = xasprintf("%4zu %-15s %s", e->program, - "Function", e->elem.function.name); - break; case SymField: { auto field = cbl_field_of(e); char *odo_str = NULL; if( field->occurs.depending_on != 0 ) { - odo_str = xasprintf("odo %zu", field->occurs.depending_on ); + odo_str = xasprintf("odo " HOST_SIZE_T_PRINT_UNSIGNED, + (fmt_size_t)field->occurs.depending_on ); } ninvalid += cbl_field_of(e)->type == FldInvalid? 1 : 0; - s = xasprintf("%4zu %-18s %s (%s)", e->program, + s = xasprintf("%4" GCC_PRISZ "u %-18s %s (%s)", (fmt_size_t)e->program, cbl_field_type_str(cbl_field_of(e)->type) + 3, field_str(cbl_field_of(e)), odo_str? odo_str : @@ -1102,7 +1071,7 @@ symbols_dump( size_t first, bool header ) { } break; case SymLabel: - s = xasprintf("%4zu %-18s %s", e->program, + s = xasprintf("%4" GCC_PRISZ "u %-18s %s", (fmt_size_t)e->program, "Labe1l", e->elem.label.str()); if( LblProgram == cbl_label_of(e)->type ) { const auto& L = *cbl_label_of(e); @@ -1114,31 +1083,35 @@ symbols_dump( size_t first, bool header ) { } break; case SymSpecial: - s = xasprintf("%4zu %-18s id=%2d, %s", e->program, + s = xasprintf("%4" GCC_PRISZ "u %-18s id=%2d, %s", (fmt_size_t)e->program, "Special", e->elem.special.id, e->elem.special.name); break; case SymAlphabet: - s = xasprintf("%4zu %-18s encoding=%2d, '%s'", e->program, "Alphabet", + s = xasprintf("%4" GCC_PRISZ "u %-18s encoding=%2d, '%s'", + (fmt_size_t)e->program, "Alphabet", int(e->elem.alphabet.encoding), e->elem.alphabet.name); break; case SymFile: - s = xasprintf("%4zu %-18s %-20s", e->program, + s = xasprintf("%4" GCC_PRISZ "u %-18s %-20s", (fmt_size_t)e->program, "File", e->elem.file.name); { char same_as[26] = ""; if( cbl_file_of(e)->same_record_as > 0 ) { - sprintf(same_as, "s%3zu", cbl_file_of(e)->same_record_as); + sprintf(same_as, "s%3" GCC_PRISZ "u", + (fmt_size_t)cbl_file_of(e)->same_record_as); } const char *type = file_org_str(e->elem.file.org); char *part = s; - s = xasprintf("%s %-4s %s %s %s{%zu-%zu} status=#%zu", + s = xasprintf("%s %-4s %s %s %s{" HOST_SIZE_T_PRINT_UNSIGNED "-" + HOST_SIZE_T_PRINT_UNSIGNED "} status=#" + HOST_SIZE_T_PRINT_UNSIGNED, part, same_as, type, e->elem.file.keys_str(), cbl_file_of(e)->varies()? "varies " : "", - cbl_file_of(e)->varying_size.min, - cbl_file_of(e)->varying_size.max, - cbl_file_of(e)->user_status); + (fmt_size_t)cbl_file_of(e)->varying_size.min, + (fmt_size_t)cbl_file_of(e)->varying_size.max, + (fmt_size_t)cbl_file_of(e)->user_status); free(part); } break; @@ -1146,7 +1119,8 @@ symbols_dump( size_t first, bool header ) { dbgmsg("%s: cannot dump symbol type %d", __func__, e->type); continue; } - fprintf(stderr, "%4zu: %s\n", e - symbols_begin(), s); + fprintf(stderr, "%4" GCC_PRISZ "u: %s\n", + (fmt_size_t)(e - symbols_begin()), s); free(s); } return ninvalid; @@ -1226,7 +1200,7 @@ static struct symbol_elem_t * // If an 01 record exists for the FD/SD, use its capacity as the // default_record capacity. if( p != symbols_end() ) { - auto record = cbl_field_of(p); + const cbl_field_t * record = cbl_field_of(p); assert(record->level == 1); e = calculate_capacity(p); auto record_size = std::max(record->data.memsize, @@ -1258,7 +1232,8 @@ static struct symbol_elem_t * } if(yydebug && group->type != FldGroup) { - dbgmsg("Field #%zu '%s' is not a group", symbol_index(e), group->name); + dbgmsg("Field #" HOST_SIZE_T_PRINT_UNSIGNED " '%s' is not a group", + (fmt_size_t)symbol_index(e), group->name); symbols_dump(symbols.first_program, true); } if( group->type == FldInvalid ) return e; @@ -1288,10 +1263,6 @@ static struct symbol_elem_t * // Print accumulating details for one group to debug log. bool details = false; - if( yydebug ) { - const auto details_for = getenv("symbols_update"); - details = details_for && 0 == strcasecmp(details_for, group->name); - } // At end of group, members is a list of all immediate children, any // of which might have been redefined and so acquired a memsize. @@ -1317,7 +1288,7 @@ static struct symbol_elem_t * // If group has a parent that is a record area, expand it, too. if( 0 < group->parent ) { - auto redefined = symbol_redefines(group); + redefined = symbol_redefines(group); if( redefined && is_record_area(redefined) ) { if( redefined->data.capacity < group->data.memsize ) { redefined->data.capacity = group->data.memsize; @@ -1347,7 +1318,7 @@ static struct symbol_elem_t * switch( group->level ) { case 1: case 77: - if( dialect_mf() && is_table(group) ) { + if( is_table(group) ) { size_t elem_size = std::max(group->data.memsize, group->data.memsize); group->data.memsize = elem_size * group->occurs.ntimes(); } @@ -1363,23 +1334,6 @@ verify_block( const struct symbol_elem_t *block, if( e->type != SymField ) { continue; } - const struct cbl_field_t *field = cbl_field_of(e); - - if( getenv(__func__) ) { - if( e == block ) { - static const char ds[] = "--------------------------------"; - dbgmsg( "%17s %-3s %-3s %-18s %-3s %3s %-16s C/D/R = init\n" - "%.25s %-.3s %-.3s %-.18s %-.3s %.3s %-.16s %-.7s %-.16s", - "", "ndx", "off", "type", "par", "lvl", "name", - ds, ds, ds, ds, ds, ds, ds, ds, ds ); - } - dbgmsg( "%s:%d: %3zu %3zu %-18s %3zu %02d %-16s %2u/%u/%d = '%s'", - __func__, __LINE__, e - symbols.elems, field->offset, - cbl_field_type_str(field->type), - field->parent, field->level, field->name, - field->data.capacity, field->data.digits, field->data.rdigits, - field->data.initial? field->data.initial : "(none)" ); - } } } @@ -1415,19 +1369,18 @@ immediately_follows( const cbl_field_t *field ) { bool is_variable_length( const cbl_field_t *field ) { - bool odo = false; - std::find_if( symbol_at(field_index(field)) + 1, symbols_end(), - [&odo, field]( const auto& elem ) { - if( elem.type == SymField ) { - auto f = cbl_field_of(&elem); - if( f->level <= field->level ) return true; - if( f->occurs.depending_on ) { - odo = true; - return true; - } - } - return false; - } ); + // RENAMES may be included in end_of_group. + size_t isym = field_index(field), esym = end_of_group(isym); + bool odo = std::any_of( symbol_at(isym) + 1, symbol_at_impl(esym), + [field]( const auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + if( field->level < f->level ) { // exclude RENAMES + return 0 < f->occurs.depending_on; + } + } + return false; + } ); return odo; } @@ -1439,7 +1392,7 @@ is_variable_length( const cbl_field_t *field ) { * occurs-depending table." */ cbl_field_t * -rename_not_ok( cbl_field_t *first, cbl_field_t *last) { +rename_not_ok( const cbl_field_t *first, const cbl_field_t *last) { symbol_elem_t *beg = symbol_at(field_index(first)), *end = symbol_at(field_index(last)); @@ -1507,11 +1460,11 @@ cbl_field_t::attr_str( const std::vector<cbl_field_attr_t>& attrs ) const const char *sep = ""; char *out = NULL; - for( auto attr : attrs ) { + for( auto attr_l : attrs ) { char *part = out; - if( has_attr(attr) ) { + if( has_attr(attr_l) ) { int erc = asprintf(&out, "%s%s%s", - part? part : "", sep, cbl_field_attr_str(attr)); + part? part : "", sep, cbl_field_attr_str(attr_l)); if( -1 == erc ) return part; free(part); sep = ", "; @@ -1522,7 +1475,7 @@ cbl_field_t::attr_str( const std::vector<cbl_field_attr_t>& attrs ) const char * field_str( const cbl_field_t *field ) { - static char string[3*sizeof(cbl_name_t)]; + static char string[4*sizeof(cbl_name_t)]; char *pend = string; char name[2*sizeof(cbl_name_t)] = ""; @@ -1534,17 +1487,23 @@ field_str( const cbl_field_t *field ) { for( size_t i=0; i < field->occurs.nkey; i++ ) { updown[i] = field->occurs.keys[i].ascending? 'A' : 'D'; } - snprintf(name, sizeof(name), "%s[%zu]%s", - field->name, field->occurs.ntimes(), updown.data()); + snprintf(name, sizeof(name), "%s[" HOST_SIZE_T_PRINT_UNSIGNED "]%s", + field->name, (fmt_size_t)field->occurs.ntimes(), updown.data()); } } + if( field->codeset.valid() ) { + strcat(name, " ("); + strcat(name, field->codeset.name()); + strcat(name, ") "); + } + pend += snprintf(pend, string + sizeof(string) - pend, - "%02d %-20s ", field->level, name); + "%02u %-20s ", field->level, name); char offset[32] = ""; if( field->level > 1 ) { - sprintf( offset, "off%3zu", field->offset ); + sprintf( offset, "off%3" GCC_PRISZ "u", (fmt_size_t)field->offset ); } char parredef = @@ -1552,41 +1511,66 @@ field_str( const cbl_field_t *field ) { if( 'r' == parredef && field->level == 0 ) parredef = 'p'; if( field->has_attr(typedef_e) ) parredef = 'T'; - const char *data = field->data.initial? field->data.initial : NULL; - if( data ) { - auto fig = cbl_figconst_of(data); + const char *init = field->data.initial? field->data.initial : NULL; + if( init ) { + auto fig = cbl_figconst_of(init); if( normal_value_e != fig ) { - data = cbl_figconst_str(fig); + init = cbl_figconst_str(fig); } else { - char *s; - auto n = asprintf(&s, "'%s'", data); - gcc_assert(n); - auto eodata = data + field->data.capacity; - if( eodata != std::find_if_not(data, eodata, fisprint) ) { - char *p = reinterpret_cast<char*>(xrealloc(s, n + 8 + 2 * field->data.capacity)); - if( is_elementary(field->type) && - field->type != FldPointer && p != NULL ) { - s = p; - p += n; - strcat( p, "(0x" ); - p += 3; - for( auto d=data; d < eodata; d++ ) { - p += sprintf(p, "%02x", *d); +#if 0 + // At this point, we might have to convert 'init' back to ASCII + char *false_init = static_cast<char *>(xmalloc(field->init.capacity+1)); + memcpy(false_init, field->init.initial, field->data.capacity); + false_data[field->data.capacity] = '\0'; + size_t charsout; + + cbl_encoding_t enc_from = field->codeset.encoding; + if( field->type == FldNumericDisplay ) + { + // Apparently we need to trace back the meaning of data.literal for + // field::type == FldNumericDisplay + enc_from = DEFAULT_SOURCE_ENCODING; + } + + init = __gg__iconverter(enc_from, + DEFAULT_SOURCE_ENCODING, + false_data, + field->data.capacity, + &charsout); +#endif + auto eoinit = init + strlen(init); + char *s = xasprintf("'%s'", init); + + // No NUL within the initial data. + auto ok = std::none_of( init, eoinit, + []( char ch ) { return ch == '\0'; } ); + assert(ok); + + // If any of the init are unprintable, provide a hex version. + if( ! std::all_of(init, eoinit, fisprint) ) { + if( is_elementary(field->type) && field->type != FldPointer ) { + const size_t len = strlen(s) + 8 + 2 * field->data.capacity; + s = reinterpret_cast<char*>(xrealloc(s, len)); + strcat( s, " (0x" ); + char *p = s + strlen(s); + for( auto d=init; d < eoinit; d++ ) { + p += sprintf(p, "%02x", static_cast<unsigned char>(*d)); } - strcat( p++, ")" ); + strcat( s, ")" ); + assert(strlen(s) < len); } } - data = s; + init = s; } } else { - data = "NULL"; + init = "NULL"; if( field->type == FldSwitch ) { - data = xasprintf("0x%02x", field->data.upsi_mask_of()->value); + init = xasprintf("0x%02x", field->data.upsi_mask_of()->value); } } if( field->level == 88 ) { const auto& dom = *field->data.domain_of(); - data = xasprintf("%s%s %s - %s%s", + init = xasprintf("%s%s %s - %s%s", dom.first.all? "A" : "", value_or_figconst_name(dom.first.name()) , dom.first.is_numeric? "(num)" : "", @@ -1605,20 +1589,20 @@ field_str( const cbl_field_t *field ) { intermediate_e, embiggened_e, all_alpha_e, all_x_e, all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e, /* global_e, external_e, */ blank_zero_e, /* linkage_e, local_e, */ leading_e, - separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e, + separate_e, envar_e, encoded_e, bool_encoded_e, hex_encoded_e, depends_on_e, /* initialized_e, */ has_value_e, ieeedec_e, big_endian_e, same_as_e, record_key_e, typedef_e, strongdef_e, }; pend += snprintf(pend, string + sizeof(string) - pend, - "%c%3zu %-6s %c%c%c %2u{%3u,%u,%d = %s} (%s), line %d", - parredef, field->parent, offset, + "%c%3" GCC_PRISZ "u %-6s %c%c%c %2u{%3u,%u,%d = %s} (%s), line %d", + parredef, (fmt_size_t)field->parent, offset, (field->attr & global_e)? 'G' : 0x20, (field->attr & external_e)? 'E' : 0x20, storage_type, field->data.memsize, field->data.capacity, field->data.digits, field->data.rdigits, - data, field->attr_str(attrs), field->line ); + init, field->attr_str(attrs), field->line ); return string; } @@ -1634,7 +1618,7 @@ struct capacity_of { capacity_of operator()( symbol_elem_t& elem ) { if( elem.type == SymField ) { - cbl_field_t *f = cbl_field_of(&elem); + const cbl_field_t *f = cbl_field_of(&elem); if( is_elementary(f->type) ) { capacity += field_size(f); } @@ -1647,12 +1631,14 @@ static void extend_66_capacity( cbl_field_t *alias ) { static_assert(sizeof(symbol_elem_t*) == sizeof(const char *), "all pointers must be same size"); - assert(alias->data.picture); + assert(alias->level == 66); assert(alias->type == FldGroup); + assert(alias->data.picture); + // If data.picture is not NULL, it is the THRU symbol, see symbol_field_alias2. symbol_elem_t *e = symbol_at(alias->parent); symbol_elem_t *e2 = reinterpret_cast<symbol_elem_t*>(const_cast<char*>(alias->data.picture)); - assert(e < e2); + assert(symbol_index(e) < symbol_index(e2)); alias->data.picture = NULL; capacity_of cap; @@ -1667,6 +1653,9 @@ extend_66_capacity( cbl_field_t *alias ) { bool symbols_alphabet_set( size_t program, const char name[]) { + +//////// +// Older version struct alpha { void operator()( symbol_elem_t& elem ) const { if( elem.type == SymAlphabet ) { @@ -1676,7 +1665,7 @@ symbols_alphabet_set( size_t program, const char name[]) { }; // Define alphabets for codegen. - std::for_each(symbols_begin(), symbols_end(), alpha() ); + std::for_each(symbols_begin(program), symbols_end(), alpha() ); // Set collation sequence before parser_symbol_add. if( name ) { @@ -1687,6 +1676,38 @@ symbols_alphabet_set( size_t program, const char name[]) { parser_alphabet_use(*cbl_alphabet_of(e)); } return true; +// End older version +//////// + +//// // Define alphabets for codegen. +//// const cbl_alphabet_t *alphabet = nullptr; +//// bool supported = true; +//// +//// std::for_each( symbols_begin(program), symbols_end(), +//// [&alphabet, &supported]( const auto& sym ) { +//// if( sym.type == SymAlphabet ) { +//// alphabet = cbl_alphabet_of(&sym); +//// supported = __gg__encoding_iconv_valid(alphabet->encoding); +//// if( supported ) { +//// parser_alphabet( *alphabet ); +//// } +//// } +//// } ); +//// if( ! supported ) { +//// const char *encoding = __gg__encoding_iconv_name(alphabet->encoding); +//// cbl_unimplemented("alphabet %qs (as %qs)", alphabet->name, encoding); +//// return false; +//// } +//// +//// // Set collation sequence before parser_symbol_add.` +//// if( name ) { +//// symbol_elem_t *e = symbol_alphabet(program, name); +//// if( !e ) { +//// return false; +//// } +//// parser_alphabet_use(*cbl_alphabet_of(e)); +//// } +//// return true; } static std::ostream& @@ -1694,6 +1715,9 @@ operator<<( std::ostream& os, const cbl_occurs_bounds_t& bound ) { return os << bound.lower << ',' << bound.upper; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +// Keep this debugging function around for when it is needed static std::ostream& operator<<( std::ostream& os, const cbl_field_data_t& field ) { return os << field.memsize << ',' @@ -1717,16 +1741,7 @@ operator<<( std::ostream& os, const cbl_field_t& field ) { << ',' << field.line << ',' << field.data; } - -static void -write_field_csv( size_t isym, const cbl_field_t *field ) { - static std::ofstream os( getenv("GCOBOL_DATA") ); - assert(os.is_open()); - - if( symbols.first_program < isym) { - os << isym << "," << *field << std::endl; - } -} +#pragma GCC diagnostic pop static std::map<size_t, std::set<size_t>> same_record_areas; size_t parse_error_count(); @@ -1747,11 +1762,6 @@ symbols_update( size_t first, bool parsed_ok ) { struct symbol_elem_t *p, *pend; std::list<cbl_field_t*> shared_record_areas; - if( getenv(__func__) ) { - fprintf(stderr, "Initial"); - symbols_dump(std::max(first, symbols.first_program), true); - } - for( p = symbols_begin(first); p < symbols_end(); p++ ) { if( p->type == SymAlphabet ) continue; // Alphabets already processed. @@ -1773,8 +1783,7 @@ symbols_update( size_t first, bool parsed_ok ) { break; case 1: pend = calculate_capacity(p); - if( dialect_mf() && is_table(field) ) { - cbl_field_t *field = cbl_field_of(p); + if( is_table(field) ) { if( field->data.memsize < field->size() ) { field->data.memsize = field->size(); } @@ -1796,10 +1805,6 @@ symbols_update( size_t first, bool parsed_ok ) { // no special processing for other levels } - if( getenv("GCOBOL_DATA") ) { - write_field_csv( p - symbols_begin(), field ); - } - // Update ODO field in situ. if( is_table(field) ) { size_t& odo = field->occurs.depending_on; @@ -1817,7 +1822,7 @@ symbols_update( size_t first, bool parsed_ok ) { bool size_invalid = field->data.memsize > 0 && symbol_redefines(field); if( size_invalid ) { // redefine of record area is ok - auto redefined = symbol_redefines(field); + const cbl_field_t * redefined = symbol_redefines(field); size_invalid = ! is_record_area(redefined); } if( !field->is_valid() || size_invalid ) @@ -1840,8 +1845,8 @@ symbols_update( size_t first, bool parsed_ok ) { if( e == symbols_end() ) { // no field redefines the file's default record auto file = cbl_file_of(symbol_at(field->parent)); - ERROR_FIELD(field, "line %d: %s lacks a file description", - file->line, file->name); + ERROR_FIELD(field, "%s lacks a file description", + file->name); return 0; } } @@ -1852,28 +1857,25 @@ symbols_update( size_t first, bool parsed_ok ) { field->line, field->level_str(), field->name); } else { - dbgmsg("%s: error: data item %s #%zu '%s' capacity %u rejected", + dbgmsg("%s: error: data item %s #" HOST_SIZE_T_PRINT_UNSIGNED + " '%s' capacity %u rejected", __func__, 3 + cbl_field_type_str(field->type), - isym, field->name, field->data.capacity); + (fmt_size_t)isym, field->name, field->data.capacity); + gcc_unreachable(); } } return 0; } if(! (field->data.memsize == 0 || field_size(field) <= field->data.memsize) ) { - dbgmsg( "%s:%d: #%zu: invalid: %s", __func__, __LINE__, - symbol_index(p), field_str(cbl_field_of(p)) ); + dbgmsg( "%s:%d: #" HOST_SIZE_T_PRINT_UNSIGNED ": invalid: %s", __func__, __LINE__, + (fmt_size_t)symbol_index(p), field_str(cbl_field_of(p)) ); } assert(field->data.memsize == 0 || field_size(field) <= field_memsize(field)); assert( !(field->data.memsize > 0 && symbol_explicitly_redefines(field)) ); } - if( getenv(__func__) ) { - fprintf(stderr, "Pre"); - symbols_dump(std::max(first, symbols.first_program), true); - } - // A shared record area has no 01 child because that child redefines its parent. for( auto sharer : shared_record_areas ) { auto redefined = cbl_field_of(symbol_at(sharer->parent)); @@ -1888,6 +1890,15 @@ symbols_update( size_t first, bool parsed_ok ) { if( field->level == 0 && field->is_key_name() ) continue; if( is_literal(field) && field->var_decl_node != NULL ) continue; + // If the field is a constant for a figconstant, just use it. + if( field->level != 0 && field->has_attr(constant_e) ) { + auto fig = cbl_figconst_field_of(field->data.initial); + if( fig ) { + field->var_decl_node = fig->var_decl_node; + continue; + } + } + if( field->is_typedef() ) { auto isym = end_of_group( symbol_index(p) ); p = symbol_at(--isym); @@ -1895,7 +1906,7 @@ symbols_update( size_t first, bool parsed_ok ) { } // Verify REDEFINing field has no ODO components - auto parent = symbol_redefines(field); + const cbl_field_t * parent = symbol_redefines(field); if( parent && !is_record_area(parent) && is_variable_length(field) ) { ERROR_FIELD(field, "line %d: REDEFINES field %s cannot be variable length", field->line, field->name); @@ -1908,6 +1919,52 @@ symbols_update( size_t first, bool parsed_ok ) { field->line, field->level_str(), field->name); continue; } + if( is_numeric(field) && ! field->has_attr(constant_e) ) { + if( field->data.capacity == 0 ) { + ERROR_FIELD(field, "numeric %qs has USAGE that requires PICTURE %s", + field->name, field->data.initial); + } + } + + if( ! field->codeset.consistent() ) { + if( ! field->codeset.valid() ) { + switch(field->type) { + case FldForward: + case FldInvalid: + gcc_unreachable(); + case FldAlphaEdited: + case FldAlphanumeric: + case FldClass: + case FldDisplay: + case FldGroup: + case FldLiteralA: + case FldLiteralN: + case FldNumericDisplay: + case FldNumericEdited: + if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) { + error_msg(symbol_field_location(field_index(field)), + "internal: %qs encoding not defined", field->name); + } + break; + case FldConditional: + case FldFloat: + case FldIndex: + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldPointer: + case FldSwitch: + break; + } + } else { + if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) { + error_msg(symbol_field_location(field_index(field)), + "internal: %qs encoding %qs inconsistent", + field->name, + cbl_alphabet_t::encoding_str(field->codeset.encoding) ); + } + } + } assert( ! field->is_typedef() ); @@ -1986,7 +2043,8 @@ symbol_field_forward( size_t index ) { assert( index < symbols.nelem ); symbol_elem_t *e = symbol_at(index); if( (e->type != SymField) ) { - dbgmsg("%s: logic error: #%zu is %s", __func__, index, symbol_type_str(e->type)); + dbgmsg("%s: logic error: #" HOST_SIZE_T_PRINT_UNSIGNED " is %s", + __func__, (fmt_size_t)index, symbol_type_str(e->type)); } assert(e->type == SymField); @@ -2084,15 +2142,15 @@ symbol_in_file( symbol_elem_t *e ) { } #endif -static struct cbl_field_t * -symbol_field_parent_set( struct cbl_field_t *field ) +static cbl_field_t * +symbol_field_parent_set( cbl_field_t *field ) { if( field->level == 01 ) return NULL; if( field->level == 77 ) return NULL; if( field->level == 78 ) return NULL; struct symbol_elem_t *e = symbols.elems + symbols.nelem - 1; - struct symbol_elem_t *first = symbols.elems + symbols.first_program; + const struct symbol_elem_t *first = symbols.elems + symbols.first_program; for( ; field->parent == 0 && e >= first; e-- ) { if( ! (e->type == SymField && cbl_field_of(e)->level > 0) ) { @@ -2130,6 +2188,9 @@ symbol_field_parent_set( struct cbl_field_t *field ) return NULL; } prior->type = FldGroup; + if( ! prior->codeset.set() ) { // needs attention + dbgmsg("'%s' is already National", prior->name); + } field->attr |= numeric_group_attrs(prior); } // verify level 88 domain value @@ -2170,7 +2231,7 @@ class parent_elem_set private: size_t parent_index; public: - parent_elem_set( size_t parent_index ) + explicit parent_elem_set( size_t parent_index ) : parent_index(parent_index) {} void operator()( struct symbol_elem_t& e ) { @@ -2187,8 +2248,10 @@ add_token( symbol_elem_t sym ) { return sym; } +const std::list<cbl_field_t> cdf_literalize(); + /* - * When adding registers, be sure to add a complementary cblc_field_t + * When adding special registers, be sure to create the actual cblc_field_t * in libgcobol/constants.cc. */ void @@ -2207,53 +2270,63 @@ symbol_table_init(void) { // These should match the definitions in libgcobol/constants.cc static cbl_field_t constants[] = { - { 0, FldAlphanumeric, FldInvalid, space_value_e | constq, 0, 0, 0, nonarray, 0, - "SPACE", 0, {}, {1,1,0,0, " \0\xFF"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, space_value_e | constq , 0, 0, 0, nonarray, 0, - "SPACES", 0, {}, {1,1,0,0, " \0\xFF"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, low_value_e | constq, 0, 0, 0, nonarray, 0, - "LOW_VALUES", 0, {}, {1,1,0,0, "L\0\xFF"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, zero_value_e | constq, 0, 0, 0, nonarray, 0, - "ZEROS", 0, {}, {1,1,0,0, "0"}, NULL }, - { 0, FldAlphanumeric, FldInvalid, high_value_e | constq, 0, 0, 0, nonarray, 0, - "HIGH_VALUES", 0, {}, {1,1,0,0, "H\0\xFF"}, NULL }, + { FldAlphanumeric, space_value_e | int(constq) | register_e, + {1,1,0,0, " \0\xFF"}, 0, "SPACE" }, + { FldAlphanumeric, space_value_e | int(constq) | register_e, + {1,1,0,0, " \0\xFF"}, 0, "SPACES" }, + { FldAlphanumeric, low_value_e | int(constq) | register_e, + {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES" }, + { FldAlphanumeric, zero_value_e | int(constq) | register_e, + {1,1,0,0, "0"}, 0, "ZEROS" }, + { FldAlphanumeric, high_value_e | int(constq) | register_e, + {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES" }, // IBM standard: QUOTE is a double-quote unless APOST compiler option - { 0, FldAlphanumeric, FldInvalid, quote_value_e | constq , 0, 0, 0, nonarray, 0, - "QUOTES", 0, {}, {1,1,0,0, "\"\0\xFF"}, NULL }, - { 0, FldPointer, FldPointer, constq , 0, 0, 0, nonarray, 0, - "NULLS", 0, {}, {8,8,0,0, zeroes_for_null_pointer}, NULL }, + { FldAlphanumeric, quote_value_e | int(constq) | register_e , + {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES" }, + { FldPointer, int(constq) | register_e , + {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS" }, // IBM defines TALLY // 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO. - { 0, FldNumericBin5, FldInvalid, signable_e, 0, 0, 0, nonarray, 0, - "_TALLY", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL }, + { FldNumericBin5, signable_e | register_e, + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY" }, // 01 ARGI is the current index into the argv array - { 0, FldNumericBin5, FldInvalid, signable_e, 0, 0, 0, nonarray, 0, - "_ARGI", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL }, + { FldNumericBin5, signable_e | register_e, + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI" }, // These last two don't require actual storage; they get BOOL var_decl_node // in parser_symbol_add() - { 0, FldConditional, FldInvalid, constant_e , 0, 0, 0, nonarray, 0, - "_VERY_TRUE", 0, {}, {1,1,0,0, ""}, NULL }, - { 0, FldConditional, FldInvalid, constant_e , 0, 0, 0, nonarray, 0, - "_VERY_FALSE", 0, {}, {1,1,0,0, ""}, NULL }, + { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE" }, + { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE" }, }; for( struct cbl_field_t *f = constants; f < constants + COUNT_OF(constants); f++ ) { f->our_index = table.nelem; struct symbol_elem_t sym(SymField, 0); sym.elem.field = *f; + // The following makes these constants match the definitions in + // constants.cc. Consider expanding the constructor fo cbl_field_t to + // handle encoding. + sym.elem.field.codeset.encoding = iconv_CP1252_e; table.elems[table.nelem++] = sym; } static symbol_elem_t environs[] = { + { symbol_elem_t{ 0, cbl_special_name_t{0, CONSOLE_e, "CONSOLE", 0, "/dev/stdout"}} }, // stdout in DISPLAY; stdin in ACCEPT + + { symbol_elem_t{ 0, cbl_special_name_t{0, STDIN_e, "STDIN", 0, "/dev/stdin"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, SYSIN_e, "SYSIN", 0, "/dev/stdin"}} }, - { symbol_elem_t{ 0, cbl_special_name_t{0, SYSIPT_e, "SYSIPT", 0, "/dev/stdout"}} }, + { symbol_elem_t{ 0, cbl_special_name_t{0, SYSIPT_e, "SYSIPT", 0, "/dev/stdin"}} }, + + { symbol_elem_t{ 0, cbl_special_name_t{0, STDOUT_e, "STDOUT", 0, "/dev/stdout"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, SYSOUT_e, "SYSOUT", 0, "/dev/stdout"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, SYSLIST_e, "SYSLIST", 0, "/dev/stdout"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, SYSLST_e, "SYSLST", 0, "/dev/stdout"}} }, + { symbol_elem_t{ 0, cbl_special_name_t{0, SYSPUNCH_e, "SYSPUNCH", 0, "/dev/stderr"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, SYSPCH_e, "SYSPCH", 0, "/dev/stderr"}} }, - { symbol_elem_t{ 0, cbl_special_name_t{0, CONSOLE_e, "CONSOLE", 0, "/dev/stdout"}} }, + { symbol_elem_t{ 0, cbl_special_name_t{0, STDERR_e, "STDERR", 0, "/dev/stderr"}} }, + { symbol_elem_t{ 0, cbl_special_name_t{0, SYSERR_e, "SYSERR", 0, "/dev/stderr"}} }, + { symbol_elem_t{ 0, cbl_special_name_t{0, C01_e, "C01", 0, "/dev/null"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, C02_e, "C02", 0, "/dev/null"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, C03_e, "C03", 0, "/dev/null"}} }, @@ -2273,10 +2346,6 @@ symbol_table_init(void) { { symbol_elem_t{ 0, cbl_special_name_t{0, S04_e, "S04", 0, "/dev/null"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, S05_e, "S05", 0, "/dev/null"}} }, { symbol_elem_t{ 0, cbl_special_name_t{0, AFP_5A_e, "AFP-5A", 0, "/dev/null"}} }, - { symbol_elem_t{ 0, cbl_special_name_t{0, STDIN_e, "STDIN", 0, "/dev/stdin"}} }, - { symbol_elem_t{ 0, cbl_special_name_t{0, STDOUT_e, "STDOUT", 0, "/dev/stdout"}} }, - { symbol_elem_t{ 0, cbl_special_name_t{0, STDERR_e, "STDERR", 0, "/dev/stderr"}} }, - { symbol_elem_t{ 0, cbl_special_name_t{0, SYSERR_e, "SYSERR", 0, "/dev/stderr"}} }, }; struct symbol_elem_t *p = table.elems + table.nelem; @@ -2303,31 +2372,31 @@ symbol_table_init(void) { **/ static cbl_field_t debug_registers[] = { - { 0, FldGroup, FldInvalid, global_e, 0,0,1, nonarray, 0, - "DEBUG-ITEM", 0, {}, {132,132,0,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, global_e, 0,0,2, nonarray, 0, - "DEBUG-LINE", 0, {}, {6,6,0,0, " "}, NULL }, - { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldAlphanumeric, FldInvalid, global_e, 0,0,2, nonarray, 0, - "DEBUG-NAME", 0, {}, {30,30,0,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0, - "DEBUG-SUB-1", 0, {}, {5,5,3,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0, - "DEBUG-SUB-2", 0, {}, {5,5,3,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0, - "DEBUG-SUB-3", 0, {}, {5,5,3,0, NULL}, NULL }, - { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, - "FILLER", 0, {}, {1,1,0,0, " "}, NULL }, - { 0, FldAlphanumeric, FldInvalid, signable_e | global_e, 0,0,2, nonarray, 0, - "DEBUG-CONTENTS", 0, {}, {76,76,0,0, NULL}, NULL }, -}; + { FldGroup, register_e, + {132,132,0,0, NULL}, 1, "DEBUG-ITEM" }, + { FldAlphanumeric, register_e, + {6,6,0,0, " "}, 2, "DEBUG-LINE" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldAlphanumeric, register_e, + {30,30,0,0, NULL}, 2, "DEBUG-NAME" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldNumericDisplay, signable_e | register_e | leading_e | separate_e, + {5,5,4,0, NULL}, 2, "DEBUG-SUB-1" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldNumericDisplay, signable_e | register_e | leading_e | separate_e, + {5,5,4,0, NULL}, 2, "DEBUG-SUB-2" }, + { FldAlphanumeric, register_e|filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldNumericDisplay, signable_e | register_e | leading_e | separate_e, + {5,5,4,0, NULL}, 2, "DEBUG-SUB-3" }, + { FldAlphanumeric, register_e | filler_e, + {1,1,0,0, " "}, 2, "FILLER" }, + { FldAlphanumeric, signable_e | register_e, + {76,76,0,0, NULL}, 2, "DEBUG-CONTENTS" }, + }; // debug registers assert(table.nelem + COUNT_OF(debug_registers) < table.capacity); @@ -2345,26 +2414,18 @@ symbol_table_init(void) { assert(table.nelem < table.capacity); std::for_each(debug_start+1, p, parent_elem_set(debug_start - table.elems)); + // special registers static cbl_field_t special_registers[] = { - { 0, FldNumericDisplay, FldInvalid, 0, 0, 0, 0, nonarray, 0, "_FILE_STATUS", - 0, {}, {2,2,2,0, NULL}, NULL }, - { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "UPSI-0", - 0, {}, {2,2,4,0, NULL}, NULL }, - { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "RETURN-CODE", - 0, {}, {2,2,4,0, NULL}, NULL }, - { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "LINAGE-COUNTER", - 0, {}, {2,2,4,0, NULL}, NULL }, - { 0, FldLiteralA, FldInvalid, 0, 0, 0, 0, nonarray, 0, "_dev_stdin", - 0, {}, {0,0,0,0, "/dev/stdin"}, NULL }, - { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_stdout", - 0, {}, {0,0,0,0, "/dev/stdout"}, NULL }, - { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_stderr", - 0, {}, {0,0,0,0, "/dev/stderr"}, NULL }, - { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_null", - 0, {}, {0,0,0,0, "/dev/null"}, NULL }, + { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS" }, + { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0" }, + { FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE" }, + { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER" }, + { FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin" }, + { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout" }, + { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr" }, + { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/null"}, 0, "_dev_null" }, }; - // special registers assert(table.nelem + COUNT_OF(special_registers) < table.capacity); p = table.elems + table.nelem; @@ -2374,6 +2435,34 @@ symbol_table_init(void) { table.nelem = p - table.elems; assert(table.nelem < table.capacity); + // xml registers + static cbl_field_t xml_registers[] = { + { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-CODE" }, + { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT" }, + { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION" }, + { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE" }, + { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE" }, + { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX" }, + { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX" }, + { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-TEXT" }, + { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NTEXT" }, + }, * const eoxml = xml_registers + COUNT_OF(xml_registers); + + assert(table.nelem + COUNT_OF(xml_registers) < table.capacity); + + p = table.elems + table.nelem; + p = std::transform(xml_registers, eoxml, p, elementize); + table.nelem = p - table.elems; + assert(table.nelem < table.capacity); + + // Add any CDF values already defined as literals. + // After symbols are ready, the CDF adds them directly. + const std::list<cbl_field_t> cdf_values = cdf_literalize(); + table.nelem += cdf_values.size(); + assert(table.nelem < table.capacity); + + p = std::transform(cdf_values.begin(), cdf_values.end(), p, elementize); + // Initialize symbol table. symbols = table; @@ -2391,8 +2480,6 @@ symbol_table_init(void) { symbols.registers.return_code = symbol_index(symbol_field(0,0, "RETURN-CODE")); symbols.registers.very_true = symbol_index(symbol_field(0,0, "_VERY_TRUE")); symbols.registers.very_false = symbol_index(symbol_field(0,0, "_VERY_FALSE")); - - if( getenv(__func__) ) symbols_dump(0, true); } /* @@ -2476,7 +2563,7 @@ symbol_file_add( size_t program, cbl_file_t *file ) { return NULL; } - struct symbol_elem_t sym = { SymFile, program }; + symbol_elem_t sym{ SymFile, program }; sym.elem.file = *file; e = symbol_add(&sym); @@ -2489,16 +2576,23 @@ symbol_file_add( size_t program, cbl_file_t *file ) { return e; } -struct symbol_elem_t * -symbol_alphabet_add( size_t program, struct cbl_alphabet_t *alphabet ) { - struct symbol_elem_t sym{ SymAlphabet, program }; +symbol_elem_t * +symbol_locale_add( size_t program, const cbl_locale_t *locale ) { + symbol_elem_t sym{ SymLocale, program }; + sym.elem.locale = *locale; + return symbol_add(&sym); +} + +symbol_elem_t * +symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ) { + symbol_elem_t sym{ SymAlphabet, program }; sym.elem.alphabet = *alphabet; return symbol_add(&sym); } -size_t +uint64_t numeric_group_attrs( const cbl_field_t *field ) { - static const size_t inherit = signable_e | leading_e | separate_e | big_endian_e; + static const uint64_t inherit = signable_e | leading_e | separate_e | big_endian_e; static_assert(sizeof(cbl_field_t::type) < sizeof(inherit), "need bigger type"); assert(field); if( field->type == FldNumericDisplay || field->type == FldGroup ) { @@ -2534,7 +2628,7 @@ symbol_typedef_add( size_t program, struct cbl_field_t *field ) { auto e = symbols_end() - 1; assert( symbols_begin() < e ); if( e->type == SymField ) { - auto f = cbl_field_of(e); + const cbl_field_t * f = cbl_field_of(e); if( f == field ) return e; } @@ -2572,7 +2666,7 @@ struct symbol_elem_t * symbol_field_add( size_t program, struct cbl_field_t *field ) { field->our_index = symbols.nelem; - cbl_field_t *parent = symbol_field_parent_set( field ); + const cbl_field_t *parent = symbol_field_parent_set( field ); if( parent && parent->type == FldGroup) { // Inherit effects of parent's USAGE, as though it appeared 1st in the // member's definition. @@ -2580,35 +2674,19 @@ symbol_field_add( size_t program, struct cbl_field_t *field ) field->attr = inherit & parent->attr; field->attr |= numeric_group_attrs(parent); field->usage = parent->usage; + if( field->level == 66 || field->level == 88 ) { + field->codeset = parent->codeset; + } // BINARY-LONG, for example, sets capacity. if( is_numeric(parent->usage) && parent->data.capacity > 0 ) { field->type = parent->usage; field->data = parent->data; - field->data = 0.0; + field->data = 0; // cppcheck-suppress redundantAssignment + // // cppcheck doesn't understand multiple overloaded operator= field->data.initial = NULL; } } - char *s; - if( (s = getenv(__func__)) != NULL ) { - if( s[0] == 'D' ) { - for( struct symbol_elem_t *e = symbols_begin(); e < symbols_end(); e++ ) { - fprintf(stderr, "%zu: %s ", e - symbols.elems, symbol_type_str(e->type)); - if( e->type == SymField ) { - fprintf(stderr, "%s = %s", - cbl_field_of(e)->name, cbl_field_of(e)->data.initial); - } - fprintf(stderr, "\n"); - } - } - - dbgmsg( "%s:%d: %3zu %-18s %02d %-16s %u/%u/%d = '%s'", __func__, __LINE__, - field->offset, - cbl_field_type_str(field->type), field->level, field->name, - field->data.capacity, field->data.digits, field->data.rdigits, - field->data.initial? field->data.initial : "(none)" ); - } - if( is_forward(field) ) { auto *e = symbol_field( program, field->parent, field->name ); if( e ) { @@ -2723,6 +2801,7 @@ symbol_field( size_t program, size_t parent, const char name[] ) return p != end? &*p : NULL; } +// cppcheck-suppress-begin [CastIntegerToAddressAtReturn] obviously not true symbol_elem_t * symbol_register( const char name[] ) { @@ -2738,6 +2817,7 @@ symbol_register( const char name[] ) return p; } +// cppcheck-suppress-end [CastIntegerToAddressAtReturn] // Find current 01 record during Level 66 construction. const symbol_elem_t * @@ -2765,11 +2845,8 @@ symbol_field_forward_add( size_t program, size_t parent, auto e = symbol_field(program, parent, name); if( e ) return e; - struct cbl_field_t field = { 0, - FldForward, FldInvalid, 0, parent, 0, 0, - nonarray, line, "", - 0, cbl_field_t::linkage_t(), - {0,0,0,0, " "}, NULL }; + cbl_field_t field = { FldForward, 0, line }; + field.parent = parent; if( sizeof(field.name) < strlen(name) ) { dbgmsg("%s:%d: logic error: name %s too long", __func__, __LINE__, name); return NULL; @@ -2800,11 +2877,12 @@ symbol_literalA( size_t program, const char name[] ) struct symbol_elem_t * symbol_file( size_t program, const char name[] ) { size_t nelem = symbols.nelem; - struct symbol_elem_t key = { SymFile, program }, *e = &key; + symbol_elem_t key{ SymFile, program }, *e = &key; assert(strlen(name) < sizeof(key.elem.file.name)); strcpy(key.elem.file.name, name); + // cppcheck-suppress-begin [knownConditionTrueFalse] do { e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, &nelem, sizeof(*e), @@ -2813,6 +2891,7 @@ symbol_file( size_t program, const char name[] ) { key.program = cbl_label_of(symbol_at(key.program))->parent; if( key.program == 0 ) break; // no file without a program } while( !e ); + // cppcheck-suppress-end [knownConditionTrueFalse] if( e ) { assert(e->type == SymFile); @@ -2920,7 +2999,7 @@ seek_parent( const symbol_elem_t *e, size_t level ) { struct symbol_elem_t * symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) { if( target_in_src(tgt, src) ) { - ERROR_FIELD(tgt, "%s %s may not reference itself as part of %s %s", + ERROR_FIELD(tgt, "%s %s may not reference itself as part of %s %s", tgt->level_str(), tgt->name, src->level_str(), src->name); return NULL; } @@ -2953,6 +3032,7 @@ symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) { cbl_field_t dup = {}; dup.parent = field_index(tgt); dup.line = tgt->line; + dup.codeset = tgt->codeset; elem_group_t group(++bog, eog); @@ -3006,7 +3086,7 @@ symbol_file_same_record_area( std::list<cbl_file_t*>& files ) { } static symbol_elem_t * -next_program( symbol_elem_t *elem ) { +next_program( const symbol_elem_t *elem ) { size_t start = elem? symbol_index(elem) : 0; symbol_elem_t * e = std::find_if( symbols_begin(start), symbols_end(), is_program ); @@ -3044,14 +3124,14 @@ is_numeric_constant( const char name[] ) { // get default record layout for a file struct cbl_field_t * -symbol_file_record( struct cbl_file_t *file ) { +symbol_file_record( const cbl_file_t *file ) { return cbl_field_of(symbol_at(file->default_record)); } class is_section { cbl_section_type_t section_type; public: - is_section( cbl_section_type_t sect ) : section_type(sect) {} + explicit is_section( cbl_section_type_t sect ) : section_type(sect) {} bool operator()( symbol_elem_t& e ) const { return e.type == SymDataSection && cbl_section_of(&e)->type == section_type; } @@ -3062,8 +3142,6 @@ static bool fd_record_size_cmp( const symbol_elem_t& a, const symbol_elem_t& b ) return cbl_field_of(&a)->data.capacity < cbl_field_of(&b)->data.capacity; } -cbl_file_key_t cbl_file_t::no_key; - /* * Find largest and smallest record defined for a file. The rule is: * cbl_file_t::varies() returns true if the record size varies, @@ -3120,12 +3198,6 @@ symbol_file_record_sizes( struct cbl_file_t *file ) { output.min = cbl_field_of(&*p.first)->data.capacity; output.max = cbl_field_of(&*p.second)->data.capacity; - if( yydebug && getenv(__func__) ) { - dbgmsg("%s: %s: min '%s' %zu, max '%s' %zu", __func__, file->name, - cbl_field_of(&*p.first)->name, output.min, - cbl_field_of(&*p.second)->name, output.max); - } - assert(output.min > 0 && "min record size is 0"); assert(output.min <= output.max); @@ -3172,15 +3244,141 @@ constant_of( size_t isym ) return field; } +cbl_locale_t::cbl_locale_t( const cbl_name_t name, const char iconv_name[] ) { + gcc_assert(strlen(name) < sizeof this->name); + strcpy(this->name, name); + + if( iconv_name ) { + encoding = __gg__encoding_iconv_type(iconv_name); + + strcpy(collation, "C"); + // If the iconv_name is prefixed by langauge_COUNTRY (e.g. en_US), capture that. + auto pend = iconv_name + strlen(iconv_name); + auto p = std::find(iconv_name, pend, '.'); + if( p < pend ) { + auto pend2 = std::copy(iconv_name, p, collation); + std::fill(pend2, collation + sizeof(collation), '\0'); + iconv_name = ++p; + } + encoding = __gg__encoding_iconv_type(iconv_name); + } +} + +cbl_alphabet_t::cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name ) + : loc(loc) + , locale(locale) + , low_index(0) + , high_index(255) + , last_index(0) +{ + if( locale > 0 ) { + encoding = cbl_locale_of(symbol_at(locale))->encoding; + } + memset(collation_sequence, 0xFF, sizeof(collation_sequence)); + if( name ) { // from Special-Names collation_sequence + assert(strlen(name) < sizeof(cbl_name_t)); + strcpy(this->name, name); + } +} + +/* + * As parsed, the alphabet reflects the encoding of the source code. If the + * program uses a different encoding for alphanumeric, convert the alphabet to + * that. + * + * Because a custom alphabet is rare and occurs at most only once per program, + * we don't attempt to avoid re-encoding. "Conversion" of ASCII to ASCII is at + * most 256 calls to iconv(3). + */ +void +cbl_alphabet_t::reencode() { + + const unsigned char * const pend = collation_sequence + sizeof(collation_sequence); + std::vector<char> tgt(256, (char)0xFF); + + /* Keep copies of low_index and last_index for use in run-time as LOW-VALUE + and HIGH-VALUE, which are kept as globals in the source-code codeset + and converted to the display encoding as necessary. */ + + low_char = low_index; + high_char = last_index; + + /* + * For now, assume CP1252 source-code encoding because we're not capturing it + * anywhere except in cbl_field_t::internalize(). The only known examples of + * a custom alphabet are from NIST, which of course are ASCII. + */ + const char *fromcode = __gg__encoding_iconv_name(CP1252_e); + const char *tocode = + __gg__encoding_iconv_name(current_encoding(display_encoding_e)); + iconv_t cd = iconv_open(tocode, fromcode); + +#if optimal_reencode + if( fromcode == tocode ) { // semantically + tgt.resize(0); + return tgt; // Return empty vector; caller copies zero bytes. + } +#endif + + /* + * Each position in the alphabet array represents a letter in the source-code + * encoding. The value at that position represents the letter's collation + * position, its sort order. For each letter in alphabet, determine value of + * that letter in the alphanumeric encoding, and set its collation position + * in that alphabet. + */ + for( const unsigned char *p = collation_sequence; p < pend; p++ ) { + if( *p == 0xFF ) continue; + unsigned char ch = p - collation_sequence; + unsigned char pos[8] = {}; + size_t inbytesleft = 1, outbytesleft = sizeof(pos); + char *inbuf = reinterpret_cast<char*>(&ch), + *outbuf = reinterpret_cast<char*>(pos); + + size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + + if( n == size_t(-1) ) { + error_msg(loc, "%s character '%c' (%x hex) cannot be converted to %s", + fromcode, ch, ch, tocode); + continue; + } + if( n != 0 ) { + dbgmsg("%s character '%c' (%x hex) irreversibly converted to %s", + fromcode, ch, ch, tocode); + } + assert(outbytesleft < sizeof(pos)); + n = sizeof(pos) - outbytesleft; + if( 1 < n ) { + error_msg(loc, "%s character '%c' (%x hex) requires %zu bytes as %s", + fromcode, ch, ch, n, tocode); + continue; + } + + if( ch == low_index ) { + low_index = pos[0]; + } + if( ch == last_index ) { + last_index = pos[0]; + } + if( ch == high_index ) { + high_index = pos[0]; + } + + tgt.at(pos[0]) = *p; + } + + std::copy(tgt.begin(), tgt.end(), collation_sequence); +} + bool cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) { - if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) { - alphabet[ch] = high_value; + if( collation_sequence[ch] == 0xFF || collation_sequence[ch] == high_value) { + collation_sequence[ch] = high_value; last_index = ch; return true; } - auto taken = alphabet[ch]; - error_msg(loc, "ALPHABET %s, character '%c' (X'%x') " + auto taken = collation_sequence[ch]; + error_msg(loc, "ALPHABET %s, character %<%c%> (X%'%x%') " "in position %d already defined at position %d", name, ISPRINT(ch)? ch : '?', ch, @@ -3192,7 +3390,7 @@ cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high void cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) { if( ch < 256 ) { - alphabet[ch] = alphabet[last_index]; + collation_sequence[ch] = collation_sequence[last_index]; if( ch == high_index ) high_index--; return; } // else it's a figurative constant ... @@ -3205,20 +3403,20 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) { // last_index is already set; use it as the "last value before ALSO" if( attr & low_value_e ) { - alphabet[0] = alphabet[last_index]; + collation_sequence[0] = collation_sequence[last_index]; return; } if( attr & high_value_e ) { - alphabet[high_index--] = alphabet[last_index]; + collation_sequence[high_index--] = collation_sequence[last_index]; return; } if( attr & (space_value_e|quote_value_e) ) { ch = field->data.initial[0]; - alphabet[ch] = alphabet[last_index]; + collation_sequence[ch] = collation_sequence[last_index]; return; } if( attr & (zero_value_e) ) { - alphabet[0] = alphabet[last_index]; + collation_sequence[0] = collation_sequence[last_index]; error_msg(loc, "ALSO value '%s' is unknown", field->name); return; } @@ -3229,37 +3427,25 @@ using std::deque; static deque<cbl_field_t*> stack; static cbl_field_t * -new_temporary_impl( enum cbl_field_type_t type ) +new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr ) { extern int yylineno; - static int nstack, nliteral; static const struct cbl_field_t empty_alpha = { - 0, FldAlphanumeric, FldInvalid, - intermediate_e, 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + FldAlphanumeric, intermediate_e, + {MAXIMUM_ALPHA_LENGTH, + MAXIMUM_ALPHA_LENGTH, 0, 0, NULL} }; static const struct cbl_field_t empty_float = { - 0, FldFloat, FldInvalid, - intermediate_e, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {16, 16, 32, 0, NULL}, NULL }; + FldFloat, intermediate_e, + {16, 16, 32, 0, NULL} }; static const struct cbl_field_t empty_comp5 = { - 0, FldNumericBin5, FldInvalid, + FldNumericBin5, signable_e | intermediate_e, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL }; + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL} }; static const struct cbl_field_t empty_conditional = { - 0, FldConditional, FldInvalid, intermediate_e, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + FldConditional, intermediate_e, cbl_field_data_t{} }; static struct cbl_field_t empty_literal = { - 0, FldInvalid, FldInvalid, CONSTANT_E, - 0, 0, 0, nonarray, 0, "", - 0, cbl_field_t::linkage_t(), - {}, NULL }; + FldInvalid, CONSTANT_E, cbl_field_data_t{} }; + struct cbl_field_t *f = new cbl_field_t; f->type = type; @@ -3275,7 +3461,6 @@ new_temporary_impl( enum cbl_field_type_t type ) case FldSwitch: case FldDisplay: case FldPointer: - case FldBlob: break; case FldConditional: *f = empty_conditional; @@ -3300,15 +3485,17 @@ new_temporary_impl( enum cbl_field_type_t type ) f->line = yylineno; if( is_literal(type) ) { + static int nliteral = 0; snprintf(f->name, sizeof(f->name), "_literal%d",++nliteral); } else { + static int nstack = 0; snprintf(f->name, sizeof(f->name), "_stack%d",++nstack); - - if( getenv("symbol_temporaries_free") ) { - dbgmsg("%s: %s, %s", __func__, f->name, 3 + cbl_field_type_str(f->type)); - } } + f->data.initial = name; // capture e.g. the function name + + f->codeset.set(); + return f; } @@ -3321,13 +3508,17 @@ new_temporary_decl() { static inline cbl_field_t * parser_symbol_add2( cbl_field_t *field ) { + if( ! field->codeset.valid() ) { + dbgmsg( "%s:%d: %s (%s) has no encoding", __func__, __LINE__, + field->name, cbl_field_type_str(field->type) ); + } parser_symbol_add(field); return field; } static cbl_field_t * -new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr ) { - static char empty[2] = "\0"; +new_literal_add( const char initial[], uint32_t len, + cbl_field_attr_t attr, cbl_encoding_t encoding ) { cbl_field_t *field = NULL; if( !(attr & quoted_e) ) { @@ -3339,21 +3530,39 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr { field = new_temporary_impl(FldLiteralA); field->attr |= attr; - field->data.initial = len > 0? initial : empty; - field->data.capacity = len; - if( ! field->internalize() ) + if(len == 0) { - ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial); + // This will cover UTF-32, should that arise. + size_t nbytes = 4; + char *init = static_cast<char *>(xmalloc(nbytes)); + memset(init, 0, nbytes); + field->data.initial = init; } + if(len) + { + char *init = static_cast<char *>(xmalloc(len+4)); + memcpy(init, initial, len); + memset(init+len, 0, 4); + field->data.initial = init; + } + field->data.capacity = len; } + if( ! field->has_attr(hex_encoded_e) ) { + // If the literal bore a prefix, set the encoding, + if( encoding != cbl_field_t::codeset_t::source_encoding->type ) { + field->codeset.set(encoding); + } + field->internalize(); + } + static size_t literal_count = 1; sprintf(field->name, - "%s%c_%zd", + "%s%c_" HOST_SIZE_T_PRINT_DEC, "_literal", field->type == FldLiteralA ? 'a' : 'n', - literal_count++); + (fmt_size_t)literal_count++); return parser_symbol_add2(field); } @@ -3361,35 +3570,40 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr static temporaries_t temporaries; cbl_field_t * -temporaries_t::literal( const char value[], uint32_t len, cbl_field_attr_t attr ) { - auto key = literal_an(value, quoted_e == (attr & quoted_e)); +temporaries_t::literal( uint32_t len, const char value[], + cbl_field_attr_t attr, cbl_encoding_t encoding ) { + bool is_quoted2 = quoted_e == (attr & quoted_e); + bool is_verbatim = hex_encoded_e == (attr & hex_encoded_e); + auto key = literal_an(value, is_quoted2, is_verbatim); - if( 0 == (attr & hex_encoded_e) ) { + if( ! is_verbatim ) { // TODO: try without this test once National is ready auto p = literals.find(key); if( p != literals.end() ) { cbl_field_t *field = p->second; return field; } } - return literals[key] = new_literal_add(value, len, attr); + return literals[key] = new_literal_add(value, len, attr, encoding); } cbl_field_t * -new_literal( uint32_t len, const char initial[], enum cbl_field_attr_t attr ) { - return temporaries.literal(initial, len, attr); +new_literal( uint32_t len, const char initial[], + cbl_field_attr_t attr, cbl_encoding_t encoding ) { + return temporaries.literal(len, initial, attr, encoding); } void temporaries_t::dump() const { extern int yylineno; - char *output = xasprintf("%4d: %zu Literals", yylineno, literals.size()); + char *output = xasprintf("%4d: " HOST_SIZE_T_PRINT_UNSIGNED " Literals", + yylineno, (fmt_size_t)literals.size()); for( const auto& elem : used ) { if( ! elem.second.empty() ) { char *so_far = output; - output = xasprintf("%s, %zu %s", + output = xasprintf("%s, " HOST_SIZE_T_PRINT_UNSIGNED " %s", so_far, - elem.second.size(), + (fmt_size_t)elem.second.size(), 3 + cbl_field_type_str(elem.first)); free(so_far); } @@ -3399,14 +3613,6 @@ temporaries_t::dump() const { } temporaries_t::~temporaries_t() { - if( getenv( "symbol_temporaries_free" ) ) { - dbgmsg("%s: %zu literals", __func__, literals.size()); - for( const auto& elem : literals ) { - const literal_an& key(elem.first); - fprintf(stderr, "%c '%s'\n", key.is_quoted? 'Q' : ' ', key.value.c_str()); - } - dump(); - } } cbl_field_t * @@ -3438,11 +3644,11 @@ temporaries_t::reuse( cbl_field_type_t type ) { } cbl_field_t * -temporaries_t::acquire( cbl_field_type_t type ) { +temporaries_t::acquire( cbl_field_type_t type, const cbl_name_t name ) { cbl_field_t *field = reuse(type); if( !field ) { - field = new_temporary_impl(type); + field = new_temporary_impl(type, name); add(field); } return parser_symbol_add2(field); // notify of reuse @@ -3450,7 +3656,6 @@ temporaries_t::acquire( cbl_field_type_t type ) { void symbol_temporaries_free() { - if( getenv(__func__) ) temporaries.dump(); for( auto& elem : temporaries.used ) { const cbl_field_type_t& type(elem.first); temporaries_t::fieldset_t& used(elem.second); @@ -3476,27 +3681,51 @@ symbol_temporaries_free() { } cbl_field_t * -new_alphanumeric( size_t capacity ) { - cbl_field_t * field = new_temporary_impl(FldAlphanumeric); +new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) { + cbl_field_t * field = new_temporary_impl(FldAlphanumeric, name); field->data.capacity = capacity; temporaries.add(field); return parser_symbol_add2(field); } +extern os_locale_t os_locale; + +const encodings_t cbl_field_t::codeset_t::source_encodings[2] = { + { false, iconv_UTF_8_e, "UTF-8" }, + { true, iconv_CP1252_e, "CP1252" }, +}; +const encodings_t * cbl_field_t::codeset_t::source_encoding = { + cbl_field_t::codeset_t::source_encodings +}; + +const encodings_t cbl_field_t::codeset_t::standard_internal = { + true, iconv_CP1252_e, "CP1252" +}; +#define standard_internal cbl_field_t::codeset_t::standard_internal + cbl_field_t * -new_temporary( enum cbl_field_type_t type, const char *initial ) { - if( ! initial ) { +new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) { + const bool force_unsigned = type == FldNumericBin5 && ! is_signed; + + if( ! initial && ! force_unsigned ) { assert( ! is_literal(type) ); // Literal type must have literal value. - return temporaries.acquire(type); + return temporaries.acquire(type, initial); } if( is_literal(type) ) { - auto field = temporaries.literal(initial, - type == FldLiteralA? quoted_e : none_e); + auto field = temporaries.literal(strlen(initial), initial, + type == FldLiteralA? quoted_e : none_e, + standard_internal.type); return field; } - cbl_field_t *field = new_temporary_impl(type); - field->data.capacity = strlen(field->data.initial = initial); - temporaries.add(field); + cbl_field_t *field = new_temporary_impl(type, initial); + + // don't reuse unsigned numeric + if( force_unsigned ) { + field->clear_attr(signable_e); + } else { + temporaries.add(field); + } + parser_symbol_add(field); return field; @@ -3535,12 +3764,38 @@ new_temporary_clone( const cbl_field_t *orig) { temporaries.add(field); } field->data = orig->data; - if( field->type == FldNumericBin5 ) field->type = orig->type; + if( field->type == FldNumericBin5 ) { + field->type = orig->type; + field->codeset = orig->codeset; + } field->attr = intermediate_e; return parser_symbol_add2(field); } +/* + * This set of ASCII-like encodings is incomplete and possibly wrong. A + * complete definition would better supported with a Boolean in enccodings_t. + * If it returns false pessimistically, the only consequence is inefficiency: + * the string is processed by iconv(3). + */ +bool +cbl_field_t::holds_ascii() const { + // True if the encoding is a superset of ASCII. + switch(codeset.encoding) { + case ASCII_e: + case CP1252_e: + case iso646_e: + return true; + default: + if( iconv_1026_e <= codeset.encoding && + codeset.encoding <= iconv_ANSI_X3_4_e ) { + return true; + } + } + return false; +} + bool cbl_field_t::is_ascii() const { return std::all_of( data.initial, @@ -3566,124 +3821,172 @@ cbl_field_t::is_ascii() const { * compilation, if it moves off the default, it adjusts only once, and * never reverts. */ -static const char standard_internal[] = "CP1252//"; -extern os_locale_t os_locale; - -static const char * -guess_encoding() { - static const char *fromcode; - - if( ! fromcode ) { - return fromcode = os_locale.assumed; - } - - if( fromcode == os_locale.assumed ) { - fromcode = os_locale.codeset; - if( 0 != strcmp(fromcode, "C") ) { // anything but that - return fromcode; - } - } - - return standard_internal; -} const char * cbl_field_t::internalize() { - static const char *tocode = standard_internal; - static const char *fromcode = guess_encoding(); - static iconv_t cd = iconv_open(tocode, fromcode); + /* The purpose of this routine is to return a nul-terminated string which + is data.initial converted from the source-code characters to the + codeset.encoding characters. + + The contract between this routine and the routines that call it is that + for alphanumeric types, data.initial shall have the same number of + characters as will be needed to fill data.capacity. + + Be aware that for PIC X(32) Z"foo", there are the characters "foo", + followed by a NUL, and then 28 spaces to fill it out. It turns out that + iconv, given a character count of 32, converts all 32, including the + embedded NUL. So, that case works even through strlen(initial) is + smaller than the length of initial, which is the same as capacity. + */ + + static const char *fromcode = codeset.source_encodings[0].name; static const size_t noconv = size_t(-1); + static std::unordered_map<std::string, iconv_t> tocodes; + + if( ! codeset.valid() ) { + dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial); + return data.initial; + } - // Sat Mar 16 11:45:08 2024: require temporary environment for testing - if( getenv( "INTERNALIZE_NO") ) return data.initial; + const char *tocode = __gg__encoding_iconv_name(codeset.encoding); - bool using_assumed = fromcode == os_locale.assumed; + std::string toname(tocode); + auto p = tocodes.find(toname); + if( p == tocodes.end() ) { + tocodes[toname] = iconv_open(tocode, fromcode); + } + iconv_t cd = tocodes[toname]; + + if (cd == (iconv_t)-1) { + cbl_message(ParIconvE, + "failed %<iconv_open%> tocode = %qs fromcode = %qs", + tocode, fromcode); + } if( fromcode == tocode || has_attr(hex_encoded_e) ) { return data.initial; } - if( is_ascii() ) return data.initial; + if( data.capacity == 0 ) { + assert(0 == strlen(data.initial)); + return data.initial; + } + if( holds_ascii() && is_ascii() ) { + if( type != FldNumericEdited ) { + if( ! data.initial_within_capacity() ) { + ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u", + cbl_field_t::level_str(level), name, data.initial, + strlen(data.initial), data.capacity ); + } + } + return data.initial; + } assert(data.capacity > 0); - std::vector<char> output(data.capacity + 2, '\0'); - char *out = output.data(); - char *in = const_cast<char*>(data.initial); - size_t n, inbytesleft = data.capacity, outbytesleft = output.size(); + // The final 2 bytes of the output are "!\0". It's a debugging sentinel. + size_t n; + size_t inbytesleft = data.capacity; + size_t outbytesleft = inbytesleft; if( !is_literal(this) && inbytesleft < strlen(data.initial) ) { inbytesleft = strlen(data.initial); } + if( type == FldNumericEdited ) { + outbytesleft = inbytesleft; + } + const unsigned int in_len = inbytesleft; + + char *in = const_cast<char*>(data.initial); + char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out; assert(fromcode != tocode); - while( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) { - if( !using_assumed ) break; // change only once - fromcode = guess_encoding(); - cd = iconv_open(tocode, fromcode); - dbgmsg("%s: trying input encoding %s", __func__, fromcode); - if( fromcode == tocode ) break; - } + /* + * If we're currently assuming the source code is encoded according to the + * locale (the default), and there's an iconv failure, try once more using a + * different assumption, that the source code is encoded as CP1252. + * + * This heuristic means that some UTF-8 literals could be converted until a + * CP1252 byte is encountered. We could be stricter about that. + * + * Also possible is a failure to avoid iconv with fromcode and tocode denote + * the same encoding but with different spellings, e.g. CP1252 and CP1252//. + */ - if( n == noconv ) { - if( !using_assumed ) { - yywarn("failed to decode '%s' as %s", data.initial, fromcode); - return NULL; + do { + if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) { + if( fromcode == codeset.source_encodings[0].name ) { + codeset.source_encoding = &codeset.source_encodings[1]; + fromcode = codeset.source_encoding->name; + tocodes.clear(); + cd = tocodes[toname] = iconv_open(tocode, fromcode); + dbgmsg("%s: trying input encoding %s", __func__, fromcode); + if( fromcode == tocode ) return data.initial; // no conversion required. + n = noconv - 1; // try again + } } + if( n == 0 ) break; + } while( n != noconv ); + + if( n == noconv ) { + size_t i = in_len - inbytesleft; + yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)", + fromcode, data.initial + i, tocode, inbytesleft, in_len); + if( false ) return NULL; return data.initial; } if( 0 < inbytesleft ) { // data.capacity + inbytesleft is not correct if the remaining portion has - // multibyte characters. But the fact reamins that the VALUE is too big. + // multibyte characters. But the fact remains that the VALUE is too big. ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u", cbl_field_t::level_str(level), name, data.initial, data.capacity + inbytesleft, data.capacity ); } // Replace data.initial only if iconv output differs. - if( 0 != memcmp(data.initial, output.data(), out - output.data()) ) { - assert(out <= output.data() + data.capacity); - - if( getenv(__func__) ) { - const char *eoi = data.initial + data.capacity, *p; - char nullitude[64] = "no null"; - if( (p = std::find(data.initial, eoi, '\0')) != eoi ) { - sprintf(nullitude, "NUL @ %zu", p - data.initial); - } - dbgmsg("%s:%d: before: %-15s %-20s: '%.*s'{%u}, %s", __func__, __LINE__, - 3 + cbl_field_type_str(type), name, - data.capacity, data.initial, data.capacity, nullitude); - } + if( 0 != memcmp(data.initial, output, out - output) ) { + assert(out <= output + data.capacity || type == FldNumericEdited); dbgmsg("%s: converted '%.*s' to %s", __func__, data.capacity, data.initial, tocode); - - int len = int(out - output.data()); - char *mem = static_cast<char*>( xcalloc(1, output.size()) ); - - // Set the new memory to all blanks, tacking a '!' on the end. - memset(mem, 0x20, output.size() - 1); - mem[ output.size() - 2] = '!'; + struct localspace_t { + char space[4]; + size_t len, erc; + explicit localspace_t( iconv_t cd ) { + static char input[1] = { 0x20 }; + size_t inbytesleft2 = sizeof(input), outbytesleft2 = sizeof(space); + char *in2 = input, *out2 = space; + + erc = iconv(cd, &in2, &inbytesleft2, &out2, &outbytesleft2); + len = out2 - space; + } + bool valid() const { return 0 < len && erc != size_t(-1); } + } spc(cd); + + if( ! spc.valid() ) { + dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__, + tocode, xstrerror(errno)); + ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno)); + return data.initial; + } + assert( 0 < spc.len && spc.valid() ); if( is_literal(this) ) { - data.capacity = len; // trailing '!' will be overwritten + data.capacity = out - output; // trailing '!' will be overwritten } - - memcpy(mem, output.data(), len); // copy only as much as iconv converted - - free(const_cast<char*>(data.initial)); - data.initial = mem; - - if( getenv(__func__) ) { - const char *eoi = data.initial + data.capacity, *p; - char nullitude[64] = "no null"; - if( (p = std::find(data.initial, eoi, '\0')) != eoi ) { - sprintf(nullitude, "NUL @ %zu", p - data.initial); - } - dbgmsg("%s:%d: after: %-15s %-20s: '%.*s'{%u}, %s", __func__, __LINE__, - "", name, - data.capacity, data.initial, data.capacity, nullitude); + // Pad with trailing blanks, tacking a '!' on the end. + for( const char *eout = output + data.capacity; + out < eout; + out += spc.len ) { + memcpy(out, spc.space, spc.len); } - + // Numeric literal strings may have leading zeros, making their length + // longer than their capacity. + out[0] = type == FldLiteralN? '\0' : '!'; + assert(out[1] == '\0'); + data.orig = data.initial; + data.initial = output; + } else { + free(output); } return data.initial; @@ -3702,8 +4005,8 @@ cbl_label_t::str() const { buf = xasprintf("%-12s %s top level [%s], line %d", type_str() + 3, name, mangled_name, line); } else { - buf = xasprintf("%-12s %s OF #%zu '%s' [%s], line %d", - type_str() + 3, name, parent, + buf = xasprintf("%-12s %s OF #" HOST_SIZE_T_PRINT_UNSIGNED " '%s' [%s], line %d", + type_str() + 3, name, (fmt_size_t)parent, cbl_label_of(symbol_at(parent))->name, mangled_name, line); } @@ -3733,12 +4036,9 @@ cbl_label_t::explicit_parent() const { } cbl_prog_hier_t::cbl_prog_hier_t() { - nlabel = std::count_if( symbols_begin(), symbols_end(), is_program ); - assert(nlabel >0); - labels = new cbl_prog_hier_t::program_label_t[nlabel]; - std::copy_if( symbols_begin(), symbols_end(), - labels, is_program ); + std::back_inserter(labels), is_program ); + assert(! labels.empty()); } /* @@ -3803,44 +4103,22 @@ common_callables_update( const size_t iprog ) { cbl_label_t * symbol_label_add( size_t program, cbl_label_t *input ) { - if( getenv(__func__) ) { - const cbl_label_t *L = input; - dbgmsg( "%s:%d: %-5s #%3zu %-9s '%s' of '%s' at line %d", __func__, __LINE__, - "input", - size_t(0), - L->type_str()+3, - L->name, - L->parent? cbl_label_of(symbol_at(L->parent))->name : "", - L->line ); - } - cbl_label_t *label = symbol_label(program, input->type, input->parent, input->name); if( label && label->type == LblNone ) { - const char *verb = "set"; label->type = input->type; label->parent = input->parent; label->line = input->line; - if( getenv(__func__) ) { - const cbl_label_t *L = label; - dbgmsg( "%s:%d: %-5s #%3zu %-9s '%s' of '%s' at line %d", - __func__, __LINE__, - verb, - symbol_elem_of(L) - symbols_begin(), - L->type_str()+3, - L->name, - L->parent? cbl_label_of(symbol_at(L->parent))->name : "", - L->line ); - } return label; } // Set the program's mangled name, dehyphenated and uniqified by parent index. if( input->type == LblProgram ) { char *psz = cobol_name_mangler(input->name); - input->mangled_name = xasprintf("%s.%zu", psz, input->parent); + input->mangled_name = xasprintf("%s." HOST_SIZE_T_PRINT_UNSIGNED, + psz, (fmt_size_t)input->parent); free(psz); } @@ -3853,59 +4131,45 @@ symbol_label_add( size_t program, cbl_label_t *input ) if( (e = symbol_add(&elem)) == NULL ) { cbl_errx("%s:%d: could not add '%s'", __func__, __LINE__, label->name); } + assert(e); common_callables_update( symbol_index(e) ); // restore munged line number unless symbol_add returned an existing label if( e->elem.label.line < 0 ) e->elem.label.line = -e->elem.label.line; - if( getenv(__func__) ) { - const cbl_label_t *L = cbl_label_of(e); - dbgmsg( "%s:%d: added #%3zu %-9s '%s' of '%s' at line %d", __func__, __LINE__, - e - symbols_begin(), - L->type_str()+3, - L->name, - L->parent? cbl_label_of(symbol_at(L->parent))->name : "", - L->line ); - } symbols.labelmap_add(e); return cbl_label_of(e); } /* - * Under ISO (and not IBM) Declaratives are followed by a Section name. When - * the first statement is parsed, verify, if Declaratives were used, that it + * Under ISO (and not IBM) Declaratives are followed by a Section name. If + * Declaratives were used, when the first statement is parsed verify that it * was preceeded by a Section name. */ bool -symbol_label_section_exists( size_t program ) { - auto pblob = std::find_if( symbols_begin(program), symbols_end(), - []( const auto& sym ) { - if( sym.type == SymField ) { - auto& f( sym.elem.field ); - return f.type == FldBlob; - } - return false; - } ); - if( pblob == symbols_end() ) return true; // Section name not required - - bool has_section = std::any_of( ++pblob, symbols_end(), - []( const auto& sym ) { - if( sym.type == SymLabel ) { - auto& L(sym.elem.label); - if( L.type == LblSection ) { - if( L.name[0] != '_' ) { // not implicit - return true; // Section name exists - } - } +symbol_label_section_exists( size_t eval_label_index ) { + auto eval = symbols_begin(eval_label_index); + /* cppcheck warns that the following statement depends on the order of + evaluation of side effects. Since this isn't my code, and since I don't + think the warning can be eliminated without rewriting it, I am just + supprressing it. + -- Bob Dubner, 2025-07-14 */ + // cppcheck-suppress unknownEvaluationOrder + bool has_section = std::any_of( ++eval, symbols_end(), + [program = eval->program]( const auto& sym ) { + if( program == sym.program && sym.type == SymLabel ) { + const auto& L(sym.elem.label); + // true if the symbol is an explicit label. + return L.type == LblSection && L.name[0] != '_'; } return false; } ); if( yydebug && ! has_section ) { - symbols_dump(program, true); + symbols_dump(eval_label_index, true); } - // Return true if no Declaratives, because the (non-)requirement is met. - // Return false if Declaratives exist, because no Section name was found. + // Return true if a user-defined SECTION was found after the Declaratives + // label section. return has_section; } @@ -3918,7 +4182,8 @@ symbol_program_add( size_t program, cbl_label_t *input ) // Set the program's mangled name, dehyphenated and uniqified by parent index. char *psz = cobol_name_mangler(input->name); - elem.elem.label.mangled_name = xasprintf("%s.%zu", psz, input->parent); + elem.elem.label.mangled_name = xasprintf("%s." HOST_SIZE_T_PRINT_UNSIGNED, + psz, (fmt_size_t)input->parent); free(psz); e = std::find_if( symbols_begin(program), symbols_end(), @@ -3960,11 +4225,6 @@ symbol_special_add( size_t program, struct cbl_special_name_t *special ) struct symbol_elem_t *e = symbol_special(program, special->name); if( e ) { - cbl_special_name_t *s = cbl_special_name_of(e); - if( getenv(__func__) ) { - dbgmsg("%s:%d matches %s %d (%s)", __func__, __LINE__, - special->name, int(s->id), s->name); - } return e; } assert(e == NULL); @@ -3975,11 +4235,6 @@ symbol_special_add( size_t program, struct cbl_special_name_t *special ) cbl_errx( "%s:%d: could not add '%s'", __func__, __LINE__, special->name); } - if( getenv(__func__) ) { - dbgmsg( "%s:%d: added special '%s'", __func__, __LINE__, - e->elem.special.name); - } - elem_key_t key(program, cbl_special_name_of(e)->name); symbols.specials[key] = symbol_index(e); @@ -4056,7 +4311,7 @@ expand_picture(const char *picture) { assert(strlen(picture) < PICTURE_MAX); // guaranteed by picset() in scanner size_t retval_length = PICTURE_MAX; - char *retval = (char *)xmalloc(retval_length); + char *retval = static_cast<char *>(xmalloc(retval_length)); size_t index = 0; int ch; @@ -4085,7 +4340,7 @@ expand_picture(const char *picture) if( index + repeat >= retval_length ) { retval_length <<= 1; - retval = (char *)xrealloc(retval, retval_length); + retval = static_cast<char *>(xrealloc(retval, retval_length)); } while(repeat--) @@ -4098,7 +4353,7 @@ expand_picture(const char *picture) if( index >= retval_length ) { retval_length <<= 1; - retval = (char *)xrealloc(retval, retval_length); + retval = static_cast<char *>(xrealloc(retval, retval_length)); } retval[index++] = ch; } @@ -4107,7 +4362,7 @@ expand_picture(const char *picture) if( index >= retval_length ) { retval_length <<= 1; - retval = (char *)xrealloc(retval, retval_length); + retval = static_cast<char *>(xrealloc(retval, retval_length)); } retval[index++] = '\0'; @@ -4130,7 +4385,6 @@ expand_picture(const char *picture) { pcurrency[i] = 'B'; } - dest_length += sign_length; } } @@ -4361,7 +4615,7 @@ symbol_program_callables( size_t program ) { if( e->type != SymLabel ) continue; if( e->elem.label.type != LblProgram ) continue; - auto prog = cbl_label_of(e); + const cbl_label_t * prog = cbl_label_of(e); if( program == symbol_index(e) && !prog->recursive ) continue; if( (self->parent == prog->parent && prog->common) || @@ -4395,6 +4649,7 @@ symbol_program_local( const char tgt_name[] ) { */ std::map<char, const char *> currencies; +// cppcheck-suppress-begin [nullPointerRedundantCheck] bool symbol_currency_add( const char symbol[], const char sign[] ) { // In service of CURRENCY sign PICTURE SYMBOL symbol @@ -4406,6 +4661,7 @@ symbol_currency_add( const char symbol[], const char sign[] ) { currencies[*symbol] = sign; return true; } +// cppcheck-suppress-end [nullPointerRedundantCheck] const char * symbol_currency( char sign ) { @@ -4413,6 +4669,11 @@ symbol_currency( char sign ) { if( currencies.size() == 0 ) { currencies['$'] = "$"; } + if( sign == '\0' ) { // default + auto result = currencies.begin(); + gcc_assert(result != currencies.end()); + return result->second; + } auto result = currencies.find(sign); return result == currencies.end()? NULL : result->second; } @@ -4428,24 +4689,19 @@ bool decimal_is_comma() { return decimal_point == ','; } /* * A cbl_occurs_key_t is part of a field definition, and comprises * size_t symbol indexes. A cbl_key_t is a list of field pointers, - * and can be created ad hoc to describe a sort. We can construct a + * and can be created ad hoc to describe a sort. We construct a * cbl_key_t from cbl_occurs_key_t. */ cbl_key_t:: cbl_key_t( const cbl_occurs_key_t& that ) : ascending(that.ascending) { - if( that.field_list.nfield == 0 ) { - *this = cbl_key_t(); - return; - } - - nfield = that.field_list.nfield; - fields = static_cast<cbl_field_t**>( xcalloc(nfield, - sizeof(*fields)) ); - for( size_t i=0; i < that.field_list.nfield; i++ ) { - fields[i] = cbl_field_of(symbol_at(that.field_list.fields[i])); - } + std::transform( that.field_list.fields, + that.field_list.fields + that.field_list.nfield, + std::back_inserter(fields), + []( size_t isym ) { + return cbl_field_of(symbol_at(isym)); + } ); } void @@ -4458,7 +4714,7 @@ cbl_occurs_t::key_alloc( bool ascending ) { } void -cbl_occurs_t::field_add( cbl_field_list_t& field_list, cbl_field_t *field ) { +cbl_occurs_t::field_add( cbl_field_list_t& field_list, const cbl_field_t *field ) { cbl_field_list_t list = field_list; size_t ifield = field_index(field); auto nbytes = sizeof(list.fields[0]) * (list.nfield + 1); @@ -4476,14 +4732,14 @@ cbl_occurs_t::key_field_add( cbl_field_t *field ) { } void -cbl_occurs_t::index_add( cbl_field_t *field ) { +cbl_occurs_t::index_add( const cbl_field_t *field ) { field_add(indexes, field); } class is_field_at { cbl_field_t *field; public: - is_field_at( cbl_field_t *field ) : field(field) {} + explicit is_field_at( cbl_field_t *field ) : field(field) {} bool operator()( size_t isym ) const { return field == field_at(isym); } @@ -4526,6 +4782,26 @@ cbl_occurs_t::subscript_ok( const cbl_field_t *subscript ) const { return bounds.lower <= (size_t)sub && (size_t)sub <= bounds.upper; } +const cbl_field_t * +symbol_unresolved_file_key( const cbl_file_t * file, + const cbl_name_t key_field_name ) { + const symbol_elem_t *file_sym = symbol_elem_of(file); + size_t program = file_sym->program; + for( const symbol_elem_t *e = file_sym - 1; e->program == program; e-- ) { + if( e->type == SymFile ) break; + if( e->type == SymField ) { + auto f = cbl_field_of(e); + if( f->type == FldLiteralA ) break; + if( f->type == FldForward ) { + if( 0 == strcmp(key_field_name, f->name) ) { + return f; + } + } + } + } + return nullptr; +} + cbl_file_key_t:: cbl_file_key_t( cbl_name_t name, const std::list<cbl_field_t *>& fields, @@ -4609,8 +4885,8 @@ symbol_forward_names( size_t ifield ) { for( auto sym = symbols_begin(ifield); sym && sym->type == SymField; ) { const cbl_field_t *field = cbl_field_of(sym); if( !(field->type == FldForward) ) { - dbgmsg("%s:%d: logic error, not FldForward: #%zu %s", - __func__, __LINE__, symbol_index(sym), field_str(field)); + dbgmsg("%s:%d: logic error, not FldForward: #" HOST_SIZE_T_PRINT_UNSIGNED " %s", + __func__, __LINE__, (fmt_size_t)symbol_index(sym), field_str(field)); } assert(field->type == FldForward); @@ -4633,8 +4909,9 @@ symbol_forward_to( size_t fwd ) { if( !elem.second ) { const auto& field = *cbl_field_of(symbols_begin(fwd)); if( yydebug ) - dbgmsg("%s:%d: no symbol found for #%zu %s %s", __func__, __LINE__, - fwd, cbl_field_type_str(field.type), field.name); + dbgmsg("%s:%d: no symbol found for #" HOST_SIZE_T_PRINT_UNSIGNED " %s %s", + __func__, __LINE__, + (fmt_size_t)fwd, cbl_field_type_str(field.type), field.name); return fwd; } @@ -4650,13 +4927,14 @@ cbl_file_key_t::deforward( size_t ifile ) { const auto file = cbl_file_of(symbol_at(ifile)); std::transform( fields, fields + nfield, fields, [ifile, file]( size_t fwd ) { - static std::map<size_t, int> keys; auto ifield = symbol_forward_to(fwd); const auto field = cbl_field_of(symbol_at(ifield)); if( is_forward(field) && yydebug ) { - dbgmsg("%s:%d: key %d: #%zu %s of %s is %s", "deforward", __LINE__, - keys[ifile]++, ifield, field->name, file->name, + static std::map<size_t, int> keys; + dbgmsg("%s:%d: key %d: #" HOST_SIZE_T_PRINT_UNSIGNED " %s of %s is %s", + "deforward", __LINE__, + keys[ifile]++, (fmt_size_t)ifield, field->name, file->name, cbl_field_type_str(field->type) + 3); } @@ -4665,7 +4943,7 @@ cbl_file_key_t::deforward( size_t ifile ) { if( ifield == fwd ) { ERROR_FIELD(field, "line %d: %s of %s " "is not defined", - file->line, field->name, file->name); + field->line, field->name, file->name); return ifield; } @@ -4694,9 +4972,13 @@ cbl_file_key_t::deforward( size_t ifile ) { // looked-up field must have same file as parent if( ! (parent != NULL && symbol_index(symbol_elem_of(parent)) == ifile) ) { - ERROR_FIELD(field, "line %d: %s of %s " - "is not defined in file description", - file->line, field->name, file->name); + const cbl_field_t *undefined = + symbol_unresolved_file_key(file, field->name); + int lineno = undefined? undefined->line : file->line; + ERROR_FIELD(undefined? undefined : field, + "line %d: %s of %s " + "is not defined in file description", + lineno, field->name, file->name); } return ifield; } ); @@ -4710,7 +4992,7 @@ cbl_file_key_t::str() const { *p++ = '['; for( auto f = fields; f < fields + nfield; f++) { - auto n = sprintf(p, "%s%zu", sep, *f); + auto n = sprintf(p, "%s" HOST_SIZE_T_PRINT_UNSIGNED, sep, (fmt_size_t)*f); p += n; sep = ", "; } @@ -4723,6 +5005,8 @@ cbl_file_key_t::str() const { */ void cbl_file_t::deforward() { + const size_t ifile( symbol_index(symbol_elem_of(this)) ); + if( user_status ) { user_status = symbol_forward_to(user_status); @@ -4734,33 +5018,18 @@ cbl_file_t::deforward() { } for( auto p = keys; p < keys + nkey; p++ ) { - p->deforward( symbol_index(symbol_elem_of(this)) ); + p->deforward(ifile); } } char * cbl_file_t::keys_str() const { - std::vector <char *> ks(nkey); - std::transform(keys, keys + nkey, ks.begin(), - []( const cbl_file_key_t& key ) { - return key.str(); - } ); - size_t n = 4 * nkey + std::accumulate(ks.begin(), ks.end(), 0, - []( int n, const char *s ) { - return n + strlen(s); - } ); - char *output = static_cast<char*>( xcalloc(1, n) ), *p = output; - const char *sep = ""; - - *p++ = '['; - for( auto k : ks ) { - p = stpcpy(p, sep); - p = stpcpy(p, k); - sep = ", "; - free(k); + std::string names = "["; + for( cbl_file_key_t *p = keys; p < keys + nkey; p++ ) { + names += p->str(); + names += p + 1 < keys + nkey ? "," : "]"; } - *p++ = ']'; - return output; + return xasprintf("%s", names.c_str()); } /* @@ -4822,11 +5091,13 @@ cbl_file_status_cmp( const void *K, const void *E ) { static long file_status_status_of( file_status_t status ) { size_t n = COUNT_OF(file_status_fields); - file_status_field_t *fs, key { status }; - - fs = (file_status_field_t*)lfind( &key, file_status_fields, - &n, sizeof(*fs), cbl_file_status_cmp ); + const file_status_field_t *fs, key { status }; + fs = static_cast<file_status_field_t*>(lfind( &key, + file_status_fields, + &n, + sizeof(*fs), + cbl_file_status_cmp )); return fs? (long)fs->status : -1; } @@ -4852,21 +5123,6 @@ ast_file_status_between( file_status_t lower, file_status_t upper ) { } bool -is_register_field(cbl_field_t *field) - { - // TRUE when the field is an executable-level global variable of the type we - // are calling a "register", like RETURN-CODE or UPSI or the like: - return - ( field->parent == 0 - && field->level == 0 - && !(field->attr & intermediate_e) - && !(field->attr & filler_e) - && field->type != FldClass - && field->type != FldBlob - ); - } - -bool has_value( cbl_field_type_t type ) { // Indicates that the field type contains data that can be expressed as // a numeric value @@ -4882,7 +5138,6 @@ has_value( cbl_field_type_t type ) { case FldForward: case FldSwitch: case FldDisplay: - case FldBlob: return false; case FldIndex: case FldPointer: |
