aboutsummaryrefslogtreecommitdiff
path: root/gcc/cobol/symbols.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/cobol/symbols.cc')
-rw-r--r--gcc/cobol/symbols.cc1549
1 files changed, 902 insertions, 647 deletions
diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc
index a4fc82c..a177fcd 100644
--- a/gcc/cobol/symbols.cc
+++ b/gcc/cobol/symbols.cc
@@ -28,6 +28,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+// cppcheck-suppress-file duplicateBreak
+
+#include "config.h"
#include <fstream> // Before cobol-system because it uses poisoned functions
#include "cobol-system.h"
@@ -44,6 +47,7 @@
#include "inspect.h"
#include "../../libgcobol/io.h"
#include "genapi.h"
+#include "../../libgcobol/charmaps.h"
#pragma GCC diagnostic ignored "-Wunused-result"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -55,7 +59,7 @@ class symbol_pair_t
{
const symbol_elem_t *first, *last;
public:
- symbol_pair_t( const symbol_elem_t * first, const symbol_elem_t * end = NULL )
+ explicit symbol_pair_t( const symbol_elem_t * first, const symbol_elem_t * end = NULL )
: first(first), last(end)
{}
@@ -135,11 +139,9 @@ static struct symbol_table_t {
static symbol_table_t&
symbol_table_extend() {
- static FILE *mapped;
if( symbols.nelem == 0 ) { // first time: create file & set initial capacity
- assert(mapped == NULL && symbols.fd == -1);
-
+ FILE *mapped;
if( (mapped = tmpfile()) == NULL ) {
cbl_err( "could not create temporary file for symbol table");
}
@@ -159,8 +161,8 @@ symbol_table_extend() {
off_t len = symbols.size();
if( 0 != ftruncate(symbols.fd, len) ) {
- cbl_err( "%s:%d:could not extend symbol table to %zu elements",
- __func__, __LINE__, symbols.capacity);
+ cbl_err( "%s:%d: could not extend symbol table to %lu elements",
+ __func__, __LINE__, gb4(symbols.capacity));
}
/*
@@ -228,6 +230,12 @@ cbl_span_t::from_field() { assert(from); return from->field; }
cbl_field_t *
cbl_span_t::len_field() { assert(len); return len->field; }
+cbl_ffi_arg_t::cbl_ffi_arg_t()
+ : optional(false)
+ , crv(by_reference_e)
+ , attr(none_of_e)
+{}
+
cbl_ffi_arg_t::
cbl_ffi_arg_t( cbl_refer_t* refer, cbl_ffi_arg_attr_t attr )
: optional(false)
@@ -256,43 +264,6 @@ cbl_ffi_arg_t( cbl_ffi_crv_t crv,
} while(0)
-cbl_field_t *
-symbol_valid_udf_args( size_t function, std::list<cbl_refer_t> args ) {
- auto L = cbl_label_of(symbol_at(function));
- if( ! L->returning ) {
- dbgmsg("logic error: %s does not define RETURNING", L->name);
- return NULL;
- }
- auto e = std::find_if( symbol_at(function), symbols_end(),
- []( auto symbol ) {
- if( symbol.type == SymDataSection ) {
- auto section(symbol.elem.section);
- return section.type == linkage_sect_e;
- }
- return false;
- } );
- for( auto arg : args ) {
- size_t iarg(1);
- e++; // skip over linkage_sect_e, which appears after the function
- if( e->type != SymField ) {
- ERROR_FIELD(arg.field,
- "FUNCTION %s has no defined parameter matching arg %zu, '%s'",
- L->name, iarg, arg.field->name );
- return NULL;
- }
-
- auto tgt = cbl_field_of(e);
-
- if( ! valid_move(tgt, arg.field) ) {
- ERROR_FIELD(tgt, "FUNCTION %s arg %zu, '%s' cannot be passed to %s, type %s",
- L->name, iarg, arg.field->pretty_name(),
- tgt->pretty_name(), 3 + cbl_field_type_str(tgt->type) );
- return NULL;
- }
- }
- return cbl_field_of(symbol_at(L->returning));
-}
-
static const struct cbl_occurs_t nonarray = cbl_occurs_t();
#if 0
@@ -316,9 +287,13 @@ class group_size_t {
enum { constq = constant_e | quoted_e };
static symbol_elem_t
-elementize( cbl_field_t& field ) {
+elementize( const cbl_field_t& field ) {
symbol_elem_t sym (SymField);
sym.elem.field = field;
+ // Dubner did the following because he didn't feel like creating yet another
+ // cbl_field_t constructor that included the hardcoded encoding for the
+ // global special registers.
+ sym.elem.field.codeset.set();
return sym;
}
@@ -369,12 +344,6 @@ special_pair_cmp( const cbl_special_name_t& key,
const cbl_special_name_t& elem ) {
const bool matched = key.id == elem.id || 0 == strcasecmp(key.name, elem.name);
- if( getenv(__func__) ) {
- dbgmsg("%s:%d: key: id=%2d, %s", __func__, __LINE__, key.id, key.name);
- dbgmsg("%s:%d: elem: id=%2d, %s => %s", __func__, __LINE__,
- elem.id, elem.name, matched? "match" : "no match");
- }
-
return matched;
}
@@ -492,9 +461,6 @@ symbol_elem_cmp( const void *K, const void *E )
case SymDataSection:
return k->elem.section.type == e->elem.section.type ? 0 : 1;
break;
- case SymFunction:
- return strcmp(k->elem.function.name, e->elem.function.name);
- break;
case SymField:
if( has_parent(k) && cbl_field_of(k)->parent != cbl_field_of(e)->parent ) {
return 1;
@@ -545,6 +511,9 @@ symbol_elem_cmp( const void *K, const void *E )
case SymSpecial:
return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1;
break;
+ case SymLocale:
+ return strcasecmp(k->elem.locale.name, e->elem.locale.name);
+ break;
case SymAlphabet:
return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name);
break;
@@ -711,9 +680,25 @@ symbol_special( size_t program, const char name[] )
}
struct symbol_elem_t *
+symbol_locale( size_t program, const char name[] )
+{
+ cbl_locale_t locale(name);
+ assert(strlen(name) < sizeof locale.name);
+ strcpy(locale.name, name);
+
+ struct symbol_elem_t key(SymLocale, program), *e;
+ key.elem.locale = locale;
+
+ e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems,
+ &symbols.nelem, sizeof(key),
+ symbol_elem_cmp ) );
+ return e;
+}
+
+struct symbol_elem_t *
symbol_alphabet( size_t program, const char name[] )
{
- cbl_alphabet_t alphabet(YYLTYPE(), custom_encoding_e);
+ cbl_alphabet_t alphabet(YYLTYPE(), custom_encoding_e); // cppcheck-suppress syntaxError
assert(strlen(name) < sizeof alphabet.name);
strcpy(alphabet.name, name);
@@ -780,7 +765,7 @@ cbl_field_attr_str( cbl_field_attr_t attr ) {
case function_e: return "function";
case quoted_e: return "quoted";
case filler_e: return "filler";
- case _spare_e: return "temporary";
+ case register_e: return "register";
case intermediate_e: return "intermediate";
case embiggened_e: return "embiggened";
case all_alpha_e: return "all_alpha";
@@ -799,7 +784,7 @@ cbl_field_attr_str( cbl_field_attr_t attr ) {
case leading_e: return "leading";
case separate_e: return "separate";
case envar_e: return "envar";
- case dnu_1_e: return "dnu_1";
+ case encoded_e: return "encoded";
case bool_encoded_e: return "bool";
case hex_encoded_e: return "hex";
case depends_on_e: return "depends_on";
@@ -820,24 +805,24 @@ cbl_field_t::size() const {
return field_size(this);
}
-size_t
+uint64_t
cbl_field_t::set_attr( cbl_field_attr_t attr ) {
if( attr == signable_e ) {
if( ! has_attr(attr) && this->var_decl_node != NULL ) {
parser_field_attr_set(this, attr);
}
}
- return this->attr |= size_t(attr);
+ return this->attr |= uint64_t(attr);
}
-size_t
+uint64_t
cbl_field_t::clear_attr( cbl_field_attr_t attr ) {
if( attr == signable_e ) {
if( this->var_decl_node != nullptr && has_attr(attr) ) {
parser_field_attr_set(this, attr, false);
}
}
- return this->attr &= ~size_t(attr);
+ return this->attr &= ~uint64_t(attr);
}
static uint32_t
@@ -893,13 +878,6 @@ update_block_offsets( struct symbol_elem_t *block)
uint32_t offset = cbl_field_of(block)->offset;
const uint32_t block_level = cbl_field_of(block)->level;
- if( getenv(__func__) ) {
- cbl_field_t *field = cbl_field_of(block);
- dbgmsg( "%s: offset is %3zu for %2u %-30s #%3zu P%zu",
- __func__, field->offset, field->level, field->name,
- symbol_index(block), field->parent );
- }
-
struct symbol_elem_t *e = block;
for( ++e; e < symbols_end(); e++ ) {
if( e->type != SymField ) {
@@ -929,12 +907,6 @@ update_block_offsets( struct symbol_elem_t *block)
offset += field_memsize(field);
}
- if( getenv(__func__) ) {
- dbgmsg( "%s: offset is %3zu for %2u %-30s #%3zu P%zu",
- __func__, field->offset, field->level, field->name,
- symbol_index(e), field->parent );
- }
-
if( field->type == FldGroup ) {
e = update_block_offsets(e) - 1;
}
@@ -965,7 +937,7 @@ end_of_group( const cbl_field_t *group, const cbl_field_t *field ) {
class eog_t {
const cbl_field_t * group;
public:
- eog_t( const symbol_elem_t *e ) : group(cbl_field_of(e)) {}
+ explicit eog_t( const symbol_elem_t *e ) : group(cbl_field_of(e)) {}
bool operator()( symbol_elem_t& e ) {
return e.type == SymField && end_of_group(group, cbl_field_of(&e));
@@ -985,7 +957,7 @@ end_of_group( size_t igroup ) {
if( e->program != group->program ) return isym;
if( e->type == SymLabel ) return isym; // end of data division
if( e->type == SymField ) {
- auto f = cbl_field_of(e);
+ const cbl_field_t * f = cbl_field_of(e);
if( f->level == LEVEL77 || f->level == 66 ) return isym;
if( f->level == 1 && f->parent != igroup ) {
return isym;
@@ -996,7 +968,7 @@ end_of_group( size_t igroup ) {
}
eog_t eog(symbol_at(igroup));
- symbol_elem_t *e = std::find_if( symbols_begin(++igroup), symbols_end(), eog );
+ const symbol_elem_t *e = std::find_if( symbols_begin(++igroup), symbols_end(), eog );
return e - symbols_begin();
}
@@ -1020,8 +992,8 @@ symbol_field_capacity( const cbl_field_t *field ) {
size_t size = std::accumulate( symbol_at(bog), symbol_at_impl(eog),
0, sym_field_size::capacity );
- if(true) dbgmsg("%s: %02u %s.data.capacity was computed as %zu", __func__,
- field->level, field->name, size);
+ if(true) dbgmsg("%s: %02u %s.data.capacity was computed as " HOST_SIZE_T_PRINT_UNSIGNED,
+ __func__, field->level, field->name, (fmt_size_t)size);
return size;
}
@@ -1035,14 +1007,15 @@ has_odo( const symbol_elem_t& e ) {
struct cbl_field_t *
symbol_find_odo_debug( cbl_field_t * field ) {
size_t bog = field_index(field), eog = end_of_group(bog);
- dbgmsg("%s: %s is #%zu - #%zu of %zu, ends at %s", __func__,
- field->name, bog, eog, symbols.nelem,
+ dbgmsg("%s: %s is #" HOST_SIZE_T_PRINT_UNSIGNED " - #" HOST_SIZE_T_PRINT_UNSIGNED
+ " of " HOST_SIZE_T_PRINT_UNSIGNED ", ends at %s", __func__,
+ field->name, (fmt_size_t)bog, (fmt_size_t)eog, (fmt_size_t)symbols.nelem,
eog == symbols.nelem? "[end]" : cbl_field_of(symbol_at(eog))->name );
auto e = std::find_if( symbol_at(bog), symbol_at_impl(eog, true), has_odo );
if( e != symbol_at_impl(eog, true) ) {
- dbgmsg("%s: %s has ODO at #%zu (return '%s')", __func__,
- field->name, symbol_index(e),
+ dbgmsg("%s: %s has ODO at #" HOST_SIZE_T_PRINT_UNSIGNED " (return '%s')", __func__,
+ field->name, (fmt_size_t)symbol_index(e),
cbl_field_of(e)->name );
}
return e == symbol_at_impl(eog, true)? NULL : cbl_field_of(e);
@@ -1050,8 +1023,7 @@ symbol_find_odo_debug( cbl_field_t * field ) {
// Return OCCURS DEPENDING ON table subordinate to field, if any.
struct cbl_field_t *
-symbol_find_odo( cbl_field_t * field ) {
- if( getenv(__func__) ) return symbol_find_odo_debug(field);
+symbol_find_odo( const cbl_field_t * field ) {
size_t bog = field_index(field), eog = end_of_group(bog);
auto e = std::find_if( symbol_at(bog), symbol_at_impl(eog, true), has_odo );
return e == symbol_at_impl(eog, true)? NULL : cbl_field_of(e);
@@ -1067,8 +1039,8 @@ symbols_dump( size_t first, bool header ) {
if( !yydebug ) return 0;
if( header ) {
- fprintf(stderr, "Symbol Table has %zu elements\n",
- symbols_end() - symbols_begin());
+ fprintf(stderr, "Symbol Table has " HOST_SIZE_T_PRINT_UNSIGNED " elements\n",
+ (fmt_size_t)(symbols_end() - symbols_begin()));
}
for( struct symbol_elem_t *e = symbols_begin(first); e < symbols_end(); e++ ) {
@@ -1076,25 +1048,22 @@ symbols_dump( size_t first, bool header ) {
switch(e->type) {
case SymFilename:
- s = xasprintf("%4zu %-18s %s", e->program,
+ s = xasprintf("%4" GCC_PRISZ "u %-18s %s", (fmt_size_t)e->program,
"Filename", e->elem.filename);
break;
case SymDataSection:
- s = xasprintf("%4zu %-18s line %d", e->program,
+ s = xasprintf("%4" GCC_PRISZ "u %-18s line %d", (fmt_size_t)e->program,
cbl_section_of(e)->name(), cbl_section_of(e)->line);
break;
- case SymFunction:
- s = xasprintf("%4zu %-15s %s", e->program,
- "Function", e->elem.function.name);
- break;
case SymField: {
auto field = cbl_field_of(e);
char *odo_str = NULL;
if( field->occurs.depending_on != 0 ) {
- odo_str = xasprintf("odo %zu", field->occurs.depending_on );
+ odo_str = xasprintf("odo " HOST_SIZE_T_PRINT_UNSIGNED,
+ (fmt_size_t)field->occurs.depending_on );
}
ninvalid += cbl_field_of(e)->type == FldInvalid? 1 : 0;
- s = xasprintf("%4zu %-18s %s (%s)", e->program,
+ s = xasprintf("%4" GCC_PRISZ "u %-18s %s (%s)", (fmt_size_t)e->program,
cbl_field_type_str(cbl_field_of(e)->type) + 3,
field_str(cbl_field_of(e)),
odo_str? odo_str :
@@ -1102,7 +1071,7 @@ symbols_dump( size_t first, bool header ) {
}
break;
case SymLabel:
- s = xasprintf("%4zu %-18s %s", e->program,
+ s = xasprintf("%4" GCC_PRISZ "u %-18s %s", (fmt_size_t)e->program,
"Labe1l", e->elem.label.str());
if( LblProgram == cbl_label_of(e)->type ) {
const auto& L = *cbl_label_of(e);
@@ -1114,31 +1083,35 @@ symbols_dump( size_t first, bool header ) {
}
break;
case SymSpecial:
- s = xasprintf("%4zu %-18s id=%2d, %s", e->program,
+ s = xasprintf("%4" GCC_PRISZ "u %-18s id=%2d, %s", (fmt_size_t)e->program,
"Special", e->elem.special.id, e->elem.special.name);
break;
case SymAlphabet:
- s = xasprintf("%4zu %-18s encoding=%2d, '%s'", e->program, "Alphabet",
+ s = xasprintf("%4" GCC_PRISZ "u %-18s encoding=%2d, '%s'",
+ (fmt_size_t)e->program, "Alphabet",
int(e->elem.alphabet.encoding), e->elem.alphabet.name);
break;
case SymFile:
- s = xasprintf("%4zu %-18s %-20s", e->program,
+ s = xasprintf("%4" GCC_PRISZ "u %-18s %-20s", (fmt_size_t)e->program,
"File", e->elem.file.name);
{
char same_as[26] = "";
if( cbl_file_of(e)->same_record_as > 0 ) {
- sprintf(same_as, "s%3zu", cbl_file_of(e)->same_record_as);
+ sprintf(same_as, "s%3" GCC_PRISZ "u",
+ (fmt_size_t)cbl_file_of(e)->same_record_as);
}
const char *type = file_org_str(e->elem.file.org);
char *part = s;
- s = xasprintf("%s %-4s %s %s %s{%zu-%zu} status=#%zu",
+ s = xasprintf("%s %-4s %s %s %s{" HOST_SIZE_T_PRINT_UNSIGNED "-"
+ HOST_SIZE_T_PRINT_UNSIGNED "} status=#"
+ HOST_SIZE_T_PRINT_UNSIGNED,
part, same_as, type,
e->elem.file.keys_str(),
cbl_file_of(e)->varies()? "varies " : "",
- cbl_file_of(e)->varying_size.min,
- cbl_file_of(e)->varying_size.max,
- cbl_file_of(e)->user_status);
+ (fmt_size_t)cbl_file_of(e)->varying_size.min,
+ (fmt_size_t)cbl_file_of(e)->varying_size.max,
+ (fmt_size_t)cbl_file_of(e)->user_status);
free(part);
}
break;
@@ -1146,7 +1119,8 @@ symbols_dump( size_t first, bool header ) {
dbgmsg("%s: cannot dump symbol type %d", __func__, e->type);
continue;
}
- fprintf(stderr, "%4zu: %s\n", e - symbols_begin(), s);
+ fprintf(stderr, "%4" GCC_PRISZ "u: %s\n",
+ (fmt_size_t)(e - symbols_begin()), s);
free(s);
}
return ninvalid;
@@ -1226,7 +1200,7 @@ static struct symbol_elem_t *
// If an 01 record exists for the FD/SD, use its capacity as the
// default_record capacity.
if( p != symbols_end() ) {
- auto record = cbl_field_of(p);
+ const cbl_field_t * record = cbl_field_of(p);
assert(record->level == 1);
e = calculate_capacity(p);
auto record_size = std::max(record->data.memsize,
@@ -1258,7 +1232,8 @@ static struct symbol_elem_t *
}
if(yydebug && group->type != FldGroup) {
- dbgmsg("Field #%zu '%s' is not a group", symbol_index(e), group->name);
+ dbgmsg("Field #" HOST_SIZE_T_PRINT_UNSIGNED " '%s' is not a group",
+ (fmt_size_t)symbol_index(e), group->name);
symbols_dump(symbols.first_program, true);
}
if( group->type == FldInvalid ) return e;
@@ -1288,10 +1263,6 @@ static struct symbol_elem_t *
// Print accumulating details for one group to debug log.
bool details = false;
- if( yydebug ) {
- const auto details_for = getenv("symbols_update");
- details = details_for && 0 == strcasecmp(details_for, group->name);
- }
// At end of group, members is a list of all immediate children, any
// of which might have been redefined and so acquired a memsize.
@@ -1317,7 +1288,7 @@ static struct symbol_elem_t *
// If group has a parent that is a record area, expand it, too.
if( 0 < group->parent ) {
- auto redefined = symbol_redefines(group);
+ redefined = symbol_redefines(group);
if( redefined && is_record_area(redefined) ) {
if( redefined->data.capacity < group->data.memsize ) {
redefined->data.capacity = group->data.memsize;
@@ -1347,7 +1318,7 @@ static struct symbol_elem_t *
switch( group->level ) {
case 1: case 77:
- if( dialect_mf() && is_table(group) ) {
+ if( is_table(group) ) {
size_t elem_size = std::max(group->data.memsize, group->data.memsize);
group->data.memsize = elem_size * group->occurs.ntimes();
}
@@ -1363,23 +1334,6 @@ verify_block( const struct symbol_elem_t *block,
if( e->type != SymField ) {
continue;
}
- const struct cbl_field_t *field = cbl_field_of(e);
-
- if( getenv(__func__) ) {
- if( e == block ) {
- static const char ds[] = "--------------------------------";
- dbgmsg( "%17s %-3s %-3s %-18s %-3s %3s %-16s C/D/R = init\n"
- "%.25s %-.3s %-.3s %-.18s %-.3s %.3s %-.16s %-.7s %-.16s",
- "", "ndx", "off", "type", "par", "lvl", "name",
- ds, ds, ds, ds, ds, ds, ds, ds, ds );
- }
- dbgmsg( "%s:%d: %3zu %3zu %-18s %3zu %02d %-16s %2u/%u/%d = '%s'",
- __func__, __LINE__, e - symbols.elems, field->offset,
- cbl_field_type_str(field->type),
- field->parent, field->level, field->name,
- field->data.capacity, field->data.digits, field->data.rdigits,
- field->data.initial? field->data.initial : "(none)" );
- }
}
}
@@ -1415,19 +1369,18 @@ immediately_follows( const cbl_field_t *field ) {
bool
is_variable_length( const cbl_field_t *field ) {
- bool odo = false;
- std::find_if( symbol_at(field_index(field)) + 1, symbols_end(),
- [&odo, field]( const auto& elem ) {
- if( elem.type == SymField ) {
- auto f = cbl_field_of(&elem);
- if( f->level <= field->level ) return true;
- if( f->occurs.depending_on ) {
- odo = true;
- return true;
- }
- }
- return false;
- } );
+ // RENAMES may be included in end_of_group.
+ size_t isym = field_index(field), esym = end_of_group(isym);
+ bool odo = std::any_of( symbol_at(isym) + 1, symbol_at_impl(esym),
+ [field]( const auto& elem ) {
+ if( elem.type == SymField ) {
+ auto f = cbl_field_of(&elem);
+ if( field->level < f->level ) { // exclude RENAMES
+ return 0 < f->occurs.depending_on;
+ }
+ }
+ return false;
+ } );
return odo;
}
@@ -1439,7 +1392,7 @@ is_variable_length( const cbl_field_t *field ) {
* occurs-depending table."
*/
cbl_field_t *
-rename_not_ok( cbl_field_t *first, cbl_field_t *last) {
+rename_not_ok( const cbl_field_t *first, const cbl_field_t *last) {
symbol_elem_t
*beg = symbol_at(field_index(first)),
*end = symbol_at(field_index(last));
@@ -1507,11 +1460,11 @@ cbl_field_t::attr_str( const std::vector<cbl_field_attr_t>& attrs ) const
const char *sep = "";
char *out = NULL;
- for( auto attr : attrs ) {
+ for( auto attr_l : attrs ) {
char *part = out;
- if( has_attr(attr) ) {
+ if( has_attr(attr_l) ) {
int erc = asprintf(&out, "%s%s%s",
- part? part : "", sep, cbl_field_attr_str(attr));
+ part? part : "", sep, cbl_field_attr_str(attr_l));
if( -1 == erc ) return part;
free(part);
sep = ", ";
@@ -1522,7 +1475,7 @@ cbl_field_t::attr_str( const std::vector<cbl_field_attr_t>& attrs ) const
char *
field_str( const cbl_field_t *field ) {
- static char string[3*sizeof(cbl_name_t)];
+ static char string[4*sizeof(cbl_name_t)];
char *pend = string;
char name[2*sizeof(cbl_name_t)] = "";
@@ -1534,17 +1487,23 @@ field_str( const cbl_field_t *field ) {
for( size_t i=0; i < field->occurs.nkey; i++ ) {
updown[i] = field->occurs.keys[i].ascending? 'A' : 'D';
}
- snprintf(name, sizeof(name), "%s[%zu]%s",
- field->name, field->occurs.ntimes(), updown.data());
+ snprintf(name, sizeof(name), "%s[" HOST_SIZE_T_PRINT_UNSIGNED "]%s",
+ field->name, (fmt_size_t)field->occurs.ntimes(), updown.data());
}
}
+ if( field->codeset.valid() ) {
+ strcat(name, " (");
+ strcat(name, field->codeset.name());
+ strcat(name, ") ");
+ }
+
pend += snprintf(pend, string + sizeof(string) - pend,
- "%02d %-20s ", field->level, name);
+ "%02u %-20s ", field->level, name);
char offset[32] = "";
if( field->level > 1 ) {
- sprintf( offset, "off%3zu", field->offset );
+ sprintf( offset, "off%3" GCC_PRISZ "u", (fmt_size_t)field->offset );
}
char parredef =
@@ -1552,41 +1511,66 @@ field_str( const cbl_field_t *field ) {
if( 'r' == parredef && field->level == 0 ) parredef = 'p';
if( field->has_attr(typedef_e) ) parredef = 'T';
- const char *data = field->data.initial? field->data.initial : NULL;
- if( data ) {
- auto fig = cbl_figconst_of(data);
+ const char *init = field->data.initial? field->data.initial : NULL;
+ if( init ) {
+ auto fig = cbl_figconst_of(init);
if( normal_value_e != fig ) {
- data = cbl_figconst_str(fig);
+ init = cbl_figconst_str(fig);
} else {
- char *s;
- auto n = asprintf(&s, "'%s'", data);
- gcc_assert(n);
- auto eodata = data + field->data.capacity;
- if( eodata != std::find_if_not(data, eodata, fisprint) ) {
- char *p = reinterpret_cast<char*>(xrealloc(s, n + 8 + 2 * field->data.capacity));
- if( is_elementary(field->type) &&
- field->type != FldPointer && p != NULL ) {
- s = p;
- p += n;
- strcat( p, "(0x" );
- p += 3;
- for( auto d=data; d < eodata; d++ ) {
- p += sprintf(p, "%02x", *d);
+#if 0
+ // At this point, we might have to convert 'init' back to ASCII
+ char *false_init = static_cast<char *>(xmalloc(field->init.capacity+1));
+ memcpy(false_init, field->init.initial, field->data.capacity);
+ false_data[field->data.capacity] = '\0';
+ size_t charsout;
+
+ cbl_encoding_t enc_from = field->codeset.encoding;
+ if( field->type == FldNumericDisplay )
+ {
+ // Apparently we need to trace back the meaning of data.literal for
+ // field::type == FldNumericDisplay
+ enc_from = DEFAULT_SOURCE_ENCODING;
+ }
+
+ init = __gg__iconverter(enc_from,
+ DEFAULT_SOURCE_ENCODING,
+ false_data,
+ field->data.capacity,
+ &charsout);
+#endif
+ auto eoinit = init + strlen(init);
+ char *s = xasprintf("'%s'", init);
+
+ // No NUL within the initial data.
+ auto ok = std::none_of( init, eoinit,
+ []( char ch ) { return ch == '\0'; } );
+ assert(ok);
+
+ // If any of the init are unprintable, provide a hex version.
+ if( ! std::all_of(init, eoinit, fisprint) ) {
+ if( is_elementary(field->type) && field->type != FldPointer ) {
+ const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
+ s = reinterpret_cast<char*>(xrealloc(s, len));
+ strcat( s, " (0x" );
+ char *p = s + strlen(s);
+ for( auto d=init; d < eoinit; d++ ) {
+ p += sprintf(p, "%02x", static_cast<unsigned char>(*d));
}
- strcat( p++, ")" );
+ strcat( s, ")" );
+ assert(strlen(s) < len);
}
}
- data = s;
+ init = s;
}
} else {
- data = "NULL";
+ init = "NULL";
if( field->type == FldSwitch ) {
- data = xasprintf("0x%02x", field->data.upsi_mask_of()->value);
+ init = xasprintf("0x%02x", field->data.upsi_mask_of()->value);
}
}
if( field->level == 88 ) {
const auto& dom = *field->data.domain_of();
- data = xasprintf("%s%s %s - %s%s",
+ init = xasprintf("%s%s %s - %s%s",
dom.first.all? "A" : "",
value_or_figconst_name(dom.first.name()) ,
dom.first.is_numeric? "(num)" : "",
@@ -1605,20 +1589,20 @@ field_str( const cbl_field_t *field ) {
intermediate_e, embiggened_e, all_alpha_e, all_x_e,
all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e,
/* global_e, external_e, */ blank_zero_e, /* linkage_e, local_e, */ leading_e,
- separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e,
+ separate_e, envar_e, encoded_e, bool_encoded_e, hex_encoded_e,
depends_on_e, /* initialized_e, */ has_value_e, ieeedec_e, big_endian_e,
same_as_e, record_key_e, typedef_e, strongdef_e,
};
pend += snprintf(pend, string + sizeof(string) - pend,
- "%c%3zu %-6s %c%c%c %2u{%3u,%u,%d = %s} (%s), line %d",
- parredef, field->parent, offset,
+ "%c%3" GCC_PRISZ "u %-6s %c%c%c %2u{%3u,%u,%d = %s} (%s), line %d",
+ parredef, (fmt_size_t)field->parent, offset,
(field->attr & global_e)? 'G' : 0x20,
(field->attr & external_e)? 'E' : 0x20,
storage_type,
field->data.memsize,
field->data.capacity, field->data.digits, field->data.rdigits,
- data, field->attr_str(attrs), field->line );
+ init, field->attr_str(attrs), field->line );
return string;
}
@@ -1634,7 +1618,7 @@ struct capacity_of {
capacity_of operator()( symbol_elem_t& elem ) {
if( elem.type == SymField ) {
- cbl_field_t *f = cbl_field_of(&elem);
+ const cbl_field_t *f = cbl_field_of(&elem);
if( is_elementary(f->type) ) {
capacity += field_size(f);
}
@@ -1647,12 +1631,14 @@ static void
extend_66_capacity( cbl_field_t *alias ) {
static_assert(sizeof(symbol_elem_t*) == sizeof(const char *),
"all pointers must be same size");
- assert(alias->data.picture);
+ assert(alias->level == 66);
assert(alias->type == FldGroup);
+ assert(alias->data.picture);
+ // If data.picture is not NULL, it is the THRU symbol, see symbol_field_alias2.
symbol_elem_t *e = symbol_at(alias->parent);
symbol_elem_t *e2 =
reinterpret_cast<symbol_elem_t*>(const_cast<char*>(alias->data.picture));
- assert(e < e2);
+ assert(symbol_index(e) < symbol_index(e2));
alias->data.picture = NULL;
capacity_of cap;
@@ -1667,6 +1653,9 @@ extend_66_capacity( cbl_field_t *alias ) {
bool
symbols_alphabet_set( size_t program, const char name[]) {
+
+////////
+// Older version
struct alpha {
void operator()( symbol_elem_t& elem ) const {
if( elem.type == SymAlphabet ) {
@@ -1676,7 +1665,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
};
// Define alphabets for codegen.
- std::for_each(symbols_begin(), symbols_end(), alpha() );
+ std::for_each(symbols_begin(program), symbols_end(), alpha() );
// Set collation sequence before parser_symbol_add.
if( name ) {
@@ -1687,6 +1676,38 @@ symbols_alphabet_set( size_t program, const char name[]) {
parser_alphabet_use(*cbl_alphabet_of(e));
}
return true;
+// End older version
+////////
+
+//// // Define alphabets for codegen.
+//// const cbl_alphabet_t *alphabet = nullptr;
+//// bool supported = true;
+////
+//// std::for_each( symbols_begin(program), symbols_end(),
+//// [&alphabet, &supported]( const auto& sym ) {
+//// if( sym.type == SymAlphabet ) {
+//// alphabet = cbl_alphabet_of(&sym);
+//// supported = __gg__encoding_iconv_valid(alphabet->encoding);
+//// if( supported ) {
+//// parser_alphabet( *alphabet );
+//// }
+//// }
+//// } );
+//// if( ! supported ) {
+//// const char *encoding = __gg__encoding_iconv_name(alphabet->encoding);
+//// cbl_unimplemented("alphabet %qs (as %qs)", alphabet->name, encoding);
+//// return false;
+//// }
+////
+//// // Set collation sequence before parser_symbol_add.`
+//// if( name ) {
+//// symbol_elem_t *e = symbol_alphabet(program, name);
+//// if( !e ) {
+//// return false;
+//// }
+//// parser_alphabet_use(*cbl_alphabet_of(e));
+//// }
+//// return true;
}
static std::ostream&
@@ -1694,6 +1715,9 @@ operator<<( std::ostream& os, const cbl_occurs_bounds_t& bound ) {
return os << bound.lower << ',' << bound.upper;
}
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+// Keep this debugging function around for when it is needed
static std::ostream&
operator<<( std::ostream& os, const cbl_field_data_t& field ) {
return os << field.memsize << ','
@@ -1717,16 +1741,7 @@ operator<<( std::ostream& os, const cbl_field_t& field ) {
<< ',' << field.line
<< ',' << field.data;
}
-
-static void
-write_field_csv( size_t isym, const cbl_field_t *field ) {
- static std::ofstream os( getenv("GCOBOL_DATA") );
- assert(os.is_open());
-
- if( symbols.first_program < isym) {
- os << isym << "," << *field << std::endl;
- }
-}
+#pragma GCC diagnostic pop
static std::map<size_t, std::set<size_t>> same_record_areas;
size_t parse_error_count();
@@ -1747,11 +1762,6 @@ symbols_update( size_t first, bool parsed_ok ) {
struct symbol_elem_t *p, *pend;
std::list<cbl_field_t*> shared_record_areas;
- if( getenv(__func__) ) {
- fprintf(stderr, "Initial");
- symbols_dump(std::max(first, symbols.first_program), true);
- }
-
for( p = symbols_begin(first); p < symbols_end(); p++ ) {
if( p->type == SymAlphabet ) continue; // Alphabets already processed.
@@ -1773,8 +1783,7 @@ symbols_update( size_t first, bool parsed_ok ) {
break;
case 1:
pend = calculate_capacity(p);
- if( dialect_mf() && is_table(field) ) {
- cbl_field_t *field = cbl_field_of(p);
+ if( is_table(field) ) {
if( field->data.memsize < field->size() ) {
field->data.memsize = field->size();
}
@@ -1796,10 +1805,6 @@ symbols_update( size_t first, bool parsed_ok ) {
// no special processing for other levels
}
- if( getenv("GCOBOL_DATA") ) {
- write_field_csv( p - symbols_begin(), field );
- }
-
// Update ODO field in situ.
if( is_table(field) ) {
size_t& odo = field->occurs.depending_on;
@@ -1817,7 +1822,7 @@ symbols_update( size_t first, bool parsed_ok ) {
bool size_invalid = field->data.memsize > 0 && symbol_redefines(field);
if( size_invalid ) { // redefine of record area is ok
- auto redefined = symbol_redefines(field);
+ const cbl_field_t * redefined = symbol_redefines(field);
size_invalid = ! is_record_area(redefined);
}
if( !field->is_valid() || size_invalid )
@@ -1840,8 +1845,8 @@ symbols_update( size_t first, bool parsed_ok ) {
if( e == symbols_end() ) {
// no field redefines the file's default record
auto file = cbl_file_of(symbol_at(field->parent));
- ERROR_FIELD(field, "line %d: %s lacks a file description",
- file->line, file->name);
+ ERROR_FIELD(field, "%s lacks a file description",
+ file->name);
return 0;
}
}
@@ -1852,28 +1857,25 @@ symbols_update( size_t first, bool parsed_ok ) {
field->line, field->level_str(), field->name);
} else {
- dbgmsg("%s: error: data item %s #%zu '%s' capacity %u rejected",
+ dbgmsg("%s: error: data item %s #" HOST_SIZE_T_PRINT_UNSIGNED
+ " '%s' capacity %u rejected",
__func__,
3 + cbl_field_type_str(field->type),
- isym, field->name, field->data.capacity);
+ (fmt_size_t)isym, field->name, field->data.capacity);
+ gcc_unreachable();
}
}
return 0;
}
if(! (field->data.memsize == 0 || field_size(field) <= field->data.memsize) ) {
- dbgmsg( "%s:%d: #%zu: invalid: %s", __func__, __LINE__,
- symbol_index(p), field_str(cbl_field_of(p)) );
+ dbgmsg( "%s:%d: #" HOST_SIZE_T_PRINT_UNSIGNED ": invalid: %s", __func__, __LINE__,
+ (fmt_size_t)symbol_index(p), field_str(cbl_field_of(p)) );
}
assert(field->data.memsize == 0 || field_size(field) <= field_memsize(field));
assert( !(field->data.memsize > 0 && symbol_explicitly_redefines(field)) );
}
- if( getenv(__func__) ) {
- fprintf(stderr, "Pre");
- symbols_dump(std::max(first, symbols.first_program), true);
- }
-
// A shared record area has no 01 child because that child redefines its parent.
for( auto sharer : shared_record_areas ) {
auto redefined = cbl_field_of(symbol_at(sharer->parent));
@@ -1888,6 +1890,15 @@ symbols_update( size_t first, bool parsed_ok ) {
if( field->level == 0 && field->is_key_name() ) continue;
if( is_literal(field) && field->var_decl_node != NULL ) continue;
+ // If the field is a constant for a figconstant, just use it.
+ if( field->level != 0 && field->has_attr(constant_e) ) {
+ auto fig = cbl_figconst_field_of(field->data.initial);
+ if( fig ) {
+ field->var_decl_node = fig->var_decl_node;
+ continue;
+ }
+ }
+
if( field->is_typedef() ) {
auto isym = end_of_group( symbol_index(p) );
p = symbol_at(--isym);
@@ -1895,7 +1906,7 @@ symbols_update( size_t first, bool parsed_ok ) {
}
// Verify REDEFINing field has no ODO components
- auto parent = symbol_redefines(field);
+ const cbl_field_t * parent = symbol_redefines(field);
if( parent && !is_record_area(parent) && is_variable_length(field) ) {
ERROR_FIELD(field, "line %d: REDEFINES field %s cannot be variable length",
field->line, field->name);
@@ -1908,6 +1919,52 @@ symbols_update( size_t first, bool parsed_ok ) {
field->line, field->level_str(), field->name);
continue;
}
+ if( is_numeric(field) && ! field->has_attr(constant_e) ) {
+ if( field->data.capacity == 0 ) {
+ ERROR_FIELD(field, "numeric %qs has USAGE that requires PICTURE %s",
+ field->name, field->data.initial);
+ }
+ }
+
+ if( ! field->codeset.consistent() ) {
+ if( ! field->codeset.valid() ) {
+ switch(field->type) {
+ case FldForward:
+ case FldInvalid:
+ gcc_unreachable();
+ case FldAlphaEdited:
+ case FldAlphanumeric:
+ case FldClass:
+ case FldDisplay:
+ case FldGroup:
+ case FldLiteralA:
+ case FldLiteralN:
+ case FldNumericDisplay:
+ case FldNumericEdited:
+ if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
+ error_msg(symbol_field_location(field_index(field)),
+ "internal: %qs encoding not defined", field->name);
+ }
+ break;
+ case FldConditional:
+ case FldFloat:
+ case FldIndex:
+ case FldNumericBin5:
+ case FldNumericBinary:
+ case FldPacked:
+ case FldPointer:
+ case FldSwitch:
+ break;
+ }
+ } else {
+ if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
+ error_msg(symbol_field_location(field_index(field)),
+ "internal: %qs encoding %qs inconsistent",
+ field->name,
+ cbl_alphabet_t::encoding_str(field->codeset.encoding) );
+ }
+ }
+ }
assert( ! field->is_typedef() );
@@ -1986,7 +2043,8 @@ symbol_field_forward( size_t index ) {
assert( index < symbols.nelem );
symbol_elem_t *e = symbol_at(index);
if( (e->type != SymField) ) {
- dbgmsg("%s: logic error: #%zu is %s", __func__, index, symbol_type_str(e->type));
+ dbgmsg("%s: logic error: #" HOST_SIZE_T_PRINT_UNSIGNED " is %s",
+ __func__, (fmt_size_t)index, symbol_type_str(e->type));
}
assert(e->type == SymField);
@@ -2084,15 +2142,15 @@ symbol_in_file( symbol_elem_t *e ) {
}
#endif
-static struct cbl_field_t *
-symbol_field_parent_set( struct cbl_field_t *field )
+static cbl_field_t *
+symbol_field_parent_set( cbl_field_t *field )
{
if( field->level == 01 ) return NULL;
if( field->level == 77 ) return NULL;
if( field->level == 78 ) return NULL;
struct symbol_elem_t *e = symbols.elems + symbols.nelem - 1;
- struct symbol_elem_t *first = symbols.elems + symbols.first_program;
+ const struct symbol_elem_t *first = symbols.elems + symbols.first_program;
for( ; field->parent == 0 && e >= first; e-- ) {
if( ! (e->type == SymField && cbl_field_of(e)->level > 0) ) {
@@ -2130,6 +2188,9 @@ symbol_field_parent_set( struct cbl_field_t *field )
return NULL;
}
prior->type = FldGroup;
+ if( ! prior->codeset.set() ) { // needs attention
+ dbgmsg("'%s' is already National", prior->name);
+ }
field->attr |= numeric_group_attrs(prior);
}
// verify level 88 domain value
@@ -2170,7 +2231,7 @@ class parent_elem_set
private:
size_t parent_index;
public:
- parent_elem_set( size_t parent_index )
+ explicit parent_elem_set( size_t parent_index )
: parent_index(parent_index)
{}
void operator()( struct symbol_elem_t& e ) {
@@ -2187,8 +2248,10 @@ add_token( symbol_elem_t sym ) {
return sym;
}
+const std::list<cbl_field_t> cdf_literalize();
+
/*
- * When adding registers, be sure to add a complementary cblc_field_t
+ * When adding special registers, be sure to create the actual cblc_field_t
* in libgcobol/constants.cc.
*/
void
@@ -2207,53 +2270,63 @@ symbol_table_init(void) {
// These should match the definitions in libgcobol/constants.cc
static cbl_field_t constants[] = {
- { 0, FldAlphanumeric, FldInvalid, space_value_e | constq, 0, 0, 0, nonarray, 0,
- "SPACE", 0, {}, {1,1,0,0, " \0\xFF"}, NULL },
- { 0, FldAlphanumeric, FldInvalid, space_value_e | constq , 0, 0, 0, nonarray, 0,
- "SPACES", 0, {}, {1,1,0,0, " \0\xFF"}, NULL },
- { 0, FldAlphanumeric, FldInvalid, low_value_e | constq, 0, 0, 0, nonarray, 0,
- "LOW_VALUES", 0, {}, {1,1,0,0, "L\0\xFF"}, NULL },
- { 0, FldAlphanumeric, FldInvalid, zero_value_e | constq, 0, 0, 0, nonarray, 0,
- "ZEROS", 0, {}, {1,1,0,0, "0"}, NULL },
- { 0, FldAlphanumeric, FldInvalid, high_value_e | constq, 0, 0, 0, nonarray, 0,
- "HIGH_VALUES", 0, {}, {1,1,0,0, "H\0\xFF"}, NULL },
+ { FldAlphanumeric, space_value_e | int(constq) | register_e,
+ {1,1,0,0, " \0\xFF"}, 0, "SPACE" },
+ { FldAlphanumeric, space_value_e | int(constq) | register_e,
+ {1,1,0,0, " \0\xFF"}, 0, "SPACES" },
+ { FldAlphanumeric, low_value_e | int(constq) | register_e,
+ {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES" },
+ { FldAlphanumeric, zero_value_e | int(constq) | register_e,
+ {1,1,0,0, "0"}, 0, "ZEROS" },
+ { FldAlphanumeric, high_value_e | int(constq) | register_e,
+ {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES" },
// IBM standard: QUOTE is a double-quote unless APOST compiler option
- { 0, FldAlphanumeric, FldInvalid, quote_value_e | constq , 0, 0, 0, nonarray, 0,
- "QUOTES", 0, {}, {1,1,0,0, "\"\0\xFF"}, NULL },
- { 0, FldPointer, FldPointer, constq , 0, 0, 0, nonarray, 0,
- "NULLS", 0, {}, {8,8,0,0, zeroes_for_null_pointer}, NULL },
+ { FldAlphanumeric, quote_value_e | int(constq) | register_e ,
+ {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES" },
+ { FldPointer, int(constq) | register_e ,
+ {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS" },
// IBM defines TALLY
// 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO.
- { 0, FldNumericBin5, FldInvalid, signable_e, 0, 0, 0, nonarray, 0,
- "_TALLY", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL },
+ { FldNumericBin5, signable_e | register_e,
+ {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY" },
// 01 ARGI is the current index into the argv array
- { 0, FldNumericBin5, FldInvalid, signable_e, 0, 0, 0, nonarray, 0,
- "_ARGI", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL },
+ { FldNumericBin5, signable_e | register_e,
+ {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI" },
// These last two don't require actual storage; they get BOOL var_decl_node
// in parser_symbol_add()
- { 0, FldConditional, FldInvalid, constant_e , 0, 0, 0, nonarray, 0,
- "_VERY_TRUE", 0, {}, {1,1,0,0, ""}, NULL },
- { 0, FldConditional, FldInvalid, constant_e , 0, 0, 0, nonarray, 0,
- "_VERY_FALSE", 0, {}, {1,1,0,0, ""}, NULL },
+ { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE" },
+ { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE" },
};
for( struct cbl_field_t *f = constants;
f < constants + COUNT_OF(constants); f++ ) {
f->our_index = table.nelem;
struct symbol_elem_t sym(SymField, 0);
sym.elem.field = *f;
+ // The following makes these constants match the definitions in
+ // constants.cc. Consider expanding the constructor fo cbl_field_t to
+ // handle encoding.
+ sym.elem.field.codeset.encoding = iconv_CP1252_e;
table.elems[table.nelem++] = sym;
}
static symbol_elem_t environs[] = {
+ { symbol_elem_t{ 0, cbl_special_name_t{0, CONSOLE_e, "CONSOLE", 0, "/dev/stdout"}} }, // stdout in DISPLAY; stdin in ACCEPT
+
+ { symbol_elem_t{ 0, cbl_special_name_t{0, STDIN_e, "STDIN", 0, "/dev/stdin"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, SYSIN_e, "SYSIN", 0, "/dev/stdin"}} },
- { symbol_elem_t{ 0, cbl_special_name_t{0, SYSIPT_e, "SYSIPT", 0, "/dev/stdout"}} },
+ { symbol_elem_t{ 0, cbl_special_name_t{0, SYSIPT_e, "SYSIPT", 0, "/dev/stdin"}} },
+
+ { symbol_elem_t{ 0, cbl_special_name_t{0, STDOUT_e, "STDOUT", 0, "/dev/stdout"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, SYSOUT_e, "SYSOUT", 0, "/dev/stdout"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, SYSLIST_e, "SYSLIST", 0, "/dev/stdout"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, SYSLST_e, "SYSLST", 0, "/dev/stdout"}} },
+
{ symbol_elem_t{ 0, cbl_special_name_t{0, SYSPUNCH_e, "SYSPUNCH", 0, "/dev/stderr"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, SYSPCH_e, "SYSPCH", 0, "/dev/stderr"}} },
- { symbol_elem_t{ 0, cbl_special_name_t{0, CONSOLE_e, "CONSOLE", 0, "/dev/stdout"}} },
+ { symbol_elem_t{ 0, cbl_special_name_t{0, STDERR_e, "STDERR", 0, "/dev/stderr"}} },
+ { symbol_elem_t{ 0, cbl_special_name_t{0, SYSERR_e, "SYSERR", 0, "/dev/stderr"}} },
+
{ symbol_elem_t{ 0, cbl_special_name_t{0, C01_e, "C01", 0, "/dev/null"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, C02_e, "C02", 0, "/dev/null"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, C03_e, "C03", 0, "/dev/null"}} },
@@ -2273,10 +2346,6 @@ symbol_table_init(void) {
{ symbol_elem_t{ 0, cbl_special_name_t{0, S04_e, "S04", 0, "/dev/null"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, S05_e, "S05", 0, "/dev/null"}} },
{ symbol_elem_t{ 0, cbl_special_name_t{0, AFP_5A_e, "AFP-5A", 0, "/dev/null"}} },
- { symbol_elem_t{ 0, cbl_special_name_t{0, STDIN_e, "STDIN", 0, "/dev/stdin"}} },
- { symbol_elem_t{ 0, cbl_special_name_t{0, STDOUT_e, "STDOUT", 0, "/dev/stdout"}} },
- { symbol_elem_t{ 0, cbl_special_name_t{0, STDERR_e, "STDERR", 0, "/dev/stderr"}} },
- { symbol_elem_t{ 0, cbl_special_name_t{0, SYSERR_e, "SYSERR", 0, "/dev/stderr"}} },
};
struct symbol_elem_t *p = table.elems + table.nelem;
@@ -2303,31 +2372,31 @@ symbol_table_init(void) {
**/
static cbl_field_t debug_registers[] = {
- { 0, FldGroup, FldInvalid, global_e, 0,0,1, nonarray, 0,
- "DEBUG-ITEM", 0, {}, {132,132,0,0, NULL}, NULL },
- { 0, FldAlphanumeric, FldInvalid, global_e, 0,0,2, nonarray, 0,
- "DEBUG-LINE", 0, {}, {6,6,0,0, " "}, NULL },
- { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0,
- "FILLER", 0, {}, {1,1,0,0, " "}, NULL },
- { 0, FldAlphanumeric, FldInvalid, global_e, 0,0,2, nonarray, 0,
- "DEBUG-NAME", 0, {}, {30,30,0,0, NULL}, NULL },
- { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0,
- "FILLER", 0, {}, {1,1,0,0, " "}, NULL },
- { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0,
- "DEBUG-SUB-1", 0, {}, {5,5,3,0, NULL}, NULL },
- { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0,
- "FILLER", 0, {}, {1,1,0,0, " "}, NULL },
- { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0,
- "DEBUG-SUB-2", 0, {}, {5,5,3,0, NULL}, NULL },
- { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0,
- "FILLER", 0, {}, {1,1,0,0, " "}, NULL },
- { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0,
- "DEBUG-SUB-3", 0, {}, {5,5,3,0, NULL}, NULL },
- { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0,
- "FILLER", 0, {}, {1,1,0,0, " "}, NULL },
- { 0, FldAlphanumeric, FldInvalid, signable_e | global_e, 0,0,2, nonarray, 0,
- "DEBUG-CONTENTS", 0, {}, {76,76,0,0, NULL}, NULL },
-};
+ { FldGroup, register_e,
+ {132,132,0,0, NULL}, 1, "DEBUG-ITEM" },
+ { FldAlphanumeric, register_e,
+ {6,6,0,0, " "}, 2, "DEBUG-LINE" },
+ { FldAlphanumeric, register_e|filler_e,
+ {1,1,0,0, " "}, 2, "FILLER" },
+ { FldAlphanumeric, register_e,
+ {30,30,0,0, NULL}, 2, "DEBUG-NAME" },
+ { FldAlphanumeric, register_e|filler_e,
+ {1,1,0,0, " "}, 2, "FILLER" },
+ { FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
+ {5,5,4,0, NULL}, 2, "DEBUG-SUB-1" },
+ { FldAlphanumeric, register_e|filler_e,
+ {1,1,0,0, " "}, 2, "FILLER" },
+ { FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
+ {5,5,4,0, NULL}, 2, "DEBUG-SUB-2" },
+ { FldAlphanumeric, register_e|filler_e,
+ {1,1,0,0, " "}, 2, "FILLER" },
+ { FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
+ {5,5,4,0, NULL}, 2, "DEBUG-SUB-3" },
+ { FldAlphanumeric, register_e | filler_e,
+ {1,1,0,0, " "}, 2, "FILLER" },
+ { FldAlphanumeric, signable_e | register_e,
+ {76,76,0,0, NULL}, 2, "DEBUG-CONTENTS" },
+ };
// debug registers
assert(table.nelem + COUNT_OF(debug_registers) < table.capacity);
@@ -2345,26 +2414,18 @@ symbol_table_init(void) {
assert(table.nelem < table.capacity);
std::for_each(debug_start+1, p, parent_elem_set(debug_start - table.elems));
+ // special registers
static cbl_field_t special_registers[] = {
- { 0, FldNumericDisplay, FldInvalid, 0, 0, 0, 0, nonarray, 0, "_FILE_STATUS",
- 0, {}, {2,2,2,0, NULL}, NULL },
- { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "UPSI-0",
- 0, {}, {2,2,4,0, NULL}, NULL },
- { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "RETURN-CODE",
- 0, {}, {2,2,4,0, NULL}, NULL },
- { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "LINAGE-COUNTER",
- 0, {}, {2,2,4,0, NULL}, NULL },
- { 0, FldLiteralA, FldInvalid, 0, 0, 0, 0, nonarray, 0, "_dev_stdin",
- 0, {}, {0,0,0,0, "/dev/stdin"}, NULL },
- { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_stdout",
- 0, {}, {0,0,0,0, "/dev/stdout"}, NULL },
- { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_stderr",
- 0, {}, {0,0,0,0, "/dev/stderr"}, NULL },
- { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_null",
- 0, {}, {0,0,0,0, "/dev/null"}, NULL },
+ { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS" },
+ { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0" },
+ { FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE" },
+ { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER" },
+ { FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin" },
+ { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout" },
+ { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr" },
+ { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/null"}, 0, "_dev_null" },
};
- // special registers
assert(table.nelem + COUNT_OF(special_registers) < table.capacity);
p = table.elems + table.nelem;
@@ -2374,6 +2435,34 @@ symbol_table_init(void) {
table.nelem = p - table.elems;
assert(table.nelem < table.capacity);
+ // xml registers
+ static cbl_field_t xml_registers[] = {
+ { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-CODE" },
+ { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT" },
+ { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION" },
+ { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE" },
+ { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE" },
+ { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX" },
+ { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX" },
+ { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-TEXT" },
+ { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NTEXT" },
+ }, * const eoxml = xml_registers + COUNT_OF(xml_registers);
+
+ assert(table.nelem + COUNT_OF(xml_registers) < table.capacity);
+
+ p = table.elems + table.nelem;
+ p = std::transform(xml_registers, eoxml, p, elementize);
+ table.nelem = p - table.elems;
+ assert(table.nelem < table.capacity);
+
+ // Add any CDF values already defined as literals.
+ // After symbols are ready, the CDF adds them directly.
+ const std::list<cbl_field_t> cdf_values = cdf_literalize();
+ table.nelem += cdf_values.size();
+ assert(table.nelem < table.capacity);
+
+ p = std::transform(cdf_values.begin(), cdf_values.end(), p, elementize);
+
// Initialize symbol table.
symbols = table;
@@ -2391,8 +2480,6 @@ symbol_table_init(void) {
symbols.registers.return_code = symbol_index(symbol_field(0,0, "RETURN-CODE"));
symbols.registers.very_true = symbol_index(symbol_field(0,0, "_VERY_TRUE"));
symbols.registers.very_false = symbol_index(symbol_field(0,0, "_VERY_FALSE"));
-
- if( getenv(__func__) ) symbols_dump(0, true);
}
/*
@@ -2476,7 +2563,7 @@ symbol_file_add( size_t program, cbl_file_t *file ) {
return NULL;
}
- struct symbol_elem_t sym = { SymFile, program };
+ symbol_elem_t sym{ SymFile, program };
sym.elem.file = *file;
e = symbol_add(&sym);
@@ -2489,16 +2576,23 @@ symbol_file_add( size_t program, cbl_file_t *file ) {
return e;
}
-struct symbol_elem_t *
-symbol_alphabet_add( size_t program, struct cbl_alphabet_t *alphabet ) {
- struct symbol_elem_t sym{ SymAlphabet, program };
+symbol_elem_t *
+symbol_locale_add( size_t program, const cbl_locale_t *locale ) {
+ symbol_elem_t sym{ SymLocale, program };
+ sym.elem.locale = *locale;
+ return symbol_add(&sym);
+}
+
+symbol_elem_t *
+symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ) {
+ symbol_elem_t sym{ SymAlphabet, program };
sym.elem.alphabet = *alphabet;
return symbol_add(&sym);
}
-size_t
+uint64_t
numeric_group_attrs( const cbl_field_t *field ) {
- static const size_t inherit = signable_e | leading_e | separate_e | big_endian_e;
+ static const uint64_t inherit = signable_e | leading_e | separate_e | big_endian_e;
static_assert(sizeof(cbl_field_t::type) < sizeof(inherit), "need bigger type");
assert(field);
if( field->type == FldNumericDisplay || field->type == FldGroup ) {
@@ -2534,7 +2628,7 @@ symbol_typedef_add( size_t program, struct cbl_field_t *field ) {
auto e = symbols_end() - 1;
assert( symbols_begin() < e );
if( e->type == SymField ) {
- auto f = cbl_field_of(e);
+ const cbl_field_t * f = cbl_field_of(e);
if( f == field ) return e;
}
@@ -2572,7 +2666,7 @@ struct symbol_elem_t *
symbol_field_add( size_t program, struct cbl_field_t *field )
{
field->our_index = symbols.nelem;
- cbl_field_t *parent = symbol_field_parent_set( field );
+ const cbl_field_t *parent = symbol_field_parent_set( field );
if( parent && parent->type == FldGroup) {
// Inherit effects of parent's USAGE, as though it appeared 1st in the
// member's definition.
@@ -2580,35 +2674,19 @@ symbol_field_add( size_t program, struct cbl_field_t *field )
field->attr = inherit & parent->attr;
field->attr |= numeric_group_attrs(parent);
field->usage = parent->usage;
+ if( field->level == 66 || field->level == 88 ) {
+ field->codeset = parent->codeset;
+ }
// BINARY-LONG, for example, sets capacity.
if( is_numeric(parent->usage) && parent->data.capacity > 0 ) {
field->type = parent->usage;
field->data = parent->data;
- field->data = 0.0;
+ field->data = 0; // cppcheck-suppress redundantAssignment
+ // // cppcheck doesn't understand multiple overloaded operator=
field->data.initial = NULL;
}
}
- char *s;
- if( (s = getenv(__func__)) != NULL ) {
- if( s[0] == 'D' ) {
- for( struct symbol_elem_t *e = symbols_begin(); e < symbols_end(); e++ ) {
- fprintf(stderr, "%zu: %s ", e - symbols.elems, symbol_type_str(e->type));
- if( e->type == SymField ) {
- fprintf(stderr, "%s = %s",
- cbl_field_of(e)->name, cbl_field_of(e)->data.initial);
- }
- fprintf(stderr, "\n");
- }
- }
-
- dbgmsg( "%s:%d: %3zu %-18s %02d %-16s %u/%u/%d = '%s'", __func__, __LINE__,
- field->offset,
- cbl_field_type_str(field->type), field->level, field->name,
- field->data.capacity, field->data.digits, field->data.rdigits,
- field->data.initial? field->data.initial : "(none)" );
- }
-
if( is_forward(field) ) {
auto *e = symbol_field( program, field->parent, field->name );
if( e ) {
@@ -2723,6 +2801,7 @@ symbol_field( size_t program, size_t parent, const char name[] )
return p != end? &*p : NULL;
}
+// cppcheck-suppress-begin [CastIntegerToAddressAtReturn] obviously not true
symbol_elem_t *
symbol_register( const char name[] )
{
@@ -2738,6 +2817,7 @@ symbol_register( const char name[] )
return p;
}
+// cppcheck-suppress-end [CastIntegerToAddressAtReturn]
// Find current 01 record during Level 66 construction.
const symbol_elem_t *
@@ -2765,11 +2845,8 @@ symbol_field_forward_add( size_t program, size_t parent,
auto e = symbol_field(program, parent, name);
if( e ) return e;
- struct cbl_field_t field = { 0,
- FldForward, FldInvalid, 0, parent, 0, 0,
- nonarray, line, "",
- 0, cbl_field_t::linkage_t(),
- {0,0,0,0, " "}, NULL };
+ cbl_field_t field = { FldForward, 0, line };
+ field.parent = parent;
if( sizeof(field.name) < strlen(name) ) {
dbgmsg("%s:%d: logic error: name %s too long", __func__, __LINE__, name);
return NULL;
@@ -2800,11 +2877,12 @@ symbol_literalA( size_t program, const char name[] )
struct symbol_elem_t *
symbol_file( size_t program, const char name[] ) {
size_t nelem = symbols.nelem;
- struct symbol_elem_t key = { SymFile, program }, *e = &key;
+ symbol_elem_t key{ SymFile, program }, *e = &key;
assert(strlen(name) < sizeof(key.elem.file.name));
strcpy(key.elem.file.name, name);
+ // cppcheck-suppress-begin [knownConditionTrueFalse]
do {
e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems,
&nelem, sizeof(*e),
@@ -2813,6 +2891,7 @@ symbol_file( size_t program, const char name[] ) {
key.program = cbl_label_of(symbol_at(key.program))->parent;
if( key.program == 0 ) break; // no file without a program
} while( !e );
+ // cppcheck-suppress-end [knownConditionTrueFalse]
if( e ) {
assert(e->type == SymFile);
@@ -2920,7 +2999,7 @@ seek_parent( const symbol_elem_t *e, size_t level ) {
struct symbol_elem_t *
symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) {
if( target_in_src(tgt, src) ) {
- ERROR_FIELD(tgt, "%s %s may not reference itself as part of %s %s",
+ ERROR_FIELD(tgt, "%s %s may not reference itself as part of %s %s",
tgt->level_str(), tgt->name, src->level_str(), src->name);
return NULL;
}
@@ -2953,6 +3032,7 @@ symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) {
cbl_field_t dup = {};
dup.parent = field_index(tgt);
dup.line = tgt->line;
+ dup.codeset = tgt->codeset;
elem_group_t group(++bog, eog);
@@ -3006,7 +3086,7 @@ symbol_file_same_record_area( std::list<cbl_file_t*>& files ) {
}
static symbol_elem_t *
-next_program( symbol_elem_t *elem ) {
+next_program( const symbol_elem_t *elem ) {
size_t start = elem? symbol_index(elem) : 0;
symbol_elem_t * e =
std::find_if( symbols_begin(start), symbols_end(), is_program );
@@ -3044,14 +3124,14 @@ is_numeric_constant( const char name[] ) {
// get default record layout for a file
struct cbl_field_t *
-symbol_file_record( struct cbl_file_t *file ) {
+symbol_file_record( const cbl_file_t *file ) {
return cbl_field_of(symbol_at(file->default_record));
}
class is_section {
cbl_section_type_t section_type;
public:
- is_section( cbl_section_type_t sect ) : section_type(sect) {}
+ explicit is_section( cbl_section_type_t sect ) : section_type(sect) {}
bool operator()( symbol_elem_t& e ) const {
return e.type == SymDataSection && cbl_section_of(&e)->type == section_type;
}
@@ -3062,8 +3142,6 @@ static bool fd_record_size_cmp( const symbol_elem_t& a, const symbol_elem_t& b )
return cbl_field_of(&a)->data.capacity < cbl_field_of(&b)->data.capacity;
}
-cbl_file_key_t cbl_file_t::no_key;
-
/*
* Find largest and smallest record defined for a file. The rule is:
* cbl_file_t::varies() returns true if the record size varies,
@@ -3120,12 +3198,6 @@ symbol_file_record_sizes( struct cbl_file_t *file ) {
output.min = cbl_field_of(&*p.first)->data.capacity;
output.max = cbl_field_of(&*p.second)->data.capacity;
- if( yydebug && getenv(__func__) ) {
- dbgmsg("%s: %s: min '%s' %zu, max '%s' %zu", __func__, file->name,
- cbl_field_of(&*p.first)->name, output.min,
- cbl_field_of(&*p.second)->name, output.max);
- }
-
assert(output.min > 0 && "min record size is 0");
assert(output.min <= output.max);
@@ -3172,15 +3244,141 @@ constant_of( size_t isym )
return field;
}
+cbl_locale_t::cbl_locale_t( const cbl_name_t name, const char iconv_name[] ) {
+ gcc_assert(strlen(name) < sizeof this->name);
+ strcpy(this->name, name);
+
+ if( iconv_name ) {
+ encoding = __gg__encoding_iconv_type(iconv_name);
+
+ strcpy(collation, "C");
+ // If the iconv_name is prefixed by langauge_COUNTRY (e.g. en_US), capture that.
+ auto pend = iconv_name + strlen(iconv_name);
+ auto p = std::find(iconv_name, pend, '.');
+ if( p < pend ) {
+ auto pend2 = std::copy(iconv_name, p, collation);
+ std::fill(pend2, collation + sizeof(collation), '\0');
+ iconv_name = ++p;
+ }
+ encoding = __gg__encoding_iconv_type(iconv_name);
+ }
+}
+
+cbl_alphabet_t::cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name )
+ : loc(loc)
+ , locale(locale)
+ , low_index(0)
+ , high_index(255)
+ , last_index(0)
+{
+ if( locale > 0 ) {
+ encoding = cbl_locale_of(symbol_at(locale))->encoding;
+ }
+ memset(collation_sequence, 0xFF, sizeof(collation_sequence));
+ if( name ) { // from Special-Names collation_sequence
+ assert(strlen(name) < sizeof(cbl_name_t));
+ strcpy(this->name, name);
+ }
+}
+
+/*
+ * As parsed, the alphabet reflects the encoding of the source code. If the
+ * program uses a different encoding for alphanumeric, convert the alphabet to
+ * that.
+ *
+ * Because a custom alphabet is rare and occurs at most only once per program,
+ * we don't attempt to avoid re-encoding. "Conversion" of ASCII to ASCII is at
+ * most 256 calls to iconv(3).
+ */
+void
+cbl_alphabet_t::reencode() {
+
+ const unsigned char * const pend = collation_sequence + sizeof(collation_sequence);
+ std::vector<char> tgt(256, (char)0xFF);
+
+ /* Keep copies of low_index and last_index for use in run-time as LOW-VALUE
+ and HIGH-VALUE, which are kept as globals in the source-code codeset
+ and converted to the display encoding as necessary. */
+
+ low_char = low_index;
+ high_char = last_index;
+
+ /*
+ * For now, assume CP1252 source-code encoding because we're not capturing it
+ * anywhere except in cbl_field_t::internalize(). The only known examples of
+ * a custom alphabet are from NIST, which of course are ASCII.
+ */
+ const char *fromcode = __gg__encoding_iconv_name(CP1252_e);
+ const char *tocode =
+ __gg__encoding_iconv_name(current_encoding(display_encoding_e));
+ iconv_t cd = iconv_open(tocode, fromcode);
+
+#if optimal_reencode
+ if( fromcode == tocode ) { // semantically
+ tgt.resize(0);
+ return tgt; // Return empty vector; caller copies zero bytes.
+ }
+#endif
+
+ /*
+ * Each position in the alphabet array represents a letter in the source-code
+ * encoding. The value at that position represents the letter's collation
+ * position, its sort order. For each letter in alphabet, determine value of
+ * that letter in the alphanumeric encoding, and set its collation position
+ * in that alphabet.
+ */
+ for( const unsigned char *p = collation_sequence; p < pend; p++ ) {
+ if( *p == 0xFF ) continue;
+ unsigned char ch = p - collation_sequence;
+ unsigned char pos[8] = {};
+ size_t inbytesleft = 1, outbytesleft = sizeof(pos);
+ char *inbuf = reinterpret_cast<char*>(&ch),
+ *outbuf = reinterpret_cast<char*>(pos);
+
+ size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+
+ if( n == size_t(-1) ) {
+ error_msg(loc, "%s character '%c' (%x hex) cannot be converted to %s",
+ fromcode, ch, ch, tocode);
+ continue;
+ }
+ if( n != 0 ) {
+ dbgmsg("%s character '%c' (%x hex) irreversibly converted to %s",
+ fromcode, ch, ch, tocode);
+ }
+ assert(outbytesleft < sizeof(pos));
+ n = sizeof(pos) - outbytesleft;
+ if( 1 < n ) {
+ error_msg(loc, "%s character '%c' (%x hex) requires %zu bytes as %s",
+ fromcode, ch, ch, n, tocode);
+ continue;
+ }
+
+ if( ch == low_index ) {
+ low_index = pos[0];
+ }
+ if( ch == last_index ) {
+ last_index = pos[0];
+ }
+ if( ch == high_index ) {
+ high_index = pos[0];
+ }
+
+ tgt.at(pos[0]) = *p;
+ }
+
+ std::copy(tgt.begin(), tgt.end(), collation_sequence);
+}
+
bool
cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) {
- if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) {
- alphabet[ch] = high_value;
+ if( collation_sequence[ch] == 0xFF || collation_sequence[ch] == high_value) {
+ collation_sequence[ch] = high_value;
last_index = ch;
return true;
}
- auto taken = alphabet[ch];
- error_msg(loc, "ALPHABET %s, character '%c' (X'%x') "
+ auto taken = collation_sequence[ch];
+ error_msg(loc, "ALPHABET %s, character %<%c%> (X%'%x%') "
"in position %d already defined at position %d",
name,
ISPRINT(ch)? ch : '?', ch,
@@ -3192,7 +3390,7 @@ cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high
void
cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
if( ch < 256 ) {
- alphabet[ch] = alphabet[last_index];
+ collation_sequence[ch] = collation_sequence[last_index];
if( ch == high_index ) high_index--;
return;
} // else it's a figurative constant ...
@@ -3205,20 +3403,20 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
// last_index is already set; use it as the "last value before ALSO"
if( attr & low_value_e ) {
- alphabet[0] = alphabet[last_index];
+ collation_sequence[0] = collation_sequence[last_index];
return;
}
if( attr & high_value_e ) {
- alphabet[high_index--] = alphabet[last_index];
+ collation_sequence[high_index--] = collation_sequence[last_index];
return;
}
if( attr & (space_value_e|quote_value_e) ) {
ch = field->data.initial[0];
- alphabet[ch] = alphabet[last_index];
+ collation_sequence[ch] = collation_sequence[last_index];
return;
}
if( attr & (zero_value_e) ) {
- alphabet[0] = alphabet[last_index];
+ collation_sequence[0] = collation_sequence[last_index];
error_msg(loc, "ALSO value '%s' is unknown", field->name);
return;
}
@@ -3229,37 +3427,25 @@ using std::deque;
static deque<cbl_field_t*> stack;
static cbl_field_t *
-new_temporary_impl( enum cbl_field_type_t type )
+new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr )
{
extern int yylineno;
- static int nstack, nliteral;
static const struct cbl_field_t empty_alpha = {
- 0, FldAlphanumeric, FldInvalid,
- intermediate_e, 0, 0, 0, nonarray, 0, "",
- 0, cbl_field_t::linkage_t(),
- {}, NULL };
+ FldAlphanumeric, intermediate_e,
+ {MAXIMUM_ALPHA_LENGTH,
+ MAXIMUM_ALPHA_LENGTH, 0, 0, NULL} };
static const struct cbl_field_t empty_float = {
- 0, FldFloat, FldInvalid,
- intermediate_e,
- 0, 0, 0, nonarray, 0, "",
- 0, cbl_field_t::linkage_t(),
- {16, 16, 32, 0, NULL}, NULL };
+ FldFloat, intermediate_e,
+ {16, 16, 32, 0, NULL} };
static const struct cbl_field_t empty_comp5 = {
- 0, FldNumericBin5, FldInvalid,
+ FldNumericBin5,
signable_e | intermediate_e,
- 0, 0, 0, nonarray, 0, "",
- 0, cbl_field_t::linkage_t(),
- {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, NULL };
+ {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL} };
static const struct cbl_field_t empty_conditional = {
- 0, FldConditional, FldInvalid, intermediate_e,
- 0, 0, 0, nonarray, 0, "",
- 0, cbl_field_t::linkage_t(),
- {}, NULL };
+ FldConditional, intermediate_e, cbl_field_data_t{} };
static struct cbl_field_t empty_literal = {
- 0, FldInvalid, FldInvalid, CONSTANT_E,
- 0, 0, 0, nonarray, 0, "",
- 0, cbl_field_t::linkage_t(),
- {}, NULL };
+ FldInvalid, CONSTANT_E, cbl_field_data_t{} };
+
struct cbl_field_t *f = new cbl_field_t;
f->type = type;
@@ -3275,7 +3461,6 @@ new_temporary_impl( enum cbl_field_type_t type )
case FldSwitch:
case FldDisplay:
case FldPointer:
- case FldBlob:
break;
case FldConditional:
*f = empty_conditional;
@@ -3300,15 +3485,17 @@ new_temporary_impl( enum cbl_field_type_t type )
f->line = yylineno;
if( is_literal(type) ) {
+ static int nliteral = 0;
snprintf(f->name, sizeof(f->name), "_literal%d",++nliteral);
} else {
+ static int nstack = 0;
snprintf(f->name, sizeof(f->name), "_stack%d",++nstack);
-
- if( getenv("symbol_temporaries_free") ) {
- dbgmsg("%s: %s, %s", __func__, f->name, 3 + cbl_field_type_str(f->type));
- }
}
+ f->data.initial = name; // capture e.g. the function name
+
+ f->codeset.set();
+
return f;
}
@@ -3321,13 +3508,17 @@ new_temporary_decl() {
static inline cbl_field_t *
parser_symbol_add2( cbl_field_t *field ) {
+ if( ! field->codeset.valid() ) {
+ dbgmsg( "%s:%d: %s (%s) has no encoding", __func__, __LINE__,
+ field->name, cbl_field_type_str(field->type) );
+ }
parser_symbol_add(field);
return field;
}
static cbl_field_t *
-new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr ) {
- static char empty[2] = "\0";
+new_literal_add( const char initial[], uint32_t len,
+ cbl_field_attr_t attr, cbl_encoding_t encoding ) {
cbl_field_t *field = NULL;
if( !(attr & quoted_e) )
{
@@ -3339,21 +3530,39 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr
{
field = new_temporary_impl(FldLiteralA);
field->attr |= attr;
- field->data.initial = len > 0? initial : empty;
- field->data.capacity = len;
- if( ! field->internalize() )
+ if(len == 0)
{
- ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial);
+ // This will cover UTF-32, should that arise.
+ size_t nbytes = 4;
+ char *init = static_cast<char *>(xmalloc(nbytes));
+ memset(init, 0, nbytes);
+ field->data.initial = init;
}
+ if(len)
+ {
+ char *init = static_cast<char *>(xmalloc(len+4));
+ memcpy(init, initial, len);
+ memset(init+len, 0, 4);
+ field->data.initial = init;
+ }
+ field->data.capacity = len;
}
+ if( ! field->has_attr(hex_encoded_e) ) {
+ // If the literal bore a prefix, set the encoding,
+ if( encoding != cbl_field_t::codeset_t::source_encoding->type ) {
+ field->codeset.set(encoding);
+ }
+ field->internalize();
+ }
+
static size_t literal_count = 1;
sprintf(field->name,
- "%s%c_%zd",
+ "%s%c_" HOST_SIZE_T_PRINT_DEC,
"_literal",
field->type == FldLiteralA ? 'a' : 'n',
- literal_count++);
+ (fmt_size_t)literal_count++);
return parser_symbol_add2(field);
}
@@ -3361,35 +3570,40 @@ new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr
static temporaries_t temporaries;
cbl_field_t *
-temporaries_t::literal( const char value[], uint32_t len, cbl_field_attr_t attr ) {
- auto key = literal_an(value, quoted_e == (attr & quoted_e));
+temporaries_t::literal( uint32_t len, const char value[],
+ cbl_field_attr_t attr, cbl_encoding_t encoding ) {
+ bool is_quoted2 = quoted_e == (attr & quoted_e);
+ bool is_verbatim = hex_encoded_e == (attr & hex_encoded_e);
+ auto key = literal_an(value, is_quoted2, is_verbatim);
- if( 0 == (attr & hex_encoded_e) ) {
+ if( ! is_verbatim ) { // TODO: try without this test once National is ready
auto p = literals.find(key);
if( p != literals.end() ) {
cbl_field_t *field = p->second;
return field;
}
}
- return literals[key] = new_literal_add(value, len, attr);
+ return literals[key] = new_literal_add(value, len, attr, encoding);
}
cbl_field_t *
-new_literal( uint32_t len, const char initial[], enum cbl_field_attr_t attr ) {
- return temporaries.literal(initial, len, attr);
+new_literal( uint32_t len, const char initial[],
+ cbl_field_attr_t attr, cbl_encoding_t encoding ) {
+ return temporaries.literal(len, initial, attr, encoding);
}
void
temporaries_t::dump() const {
extern int yylineno;
- char *output = xasprintf("%4d: %zu Literals", yylineno, literals.size());
+ char *output = xasprintf("%4d: " HOST_SIZE_T_PRINT_UNSIGNED " Literals",
+ yylineno, (fmt_size_t)literals.size());
for( const auto& elem : used ) {
if( ! elem.second.empty() ) {
char *so_far = output;
- output = xasprintf("%s, %zu %s",
+ output = xasprintf("%s, " HOST_SIZE_T_PRINT_UNSIGNED " %s",
so_far,
- elem.second.size(),
+ (fmt_size_t)elem.second.size(),
3 + cbl_field_type_str(elem.first));
free(so_far);
}
@@ -3399,14 +3613,6 @@ temporaries_t::dump() const {
}
temporaries_t::~temporaries_t() {
- if( getenv( "symbol_temporaries_free" ) ) {
- dbgmsg("%s: %zu literals", __func__, literals.size());
- for( const auto& elem : literals ) {
- const literal_an& key(elem.first);
- fprintf(stderr, "%c '%s'\n", key.is_quoted? 'Q' : ' ', key.value.c_str());
- }
- dump();
- }
}
cbl_field_t *
@@ -3438,11 +3644,11 @@ temporaries_t::reuse( cbl_field_type_t type ) {
}
cbl_field_t *
-temporaries_t::acquire( cbl_field_type_t type ) {
+temporaries_t::acquire( cbl_field_type_t type, const cbl_name_t name ) {
cbl_field_t *field = reuse(type);
if( !field ) {
- field = new_temporary_impl(type);
+ field = new_temporary_impl(type, name);
add(field);
}
return parser_symbol_add2(field); // notify of reuse
@@ -3450,7 +3656,6 @@ temporaries_t::acquire( cbl_field_type_t type ) {
void
symbol_temporaries_free() {
- if( getenv(__func__) ) temporaries.dump();
for( auto& elem : temporaries.used ) {
const cbl_field_type_t& type(elem.first);
temporaries_t::fieldset_t& used(elem.second);
@@ -3476,27 +3681,51 @@ symbol_temporaries_free() {
}
cbl_field_t *
-new_alphanumeric( size_t capacity ) {
- cbl_field_t * field = new_temporary_impl(FldAlphanumeric);
+new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
+ cbl_field_t * field = new_temporary_impl(FldAlphanumeric, name);
field->data.capacity = capacity;
temporaries.add(field);
return parser_symbol_add2(field);
}
+extern os_locale_t os_locale;
+
+const encodings_t cbl_field_t::codeset_t::source_encodings[2] = {
+ { false, iconv_UTF_8_e, "UTF-8" },
+ { true, iconv_CP1252_e, "CP1252" },
+};
+const encodings_t * cbl_field_t::codeset_t::source_encoding = {
+ cbl_field_t::codeset_t::source_encodings
+};
+
+const encodings_t cbl_field_t::codeset_t::standard_internal = {
+ true, iconv_CP1252_e, "CP1252"
+};
+#define standard_internal cbl_field_t::codeset_t::standard_internal
+
cbl_field_t *
-new_temporary( enum cbl_field_type_t type, const char *initial ) {
- if( ! initial ) {
+new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) {
+ const bool force_unsigned = type == FldNumericBin5 && ! is_signed;
+
+ if( ! initial && ! force_unsigned ) {
assert( ! is_literal(type) ); // Literal type must have literal value.
- return temporaries.acquire(type);
+ return temporaries.acquire(type, initial);
}
if( is_literal(type) ) {
- auto field = temporaries.literal(initial,
- type == FldLiteralA? quoted_e : none_e);
+ auto field = temporaries.literal(strlen(initial), initial,
+ type == FldLiteralA? quoted_e : none_e,
+ standard_internal.type);
return field;
}
- cbl_field_t *field = new_temporary_impl(type);
- field->data.capacity = strlen(field->data.initial = initial);
- temporaries.add(field);
+ cbl_field_t *field = new_temporary_impl(type, initial);
+
+ // don't reuse unsigned numeric
+ if( force_unsigned ) {
+ field->clear_attr(signable_e);
+ } else {
+ temporaries.add(field);
+ }
+
parser_symbol_add(field);
return field;
@@ -3535,12 +3764,38 @@ new_temporary_clone( const cbl_field_t *orig) {
temporaries.add(field);
}
field->data = orig->data;
- if( field->type == FldNumericBin5 ) field->type = orig->type;
+ if( field->type == FldNumericBin5 ) {
+ field->type = orig->type;
+ field->codeset = orig->codeset;
+ }
field->attr = intermediate_e;
return parser_symbol_add2(field);
}
+/*
+ * This set of ASCII-like encodings is incomplete and possibly wrong. A
+ * complete definition would better supported with a Boolean in enccodings_t.
+ * If it returns false pessimistically, the only consequence is inefficiency:
+ * the string is processed by iconv(3).
+ */
+bool
+cbl_field_t::holds_ascii() const {
+ // True if the encoding is a superset of ASCII.
+ switch(codeset.encoding) {
+ case ASCII_e:
+ case CP1252_e:
+ case iso646_e:
+ return true;
+ default:
+ if( iconv_1026_e <= codeset.encoding &&
+ codeset.encoding <= iconv_ANSI_X3_4_e ) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool
cbl_field_t::is_ascii() const {
return std::all_of( data.initial,
@@ -3566,124 +3821,172 @@ cbl_field_t::is_ascii() const {
* compilation, if it moves off the default, it adjusts only once, and
* never reverts.
*/
-static const char standard_internal[] = "CP1252//";
-extern os_locale_t os_locale;
-
-static const char *
-guess_encoding() {
- static const char *fromcode;
-
- if( ! fromcode ) {
- return fromcode = os_locale.assumed;
- }
-
- if( fromcode == os_locale.assumed ) {
- fromcode = os_locale.codeset;
- if( 0 != strcmp(fromcode, "C") ) { // anything but that
- return fromcode;
- }
- }
-
- return standard_internal;
-}
const char *
cbl_field_t::internalize() {
- static const char *tocode = standard_internal;
- static const char *fromcode = guess_encoding();
- static iconv_t cd = iconv_open(tocode, fromcode);
+ /* The purpose of this routine is to return a nul-terminated string which
+ is data.initial converted from the source-code characters to the
+ codeset.encoding characters.
+
+ The contract between this routine and the routines that call it is that
+ for alphanumeric types, data.initial shall have the same number of
+ characters as will be needed to fill data.capacity.
+
+ Be aware that for PIC X(32) Z"foo", there are the characters "foo",
+ followed by a NUL, and then 28 spaces to fill it out. It turns out that
+ iconv, given a character count of 32, converts all 32, including the
+ embedded NUL. So, that case works even through strlen(initial) is
+ smaller than the length of initial, which is the same as capacity.
+ */
+
+ static const char *fromcode = codeset.source_encodings[0].name;
static const size_t noconv = size_t(-1);
+ static std::unordered_map<std::string, iconv_t> tocodes;
+
+ if( ! codeset.valid() ) {
+ dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
+ return data.initial;
+ }
- // Sat Mar 16 11:45:08 2024: require temporary environment for testing
- if( getenv( "INTERNALIZE_NO") ) return data.initial;
+ const char *tocode = __gg__encoding_iconv_name(codeset.encoding);
- bool using_assumed = fromcode == os_locale.assumed;
+ std::string toname(tocode);
+ auto p = tocodes.find(toname);
+ if( p == tocodes.end() ) {
+ tocodes[toname] = iconv_open(tocode, fromcode);
+ }
+ iconv_t cd = tocodes[toname];
+
+ if (cd == (iconv_t)-1) {
+ cbl_message(ParIconvE,
+ "failed %<iconv_open%> tocode = %qs fromcode = %qs",
+ tocode, fromcode);
+ }
if( fromcode == tocode || has_attr(hex_encoded_e) ) {
return data.initial;
}
- if( is_ascii() ) return data.initial;
+ if( data.capacity == 0 ) {
+ assert(0 == strlen(data.initial));
+ return data.initial;
+ }
+ if( holds_ascii() && is_ascii() ) {
+ if( type != FldNumericEdited ) {
+ if( ! data.initial_within_capacity() ) {
+ ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u",
+ cbl_field_t::level_str(level), name, data.initial,
+ strlen(data.initial), data.capacity );
+ }
+ }
+ return data.initial;
+ }
assert(data.capacity > 0);
- std::vector<char> output(data.capacity + 2, '\0');
- char *out = output.data();
- char *in = const_cast<char*>(data.initial);
- size_t n, inbytesleft = data.capacity, outbytesleft = output.size();
+ // The final 2 bytes of the output are "!\0". It's a debugging sentinel.
+ size_t n;
+ size_t inbytesleft = data.capacity;
+ size_t outbytesleft = inbytesleft;
if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
inbytesleft = strlen(data.initial);
}
+ if( type == FldNumericEdited ) {
+ outbytesleft = inbytesleft;
+ }
+ const unsigned int in_len = inbytesleft;
+
+ char *in = const_cast<char*>(data.initial);
+ char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
assert(fromcode != tocode);
- while( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
- if( !using_assumed ) break; // change only once
- fromcode = guess_encoding();
- cd = iconv_open(tocode, fromcode);
- dbgmsg("%s: trying input encoding %s", __func__, fromcode);
- if( fromcode == tocode ) break;
- }
+ /*
+ * If we're currently assuming the source code is encoded according to the
+ * locale (the default), and there's an iconv failure, try once more using a
+ * different assumption, that the source code is encoded as CP1252.
+ *
+ * This heuristic means that some UTF-8 literals could be converted until a
+ * CP1252 byte is encountered. We could be stricter about that.
+ *
+ * Also possible is a failure to avoid iconv with fromcode and tocode denote
+ * the same encoding but with different spellings, e.g. CP1252 and CP1252//.
+ */
- if( n == noconv ) {
- if( !using_assumed ) {
- yywarn("failed to decode '%s' as %s", data.initial, fromcode);
- return NULL;
+ do {
+ if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
+ if( fromcode == codeset.source_encodings[0].name ) {
+ codeset.source_encoding = &codeset.source_encodings[1];
+ fromcode = codeset.source_encoding->name;
+ tocodes.clear();
+ cd = tocodes[toname] = iconv_open(tocode, fromcode);
+ dbgmsg("%s: trying input encoding %s", __func__, fromcode);
+ if( fromcode == tocode ) return data.initial; // no conversion required.
+ n = noconv - 1; // try again
+ }
}
+ if( n == 0 ) break;
+ } while( n != noconv );
+
+ if( n == noconv ) {
+ size_t i = in_len - inbytesleft;
+ yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)",
+ fromcode, data.initial + i, tocode, inbytesleft, in_len);
+ if( false ) return NULL;
return data.initial;
}
if( 0 < inbytesleft ) {
// data.capacity + inbytesleft is not correct if the remaining portion has
- // multibyte characters. But the fact reamins that the VALUE is too big.
+ // multibyte characters. But the fact remains that the VALUE is too big.
ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u",
cbl_field_t::level_str(level), name, data.initial,
data.capacity + inbytesleft, data.capacity );
}
// Replace data.initial only if iconv output differs.
- if( 0 != memcmp(data.initial, output.data(), out - output.data()) ) {
- assert(out <= output.data() + data.capacity);
-
- if( getenv(__func__) ) {
- const char *eoi = data.initial + data.capacity, *p;
- char nullitude[64] = "no null";
- if( (p = std::find(data.initial, eoi, '\0')) != eoi ) {
- sprintf(nullitude, "NUL @ %zu", p - data.initial);
- }
- dbgmsg("%s:%d: before: %-15s %-20s: '%.*s'{%u}, %s", __func__, __LINE__,
- 3 + cbl_field_type_str(type), name,
- data.capacity, data.initial, data.capacity, nullitude);
- }
+ if( 0 != memcmp(data.initial, output, out - output) ) {
+ assert(out <= output + data.capacity || type == FldNumericEdited);
dbgmsg("%s: converted '%.*s' to %s",
__func__, data.capacity, data.initial, tocode);
-
- int len = int(out - output.data());
- char *mem = static_cast<char*>( xcalloc(1, output.size()) );
-
- // Set the new memory to all blanks, tacking a '!' on the end.
- memset(mem, 0x20, output.size() - 1);
- mem[ output.size() - 2] = '!';
+ struct localspace_t {
+ char space[4];
+ size_t len, erc;
+ explicit localspace_t( iconv_t cd ) {
+ static char input[1] = { 0x20 };
+ size_t inbytesleft2 = sizeof(input), outbytesleft2 = sizeof(space);
+ char *in2 = input, *out2 = space;
+
+ erc = iconv(cd, &in2, &inbytesleft2, &out2, &outbytesleft2);
+ len = out2 - space;
+ }
+ bool valid() const { return 0 < len && erc != size_t(-1); }
+ } spc(cd);
+
+ if( ! spc.valid() ) {
+ dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__,
+ tocode, xstrerror(errno));
+ ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno));
+ return data.initial;
+ }
+ assert( 0 < spc.len && spc.valid() );
if( is_literal(this) ) {
- data.capacity = len; // trailing '!' will be overwritten
+ data.capacity = out - output; // trailing '!' will be overwritten
}
-
- memcpy(mem, output.data(), len); // copy only as much as iconv converted
-
- free(const_cast<char*>(data.initial));
- data.initial = mem;
-
- if( getenv(__func__) ) {
- const char *eoi = data.initial + data.capacity, *p;
- char nullitude[64] = "no null";
- if( (p = std::find(data.initial, eoi, '\0')) != eoi ) {
- sprintf(nullitude, "NUL @ %zu", p - data.initial);
- }
- dbgmsg("%s:%d: after: %-15s %-20s: '%.*s'{%u}, %s", __func__, __LINE__,
- "", name,
- data.capacity, data.initial, data.capacity, nullitude);
+ // Pad with trailing blanks, tacking a '!' on the end.
+ for( const char *eout = output + data.capacity;
+ out < eout;
+ out += spc.len ) {
+ memcpy(out, spc.space, spc.len);
}
-
+ // Numeric literal strings may have leading zeros, making their length
+ // longer than their capacity.
+ out[0] = type == FldLiteralN? '\0' : '!';
+ assert(out[1] == '\0');
+ data.orig = data.initial;
+ data.initial = output;
+ } else {
+ free(output);
}
return data.initial;
@@ -3702,8 +4005,8 @@ cbl_label_t::str() const {
buf = xasprintf("%-12s %s top level [%s], line %d",
type_str() + 3, name, mangled_name, line);
} else {
- buf = xasprintf("%-12s %s OF #%zu '%s' [%s], line %d",
- type_str() + 3, name, parent,
+ buf = xasprintf("%-12s %s OF #" HOST_SIZE_T_PRINT_UNSIGNED " '%s' [%s], line %d",
+ type_str() + 3, name, (fmt_size_t)parent,
cbl_label_of(symbol_at(parent))->name,
mangled_name, line);
}
@@ -3733,12 +4036,9 @@ cbl_label_t::explicit_parent() const {
}
cbl_prog_hier_t::cbl_prog_hier_t() {
- nlabel = std::count_if( symbols_begin(), symbols_end(), is_program );
- assert(nlabel >0);
- labels = new cbl_prog_hier_t::program_label_t[nlabel];
-
std::copy_if( symbols_begin(), symbols_end(),
- labels, is_program );
+ std::back_inserter(labels), is_program );
+ assert(! labels.empty());
}
/*
@@ -3803,44 +4103,22 @@ common_callables_update( const size_t iprog ) {
cbl_label_t *
symbol_label_add( size_t program, cbl_label_t *input )
{
- if( getenv(__func__) ) {
- const cbl_label_t *L = input;
- dbgmsg( "%s:%d: %-5s #%3zu %-9s '%s' of '%s' at line %d", __func__, __LINE__,
- "input",
- size_t(0),
- L->type_str()+3,
- L->name,
- L->parent? cbl_label_of(symbol_at(L->parent))->name : "",
- L->line );
- }
-
cbl_label_t *label = symbol_label(program, input->type,
input->parent, input->name);
if( label && label->type == LblNone ) {
- const char *verb = "set";
label->type = input->type;
label->parent = input->parent;
label->line = input->line;
- if( getenv(__func__) ) {
- const cbl_label_t *L = label;
- dbgmsg( "%s:%d: %-5s #%3zu %-9s '%s' of '%s' at line %d",
- __func__, __LINE__,
- verb,
- symbol_elem_of(L) - symbols_begin(),
- L->type_str()+3,
- L->name,
- L->parent? cbl_label_of(symbol_at(L->parent))->name : "",
- L->line );
- }
return label;
}
// Set the program's mangled name, dehyphenated and uniqified by parent index.
if( input->type == LblProgram ) {
char *psz = cobol_name_mangler(input->name);
- input->mangled_name = xasprintf("%s.%zu", psz, input->parent);
+ input->mangled_name = xasprintf("%s." HOST_SIZE_T_PRINT_UNSIGNED,
+ psz, (fmt_size_t)input->parent);
free(psz);
}
@@ -3853,59 +4131,45 @@ symbol_label_add( size_t program, cbl_label_t *input )
if( (e = symbol_add(&elem)) == NULL ) {
cbl_errx("%s:%d: could not add '%s'", __func__, __LINE__, label->name);
}
+ assert(e);
common_callables_update( symbol_index(e) );
// restore munged line number unless symbol_add returned an existing label
if( e->elem.label.line < 0 ) e->elem.label.line = -e->elem.label.line;
- if( getenv(__func__) ) {
- const cbl_label_t *L = cbl_label_of(e);
- dbgmsg( "%s:%d: added #%3zu %-9s '%s' of '%s' at line %d", __func__, __LINE__,
- e - symbols_begin(),
- L->type_str()+3,
- L->name,
- L->parent? cbl_label_of(symbol_at(L->parent))->name : "",
- L->line );
- }
symbols.labelmap_add(e);
return cbl_label_of(e);
}
/*
- * Under ISO (and not IBM) Declaratives are followed by a Section name. When
- * the first statement is parsed, verify, if Declaratives were used, that it
+ * Under ISO (and not IBM) Declaratives are followed by a Section name. If
+ * Declaratives were used, when the first statement is parsed verify that it
* was preceeded by a Section name.
*/
bool
-symbol_label_section_exists( size_t program ) {
- auto pblob = std::find_if( symbols_begin(program), symbols_end(),
- []( const auto& sym ) {
- if( sym.type == SymField ) {
- auto& f( sym.elem.field );
- return f.type == FldBlob;
- }
- return false;
- } );
- if( pblob == symbols_end() ) return true; // Section name not required
-
- bool has_section = std::any_of( ++pblob, symbols_end(),
- []( const auto& sym ) {
- if( sym.type == SymLabel ) {
- auto& L(sym.elem.label);
- if( L.type == LblSection ) {
- if( L.name[0] != '_' ) { // not implicit
- return true; // Section name exists
- }
- }
+symbol_label_section_exists( size_t eval_label_index ) {
+ auto eval = symbols_begin(eval_label_index);
+ /* cppcheck warns that the following statement depends on the order of
+ evaluation of side effects. Since this isn't my code, and since I don't
+ think the warning can be eliminated without rewriting it, I am just
+ supprressing it.
+ -- Bob Dubner, 2025-07-14 */
+ // cppcheck-suppress unknownEvaluationOrder
+ bool has_section = std::any_of( ++eval, symbols_end(),
+ [program = eval->program]( const auto& sym ) {
+ if( program == sym.program && sym.type == SymLabel ) {
+ const auto& L(sym.elem.label);
+ // true if the symbol is an explicit label.
+ return L.type == LblSection && L.name[0] != '_';
}
return false;
} );
if( yydebug && ! has_section ) {
- symbols_dump(program, true);
+ symbols_dump(eval_label_index, true);
}
- // Return true if no Declaratives, because the (non-)requirement is met.
- // Return false if Declaratives exist, because no Section name was found.
+ // Return true if a user-defined SECTION was found after the Declaratives
+ // label section.
return has_section;
}
@@ -3918,7 +4182,8 @@ symbol_program_add( size_t program, cbl_label_t *input )
// Set the program's mangled name, dehyphenated and uniqified by parent index.
char *psz = cobol_name_mangler(input->name);
- elem.elem.label.mangled_name = xasprintf("%s.%zu", psz, input->parent);
+ elem.elem.label.mangled_name = xasprintf("%s." HOST_SIZE_T_PRINT_UNSIGNED,
+ psz, (fmt_size_t)input->parent);
free(psz);
e = std::find_if( symbols_begin(program), symbols_end(),
@@ -3960,11 +4225,6 @@ symbol_special_add( size_t program, struct cbl_special_name_t *special )
struct symbol_elem_t *e = symbol_special(program, special->name);
if( e ) {
- cbl_special_name_t *s = cbl_special_name_of(e);
- if( getenv(__func__) ) {
- dbgmsg("%s:%d matches %s %d (%s)", __func__, __LINE__,
- special->name, int(s->id), s->name);
- }
return e;
}
assert(e == NULL);
@@ -3975,11 +4235,6 @@ symbol_special_add( size_t program, struct cbl_special_name_t *special )
cbl_errx( "%s:%d: could not add '%s'", __func__, __LINE__, special->name);
}
- if( getenv(__func__) ) {
- dbgmsg( "%s:%d: added special '%s'", __func__, __LINE__,
- e->elem.special.name);
- }
-
elem_key_t key(program, cbl_special_name_of(e)->name);
symbols.specials[key] = symbol_index(e);
@@ -4056,7 +4311,7 @@ expand_picture(const char *picture)
{
assert(strlen(picture) < PICTURE_MAX); // guaranteed by picset() in scanner
size_t retval_length = PICTURE_MAX;
- char *retval = (char *)xmalloc(retval_length);
+ char *retval = static_cast<char *>(xmalloc(retval_length));
size_t index = 0;
int ch;
@@ -4085,7 +4340,7 @@ expand_picture(const char *picture)
if( index + repeat >= retval_length )
{
retval_length <<= 1;
- retval = (char *)xrealloc(retval, retval_length);
+ retval = static_cast<char *>(xrealloc(retval, retval_length));
}
while(repeat--)
@@ -4098,7 +4353,7 @@ expand_picture(const char *picture)
if( index >= retval_length )
{
retval_length <<= 1;
- retval = (char *)xrealloc(retval, retval_length);
+ retval = static_cast<char *>(xrealloc(retval, retval_length));
}
retval[index++] = ch;
}
@@ -4107,7 +4362,7 @@ expand_picture(const char *picture)
if( index >= retval_length )
{
retval_length <<= 1;
- retval = (char *)xrealloc(retval, retval_length);
+ retval = static_cast<char *>(xrealloc(retval, retval_length));
}
retval[index++] = '\0';
@@ -4130,7 +4385,6 @@ expand_picture(const char *picture)
{
pcurrency[i] = 'B';
}
- dest_length += sign_length;
}
}
@@ -4361,7 +4615,7 @@ symbol_program_callables( size_t program ) {
if( e->type != SymLabel ) continue;
if( e->elem.label.type != LblProgram ) continue;
- auto prog = cbl_label_of(e);
+ const cbl_label_t * prog = cbl_label_of(e);
if( program == symbol_index(e) && !prog->recursive ) continue;
if( (self->parent == prog->parent && prog->common) ||
@@ -4395,6 +4649,7 @@ symbol_program_local( const char tgt_name[] ) {
*/
std::map<char, const char *> currencies;
+// cppcheck-suppress-begin [nullPointerRedundantCheck]
bool
symbol_currency_add( const char symbol[], const char sign[] ) {
// In service of CURRENCY sign PICTURE SYMBOL symbol
@@ -4406,6 +4661,7 @@ symbol_currency_add( const char symbol[], const char sign[] ) {
currencies[*symbol] = sign;
return true;
}
+// cppcheck-suppress-end [nullPointerRedundantCheck]
const char *
symbol_currency( char sign ) {
@@ -4413,6 +4669,11 @@ symbol_currency( char sign ) {
if( currencies.size() == 0 ) {
currencies['$'] = "$";
}
+ if( sign == '\0' ) { // default
+ auto result = currencies.begin();
+ gcc_assert(result != currencies.end());
+ return result->second;
+ }
auto result = currencies.find(sign);
return result == currencies.end()? NULL : result->second;
}
@@ -4428,24 +4689,19 @@ bool decimal_is_comma() { return decimal_point == ','; }
/*
* A cbl_occurs_key_t is part of a field definition, and comprises
* size_t symbol indexes. A cbl_key_t is a list of field pointers,
- * and can be created ad hoc to describe a sort. We can construct a
+ * and can be created ad hoc to describe a sort. We construct a
* cbl_key_t from cbl_occurs_key_t.
*/
cbl_key_t::
cbl_key_t( const cbl_occurs_key_t& that )
: ascending(that.ascending)
{
- if( that.field_list.nfield == 0 ) {
- *this = cbl_key_t();
- return;
- }
-
- nfield = that.field_list.nfield;
- fields = static_cast<cbl_field_t**>( xcalloc(nfield,
- sizeof(*fields)) );
- for( size_t i=0; i < that.field_list.nfield; i++ ) {
- fields[i] = cbl_field_of(symbol_at(that.field_list.fields[i]));
- }
+ std::transform( that.field_list.fields,
+ that.field_list.fields + that.field_list.nfield,
+ std::back_inserter(fields),
+ []( size_t isym ) {
+ return cbl_field_of(symbol_at(isym));
+ } );
}
void
@@ -4458,7 +4714,7 @@ cbl_occurs_t::key_alloc( bool ascending ) {
}
void
-cbl_occurs_t::field_add( cbl_field_list_t& field_list, cbl_field_t *field ) {
+cbl_occurs_t::field_add( cbl_field_list_t& field_list, const cbl_field_t *field ) {
cbl_field_list_t list = field_list;
size_t ifield = field_index(field);
auto nbytes = sizeof(list.fields[0]) * (list.nfield + 1);
@@ -4476,14 +4732,14 @@ cbl_occurs_t::key_field_add( cbl_field_t *field ) {
}
void
-cbl_occurs_t::index_add( cbl_field_t *field ) {
+cbl_occurs_t::index_add( const cbl_field_t *field ) {
field_add(indexes, field);
}
class is_field_at {
cbl_field_t *field;
public:
- is_field_at( cbl_field_t *field ) : field(field) {}
+ explicit is_field_at( cbl_field_t *field ) : field(field) {}
bool operator()( size_t isym ) const {
return field == field_at(isym);
}
@@ -4526,6 +4782,26 @@ cbl_occurs_t::subscript_ok( const cbl_field_t *subscript ) const {
return bounds.lower <= (size_t)sub && (size_t)sub <= bounds.upper;
}
+const cbl_field_t *
+symbol_unresolved_file_key( const cbl_file_t * file,
+ const cbl_name_t key_field_name ) {
+ const symbol_elem_t *file_sym = symbol_elem_of(file);
+ size_t program = file_sym->program;
+ for( const symbol_elem_t *e = file_sym - 1; e->program == program; e-- ) {
+ if( e->type == SymFile ) break;
+ if( e->type == SymField ) {
+ auto f = cbl_field_of(e);
+ if( f->type == FldLiteralA ) break;
+ if( f->type == FldForward ) {
+ if( 0 == strcmp(key_field_name, f->name) ) {
+ return f;
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
cbl_file_key_t::
cbl_file_key_t( cbl_name_t name,
const std::list<cbl_field_t *>& fields,
@@ -4609,8 +4885,8 @@ symbol_forward_names( size_t ifield ) {
for( auto sym = symbols_begin(ifield); sym && sym->type == SymField; ) {
const cbl_field_t *field = cbl_field_of(sym);
if( !(field->type == FldForward) ) {
- dbgmsg("%s:%d: logic error, not FldForward: #%zu %s",
- __func__, __LINE__, symbol_index(sym), field_str(field));
+ dbgmsg("%s:%d: logic error, not FldForward: #" HOST_SIZE_T_PRINT_UNSIGNED " %s",
+ __func__, __LINE__, (fmt_size_t)symbol_index(sym), field_str(field));
}
assert(field->type == FldForward);
@@ -4633,8 +4909,9 @@ symbol_forward_to( size_t fwd ) {
if( !elem.second ) {
const auto& field = *cbl_field_of(symbols_begin(fwd));
if( yydebug )
- dbgmsg("%s:%d: no symbol found for #%zu %s %s", __func__, __LINE__,
- fwd, cbl_field_type_str(field.type), field.name);
+ dbgmsg("%s:%d: no symbol found for #" HOST_SIZE_T_PRINT_UNSIGNED " %s %s",
+ __func__, __LINE__,
+ (fmt_size_t)fwd, cbl_field_type_str(field.type), field.name);
return fwd;
}
@@ -4650,13 +4927,14 @@ cbl_file_key_t::deforward( size_t ifile ) {
const auto file = cbl_file_of(symbol_at(ifile));
std::transform( fields, fields + nfield, fields,
[ifile, file]( size_t fwd ) {
- static std::map<size_t, int> keys;
auto ifield = symbol_forward_to(fwd);
const auto field = cbl_field_of(symbol_at(ifield));
if( is_forward(field) && yydebug ) {
- dbgmsg("%s:%d: key %d: #%zu %s of %s is %s", "deforward", __LINE__,
- keys[ifile]++, ifield, field->name, file->name,
+ static std::map<size_t, int> keys;
+ dbgmsg("%s:%d: key %d: #" HOST_SIZE_T_PRINT_UNSIGNED " %s of %s is %s",
+ "deforward", __LINE__,
+ keys[ifile]++, (fmt_size_t)ifield, field->name, file->name,
cbl_field_type_str(field->type) + 3);
}
@@ -4665,7 +4943,7 @@ cbl_file_key_t::deforward( size_t ifile ) {
if( ifield == fwd ) {
ERROR_FIELD(field, "line %d: %s of %s "
"is not defined",
- file->line, field->name, file->name);
+ field->line, field->name, file->name);
return ifield;
}
@@ -4694,9 +4972,13 @@ cbl_file_key_t::deforward( size_t ifile ) {
// looked-up field must have same file as parent
if( ! (parent != NULL &&
symbol_index(symbol_elem_of(parent)) == ifile) ) {
- ERROR_FIELD(field, "line %d: %s of %s "
- "is not defined in file description",
- file->line, field->name, file->name);
+ const cbl_field_t *undefined =
+ symbol_unresolved_file_key(file, field->name);
+ int lineno = undefined? undefined->line : file->line;
+ ERROR_FIELD(undefined? undefined : field,
+ "line %d: %s of %s "
+ "is not defined in file description",
+ lineno, field->name, file->name);
}
return ifield;
} );
@@ -4710,7 +4992,7 @@ cbl_file_key_t::str() const {
*p++ = '[';
for( auto f = fields; f < fields + nfield; f++) {
- auto n = sprintf(p, "%s%zu", sep, *f);
+ auto n = sprintf(p, "%s" HOST_SIZE_T_PRINT_UNSIGNED, sep, (fmt_size_t)*f);
p += n;
sep = ", ";
}
@@ -4723,6 +5005,8 @@ cbl_file_key_t::str() const {
*/
void
cbl_file_t::deforward() {
+ const size_t ifile( symbol_index(symbol_elem_of(this)) );
+
if( user_status ) {
user_status = symbol_forward_to(user_status);
@@ -4734,33 +5018,18 @@ cbl_file_t::deforward() {
}
for( auto p = keys; p < keys + nkey; p++ ) {
- p->deforward( symbol_index(symbol_elem_of(this)) );
+ p->deforward(ifile);
}
}
char *
cbl_file_t::keys_str() const {
- std::vector <char *> ks(nkey);
- std::transform(keys, keys + nkey, ks.begin(),
- []( const cbl_file_key_t& key ) {
- return key.str();
- } );
- size_t n = 4 * nkey + std::accumulate(ks.begin(), ks.end(), 0,
- []( int n, const char *s ) {
- return n + strlen(s);
- } );
- char *output = static_cast<char*>( xcalloc(1, n) ), *p = output;
- const char *sep = "";
-
- *p++ = '[';
- for( auto k : ks ) {
- p = stpcpy(p, sep);
- p = stpcpy(p, k);
- sep = ", ";
- free(k);
+ std::string names = "[";
+ for( cbl_file_key_t *p = keys; p < keys + nkey; p++ ) {
+ names += p->str();
+ names += p + 1 < keys + nkey ? "," : "]";
}
- *p++ = ']';
- return output;
+ return xasprintf("%s", names.c_str());
}
/*
@@ -4822,11 +5091,13 @@ cbl_file_status_cmp( const void *K, const void *E ) {
static long
file_status_status_of( file_status_t status ) {
size_t n = COUNT_OF(file_status_fields);
- file_status_field_t *fs, key { status };
-
- fs = (file_status_field_t*)lfind( &key, file_status_fields,
- &n, sizeof(*fs), cbl_file_status_cmp );
+ const file_status_field_t *fs, key { status };
+ fs = static_cast<file_status_field_t*>(lfind( &key,
+ file_status_fields,
+ &n,
+ sizeof(*fs),
+ cbl_file_status_cmp ));
return fs? (long)fs->status : -1;
}
@@ -4852,21 +5123,6 @@ ast_file_status_between( file_status_t lower, file_status_t upper ) {
}
bool
-is_register_field(cbl_field_t *field)
- {
- // TRUE when the field is an executable-level global variable of the type we
- // are calling a "register", like RETURN-CODE or UPSI or the like:
- return
- ( field->parent == 0
- && field->level == 0
- && !(field->attr & intermediate_e)
- && !(field->attr & filler_e)
- && field->type != FldClass
- && field->type != FldBlob
- );
- }
-
-bool
has_value( cbl_field_type_t type ) {
// Indicates that the field type contains data that can be expressed as
// a numeric value
@@ -4882,7 +5138,6 @@ has_value( cbl_field_type_t type ) {
case FldForward:
case FldSwitch:
case FldDisplay:
- case FldBlob:
return false;
case FldIndex:
case FldPointer: