diff options
Diffstat (limited to 'gcc/cobol/util.cc')
-rw-r--r-- | gcc/cobol/util.cc | 1078 |
1 files changed, 869 insertions, 209 deletions
diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc index 62ecd98..23f605d 100644 --- a/gcc/cobol/util.cc +++ b/gcc/cobol/util.cc @@ -35,6 +35,10 @@ */ #include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#undef yy_flex_debug + #include <langinfo.h> #include "coretypes.h" @@ -55,13 +59,13 @@ #include "cbldiag.h" #include "lexio.h" -#define HOWEVER_GCC_DEFINES_TREE #include "../../libgcobol/ec.h" #include "../../libgcobol/common-defs.h" #include "symbols.h" #include "inspect.h" #include "../../libgcobol/io.h" #include "genapi.h" +#include "genutil.h" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" @@ -72,14 +76,47 @@ extern int yyparse(void); extern int demonstration_administrator(int N); +#if !defined (HAVE_GET_CURRENT_DIR_NAME) +/* Posix platforms might not have get_current_dir_name but should have + getcwd() and PATH_MAX. */ +#if __has_include (<limits.h>) +# include <limits.h> +#endif +/* The Hurd doesn't define PATH_MAX. */ +#if !defined (PATH_MAX) && defined(__GNU__) +# define PATH_MAX 4096 +#endif +static inline char * +get_current_dir_name () +{ + /* Use libiberty's allocator here. */ + char *buf = (char *) xmalloc (PATH_MAX); + return getcwd (buf, PATH_MAX); +} +#endif + +/* + * For printing messages, usually the size of the thing is some kind of string + * length, and doesn't really need a size_t. For message formatting, use a + * simple unsigned long, and warn if that's no good. "gb4" here stands for + * "4 Gigabytes". + */ +unsigned long +gb4( size_t input ) { + if( input != static_cast<unsigned long>(input) ) { + yywarn("size too large to print: %lx:%lx", + (unsigned long)(input >> (4 * sizeof(unsigned long))), + static_cast<unsigned long>(input)); + } + return input; +} + const char * symbol_type_str( enum symbol_type_t type ) { switch(type) { case SymFilename: return "SymFilename"; - case SymFunction: - return "SymFunction"; case SymField: return "SymField"; case SymLabel: @@ -93,7 +130,7 @@ symbol_type_str( enum symbol_type_t type ) case SymDataSection: return "SymDataSection"; } - dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type); + cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type); return "???"; } @@ -142,7 +179,7 @@ cbl_field_type_str( enum cbl_field_type_t type ) case FldBlob: return "FldBlob"; } - dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type); + cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type); return "???"; } @@ -302,8 +339,9 @@ is_numeric_edited( const char picture[] ) { break; default: numed_message = xasprintf("invalid PICTURE character " - "'%c' at offset %zu in '%s'", - *p, p - picture, picture); + "'%c' at offset " HOST_SIZE_T_PRINT_UNSIGNED + " in '%s'", + *p, (fmt_size_t)(p - picture), picture); break; } @@ -327,49 +365,50 @@ normalize_picture( char picture[] ) regmatch_t pmatch[4]; if( (erc = regcomp(preg, regex, cflags)) != 0 ) { - regerror(erc, preg, regexmsg, sizeof(regexmsg)); - dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); - return picture; + regerror(erc, preg, regexmsg, sizeof(regexmsg)); + dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); + return picture; } while( (erc = regexec(preg, picture, COUNT_OF(pmatch), pmatch, 0)) == 0 ) { - assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo); - size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; - assert(len == 1); - const char *start = picture + pmatch[1].rm_so; - - assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo); - len = pmatch[2].rm_eo - pmatch[2].rm_so; - assert(len > 0); - - /* - * Overwrite e.g. A(4) with AAAA. - */ - assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number - p = picture + pmatch[2].rm_so; - len = 0; - if( 1 != sscanf(p, "%zu", &len) ) { - dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p); - goto irregular; - } - if( len == 0 ) { - dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p); - goto irregular; - } + assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo); + size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; + assert(len == 1); + const char *start = picture + pmatch[1].rm_so; + + assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo); + len = pmatch[2].rm_eo - pmatch[2].rm_so; + assert(len > 0); + + /* + * Overwrite e.g. A(4) with AAAA. + */ + assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number + p = picture + pmatch[2].rm_so; + len = 0; + fmt_size_t lenf = 0; + if( 1 != sscanf(p, "%" GCC_PRISZ "u", &lenf) ) { + dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p); + goto irregular; + } + len = lenf; + if( len == 0 ) { + dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p); + goto irregular; + } - std::vector <char> pic(len + 1, '\0'); - memset(pic.data(), *start, len); - const char *finish = picture + pmatch[2].rm_eo, - *eopicture = picture + strlen(picture); + std::vector <char> pic(len + 1, '\0'); + memset(pic.data(), *start, len); + const char *finish = picture + pmatch[2].rm_eo, + *eopicture = picture + strlen(picture); - p = xasprintf( "%*s%s%*s", - (int)(start - picture), picture, - pic.data(), - (int)(eopicture - finish), finish ); + p = xasprintf( "%*s%s%*s", + (int)(start - picture), picture, + pic.data(), + (int)(eopicture - finish), finish ); - free(picture); - picture = p; - continue; + free(picture); + picture = p; } assert(erc == REG_NOMATCH); @@ -440,7 +479,7 @@ is_elementary( enum cbl_field_type_t type ) case FldFloat: return true; // takes up space } - dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type); + cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type); return false; } @@ -752,7 +791,7 @@ symbol_field_type_update( cbl_field_t *field, bool redefine_field( cbl_field_t *field ) { - cbl_field_t *primary = symbol_redefines(field); + const cbl_field_t *primary = symbol_redefines(field); bool fOK = true; if( !primary ) return false; @@ -800,7 +839,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { // 8 or more, we need do no further testing because we assume // everything fits. if( data.capacity < 8 ) { - auto p = strchr(data.initial, symbol_decimal_point()); + const auto p = strchr(data.initial, symbol_decimal_point()); if( p && atoll(p+1) != 0 ) { error_msg(loc, "integer type %s VALUE '%s' " "requires integer VALUE", @@ -863,8 +902,8 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { return TOUPPER(ch) == 'E'; } ); if( !has_exponent && data.precision() < pend - p ) { - error_msg(loc, "%s cannot represent VALUE '%s' exactly (max .%zu)", - name, data.initial, pend - p); + error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%zu)", + name, data.initial, '.', pend - p); } } } @@ -922,8 +961,7 @@ const cbl_field_t * literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */) { // Verify literal subscripts if dimensions are correct. size_t ndim(dimensions(r.field)); - if( ndim == 0 || ndim != r.nsubscript ) return NULL; - cbl_refer_t *esub = r.subscripts + r.nsubscript; + if( ndim == 0 || ndim != r.nsubscript() ) return NULL; std::vector<cbl_field_t *> dims( ndim, NULL ); auto pdim = dims.end(); @@ -941,29 +979,28 @@ literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */) { * for the corresponding dimension. Return the first subscript not * meeting those criteria, if any. */ - auto p = std::find_if( r.subscripts, esub, - [&pdim]( const cbl_refer_t& r ) { + auto psub = std::find_if( r.subscripts.begin(), r.subscripts.end(), + [pdim]( const cbl_refer_t& r ) mutable { const auto& occurs((*pdim)->occurs); pdim++; return ! occurs.subscript_ok(r.field); } ); - isub = p - r.subscripts; - return p == esub? NULL : dims[isub]; + isub = psub - r.subscripts.begin(); + return psub == r.subscripts.end()? NULL : dims[isub]; } size_t cbl_refer_t::subscripts_set( const std::list<cbl_refer_t>& subs ) { - nsubscript = subs.size(); - subscripts = new cbl_refer_t[nsubscript]; - std::copy( subs.begin(), subs.end(), subscripts ); - + subscripts.clear(); + std::copy( subs.begin(), subs.end(), std::back_inserter(subscripts) ); return dimensions(field); } const char * cbl_refer_t::str() const { static char subscripts[64]; - sprintf(subscripts, "(%u of %zu dimensions)", nsubscript, dimensions(field)); + sprintf(subscripts, "(%u of " HOST_SIZE_T_PRINT_UNSIGNED " dimensions)", + nsubscript(), (fmt_size_t)dimensions(field)); char *output = xasprintf("%s %s %s", field? field_str(field) : "(none)", 0 < dimensions(field)? subscripts : "", @@ -979,18 +1016,18 @@ cbl_refer_t::name() const { const char * cbl_refer_t::deref_str() const { - std::vector<char> dimstr(nsubscript * 16, '\0'); + std::vector<char> dimstr(nsubscript() * 16, '\0'); dimstr.at(0) = '('; auto p = dimstr.begin() + 1; if( !field ) return name(); - for( auto sub = subscripts; sub < subscripts + nsubscript; sub++ ) { - auto initial = sub->field->data.initial ? sub->field->data.initial : "?"; + for( const auto& sub : subscripts ) { + auto initial = sub.field->data.initial ? sub.field->data.initial : "?"; size_t len = dimstr.end() - p; p += snprintf( &*p, len, "%s ", initial ); } - if( 0 < nsubscript ) { + if( ! subscripts.empty() ) { *--p = ')'; } char *output = xasprintf("%s%s", field->name, dimstr.data()); @@ -1009,10 +1046,10 @@ struct move_corresponding_field { tgt.field = cbl_field_of(symbol_at(elem.second)); if( yydebug ) { - dbgmsg("move_corresponding:%d: SRC: %3zu %s", __LINE__, - elem.first, src.str()); - dbgmsg("move_corresponding:%d: to %3zu %s", __LINE__, - elem.second, tgt.str()); + dbgmsg("move_corresponding:%d: SRC: %3" GCC_PRISZ "u %s", __LINE__, + (fmt_size_t)elem.first, src.str()); + dbgmsg("move_corresponding:%d: to %3" GCC_PRISZ "u %s", __LINE__, + (fmt_size_t)elem.second, tgt.str()); } parser_move(tgt, src); @@ -1067,10 +1104,8 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) static_assert(sizeof(matrix[0]) == COUNT_OF(matrix[0]), "matrix should be square"); - for( const cbl_field_t *args[] = {tgt, src}, **p=args; - p < args + COUNT_OF(args); p++ ) { - auto& f(**p); - switch(f.type) { + for( auto field : { src, tgt } ) { + switch(field->type) { case FldClass: case FldConditional: case FldIndex: @@ -1082,9 +1117,9 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) case FldForward: case FldBlob: default: - if( sizeof(matrix[0]) < f.type ) { + if( sizeof(matrix[0]) < field->type ) { cbl_internal_error("logic error: MOVE %s %s invalid type:", - cbl_field_type_str(f.type), f.name); + cbl_field_type_str(field->type), field->name); } break; } @@ -1116,8 +1151,9 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) if( yydebug && ! retval ) { auto bad = std::find_if( p, pend, []( char ch ) { return ! ISDIGIT(ch); } ); - dbgmsg("%s:%d: offending character '%c' at position %zu", - __func__, __LINE__, *bad, bad - p); + dbgmsg("%s:%d: offending character '%c' at position " + HOST_SIZE_T_PRINT_UNSIGNED, + __func__, __LINE__, *bad, (fmt_size_t)(bad - p)); } } break; @@ -1150,12 +1186,6 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) } } - if( yydebug && getenv(__func__) ) { - dbgmsg("%s:%d: ok to move %s to %s (0x%02x)", __func__, __LINE__, - cbl_field_type_str(src->type), cbl_field_type_str(tgt->type), - retval); - } - return retval; } @@ -1347,7 +1377,7 @@ public: { assert(isym); } - procdef_t( const procref_base_t& ref ) + explicit procdef_t( const procref_base_t& ref ) : procref_base_t(ref) , isym(0) {} @@ -1356,13 +1386,6 @@ public: return procref_base_t(*this) < procref_base_t(that); } - bool operator<( const procref_base_t& that ) const { - if( that.has_section() ) { - return procref_base_t(*this) < that; - } - return strcasecmp(paragraph(), that.paragraph()) < 0; - } - cbl_label_t * label_of() const { return isym == 0? NULL : cbl_label_of(symbol_at(isym)); } @@ -1393,7 +1416,7 @@ static procedures_t::iterator current_procedure = programs.end()->second.end(); class procedure_match { const procref_base_t& ref; public: - procedure_match( const procref_base_t& ref ) : ref(ref) {} + explicit procedure_match( const procref_base_t& ref ) : ref(ref) {} // Match a 2-name reference to section & paragraph, else to one or the other. bool operator()( procedures_t::const_reference elem ) { const procdef_t& key = elem.first; @@ -1421,16 +1444,7 @@ locally_unique( size_t program, const procdef_t& key, const procref_t& ref ) { const char *section_name = ref.has_section()? ref.section() : key.section(); procref_base_t full_ref(section_name, ref.paragraph()); - if( getenv(__func__) ) { - dbgmsg("%s: %zu for ref %s of '%s' (line %d) " - "in %s of '%s' (as %s of '%s')", __func__, - procedures.count(full_ref), - ref.paragraph(), ref.section(), ref.line_number(), - key.paragraph(), key.section(), - full_ref.paragraph(), full_ref.section() ); - } - - return 1 == procedures.count(full_ref); + return 1 == procedures.count(procdef_t(full_ref)); } // Add each section and paragraph to the map as it occurs in the Cobol text. @@ -1451,9 +1465,6 @@ procedure_definition_add( size_t program, const cbl_label_t *procedure ) { } procdef_t key( section_name, paragraph_name, isym ); - if( getenv(__func__) ) { - dbgmsg("%s: #%3zu %s of %s", __func__, isym, paragraph_name, section_name); - } current_procedure = programs[program].insert( make_pair(key, procedures_t::mapped_type()) ); } @@ -1463,9 +1474,6 @@ void procedure_reference_add( const char *section, const char *paragraph, int line, size_t context ) { - if( getenv(__func__) ) { - dbgmsg("%s: line %3d %s of %s", __func__, line, paragraph, section); - } current_procedure->second.push_back( procref_t(section, paragraph, line, context) ); } @@ -1496,10 +1504,11 @@ ambiguous_reference( size_t program ) { ambiguous = find_if_not( proc.second.begin(), proc.second.end(), is_unique(program, proc.first) ); if( proc.second.end() != ambiguous ) { - if( yydebug || getenv("symbol_label_add")) { - dbgmsg("%s: %s of '%s' has %zu potential matches", __func__, - ambiguous->paragraph(), ambiguous->section(), - procedures.count(*ambiguous)); + if( yydebug ) { + dbgmsg("%s: %s of '%s' has " HOST_SIZE_T_PRINT_UNSIGNED + "potential matches", __func__, + ambiguous->paragraph(), ambiguous->section(), + (fmt_size_t)procedures.count(procdef_t(*ambiguous))); } return new procref_t(*ambiguous); } @@ -1526,7 +1535,7 @@ intradeclarative_reference() { class next_group { size_t isym; public: - next_group( symbol_elem_t *group ) : isym(symbol_index(group)) {} + explicit next_group( const symbol_elem_t *group ) : isym(symbol_index(group)) {} // return true if elem is not a member of the group bool operator()( const symbol_elem_t& elem ) { @@ -1542,9 +1551,9 @@ parent_names( const symbol_elem_t *elem, if( is_filler(cbl_field_of(elem)) ) return; - // dbgmsg("%s: asked about %s of %s (%zu away)", __func__, + // dbgmsg("%s: asked about %s of %s (" HOST_SIZE_T_PRINT_UNSIGNED " away)", __func__, // cbl_field_of(elem)->name, - // cbl_field_of(group)->name, elem - group); + // cbl_field_of(group)->name, (fmt_size_t)(elem - group)); for( const symbol_elem_t *e=elem; e && group < e; e = symbol_parent(e) ) { names.push_front( cbl_field_of(e)->name ); @@ -1563,15 +1572,17 @@ public: symbol_elem_t *rgroup, type_t type ) : lgroup(lgroup), rgroup(rgroup), type(type) { - dbgmsg( "%s:%d: for #%zu %s and #%zu %s on line %d", __func__, __LINE__, - symbol_index(lgroup), cbl_field_of(lgroup)->name, - symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno ); + dbgmsg( "%s:%d: for #" HOST_SIZE_T_PRINT_UNSIGNED + " %s and #" HOST_SIZE_T_PRINT_UNSIGNED " %s on line %d", + __func__, __LINE__, + (fmt_size_t)symbol_index(lgroup), cbl_field_of(lgroup)->name, + (fmt_size_t)symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno ); } static bool any_redefines( const cbl_field_t& field, const symbol_elem_t *group ) { for( const cbl_field_t *f = &field; f && f->parent > 0; f = parent_of(f) ) { - symbol_elem_t *e = symbol_at(f->parent); + const symbol_elem_t *e = symbol_at(f->parent); if( e == group || e->type != SymField ) break; if( symbol_redefines(f) ) return true; } @@ -1642,8 +1653,9 @@ corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs, lhsg.a = symbols_begin(field_index(lhs)); lhsg.z = std::find_if( lhsg.a, symbols_end(), next_group(lhsg.a) ); - dbgmsg("%s:%d: examining %zu symbols after %s", __func__, __LINE__, - lhsg.z - lhsg.a, lhs->name); + dbgmsg("%s:%d: examining " HOST_SIZE_T_PRINT_UNSIGNED " symbols after %s", + __func__, __LINE__, + (fmt_size_t)(lhsg.z - lhsg.a), lhs->name); find_corresponding finder( symbol_at(field_index(lhs)), symbol_at(field_index(rhs)), type ); @@ -1651,8 +1663,9 @@ corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs, output.erase(0); - dbgmsg( "%s:%d: %s and %s have %zu corresponding fields", - __func__, __LINE__, lhs->name, rhs->name, output.size() ); + dbgmsg( "%s:%d: %s and %s have " HOST_SIZE_T_PRINT_UNSIGNED + " corresponding fields", + __func__, __LINE__, lhs->name, rhs->name, (fmt_size_t)output.size() ); return output; } @@ -1730,11 +1743,10 @@ struct input_file_t { ino_t inode; int lineno; const char *name; - const line_map *lines; input_file_t( const char *name, ino_t inode, - int lineno=1, const line_map *lines = NULL ) - : inode(inode), lineno(lineno), name(name), lines(lines) + int lineno=1 ) + : inode(inode), lineno(lineno), name(name) { if( inode == 0 ) inode_set(); } @@ -1753,14 +1765,29 @@ struct input_file_t { class unique_stack : public std::stack<input_file_t> { + friend void cobol_set_pp_option(int opt); + bool option_m; + std::set<std::string> all_names; + + const char * + no_wd( const char *wd, const char *name ) { + int i; + for( i=0; wd[i] == name[i]; i++ ) i++; + if( wd[i] == '\0' && name[i] == '/' ) i++; + return yydebug? name : name + i; + } + public: + unique_stack() : option_m(false) {} + bool push( const value_type& value ) { auto ok = std::none_of( c.cbegin(), c.cend(), - [value]( auto& that ) { + [value]( const auto& that ) { return value == that; } ); if( ok ) { std::stack<input_file_t>::push(value); + all_names.insert(value.name); return true; } size_t n = c.size(); @@ -1771,21 +1798,39 @@ class unique_stack : public std::stack<input_file_t> "----- ---- --------" "----------------------------------------"); for( const auto& v : c ) { - dbgmsg( " %4zu %4d %s", c.size() - --n, v.lineno, no_wd(wd, v.name) ); + dbgmsg( " %4" GCC_PRISZ "u %4d %s", + (fmt_size_t)(c.size() - --n), v.lineno, no_wd(wd, v.name) ); } } else { - dbgmsg("unable to get current working directory: %m"); + dbgmsg("unable to get current working directory: %s", xstrerror(errno)); } free(wd); } return false; } - const char * - no_wd( const char *wd, const char *name ) { - int i; - for( i=0; wd[i] == name[i]; i++ ) i++; - if( wd[i] == '\0' && name[i] == '/' ) i++; - return yydebug? name : name + i; + + // Look down into the stack. peek(0) == top() + const input_file_t& peek( size_t n ) const { + gcc_assert( n < size() ); + return c.at(size() - ++n); + } + + void option( int opt ) { // capture other preprocessor options eventually + assert(opt == 'M'); + option_m = true; + } + int option() const { + return option_m? 'M' : 0; + } + + void print() const { + std::string input( top().name ); + printf( "%s: ", input.c_str() ); + for( const auto& name : all_names ) { + if( name != input ) + printf( "\\\n\t%s ", name.c_str() ); + } + printf("\n"); } }; @@ -1794,6 +1839,12 @@ static unique_stack input_filenames; static std::map<std::string, ino_t> old_filenames; static const unsigned int sysp = 0; // not a C header file, cf. line-map.h +void cobol_set_pp_option(int opt) { + // capture other preprocessor options eventually + assert(opt == 'M'); + input_filenames.option_m = true; +} + /* * Maintain a stack of input filenames. Ensure the files are unique (by * inode), to prevent copybook cycles. Before pushing a new name, Record the @@ -1804,12 +1855,13 @@ static const unsigned int sysp = 0; // not a C header file, cf. line-map.h * to enforce uniqueness, and the scanner to maintain line numbers. */ bool cobol_filename( const char *name, ino_t inode ) { - line_map *lines = NULL; + const line_map *lines = NULL; if( inode == 0 ) { auto p = old_filenames.find(name); if( p == old_filenames.end() ) { for( auto& elem : old_filenames ) { - dbgmsg("%6zu %-30s", elem.second, elem.first.c_str()); + dbgmsg("%6" GCC_PRISZ "u %-30s", + (fmt_size_t)elem.second, elem.first.c_str()); } cbl_errx( "logic error: missing inode for %s", name); } @@ -1818,32 +1870,42 @@ bool cobol_filename( const char *name, ino_t inode ) { } linemap_add(line_table, LC_ENTER, sysp, name, 1); input_filename_vestige = name; - bool pushed = input_filenames.push( input_file_t(name, inode, 1, lines) ); - input_filenames.top().lineno = yylineno = 1; - if( getenv(__func__) ) { - dbgmsg(" saving %s with lineno as %d", - input_filenames.top().name, input_filenames.top().lineno); - } + bool pushed = input_filenames.push( input_file_t(name, inode, 1) ); return pushed; } const char * -cobol_lineno_save() { +cobol_lineno( int lineno ) { if( input_filenames.empty() ) return NULL; auto& input( input_filenames.top() ); - input.lineno = yylineno; - if( getenv(__func__) ) { - dbgmsg(" setting %s with lineno as %d", input.name, input.lineno); - } + input.lineno = lineno; return input.name; } +/* + * This function is called from the scanner, usually when a copybook is on top + * of the input stack, before the parser retrieves the token and resets the + * current filename. For that reason, we normaly want to line number of the + * file that is about to become the current one, which is the one behind top(). + * + * If somehow we arrive here when there is nothing underneath, we return the + * current line nubmer, or zero if there's no input. The only consequence is + * that the reported line number might be wrong. + */ +int +cobol_lineno() { + if( input_filenames.empty() ) return 0; + size_t n = input_filenames.size() < 2? 0 : 1; + const auto& input( input_filenames.peek(n) ); + return input.lineno; +} + const char * cobol_filename() { return input_filenames.empty()? input_filename_vestige : input_filenames.top().name; } -const char * +void cobol_filename_restore() { assert(!input_filenames.empty()); const input_file_t& top( input_filenames.top() ); @@ -1851,21 +1913,17 @@ cobol_filename_restore() { input_filename_vestige = top.name; input_filenames.pop(); - if( input_filenames.empty() ) return NULL; + if( input_filenames.empty() ) return; auto& input = input_filenames.top(); - input.lines = linemap_add(line_table, LC_LEAVE, sysp, NULL, 0); - - yylineno = input.lineno; - if( getenv("cobol_filename") ) { - dbgmsg("restoring %s with lineno to %d", input.name, input.lineno); - } - return input.name; + linemap_add(line_table, LC_LEAVE, sysp, NULL, 0); } static location_t token_location; +location_t location_from_lineno() { return token_location; } + template <typename LOC> static void gcc_location_set_impl( const LOC& loc ) { @@ -1893,11 +1951,9 @@ verify_format( const char gmsgid[] ) { static regex_t re; static int cflags = REG_EXTENDED; static int status = regcomp( &re, pattern, cflags ); - static char errbuf[80]; - - if( status != 0 ) { + static char errbuf[80]; int n = regerror(status, &re, errbuf, sizeof(errbuf)); gcc_assert(size_t(n) < sizeof(errbuf)); fprintf(stderr, "%s:%d: %s", __func__, __LINE__, errbuf); @@ -1916,6 +1972,8 @@ verify_format( const char gmsgid[] ) { static const diagnostic_option_id option_zero; size_t parse_error_inc(); +void ydferror( const char gmsgid[], ... ) ATTRIBUTE_GCOBOL_DIAG(1, 2); + void ydferror( const char gmsgid[], ... ) { verify_format(gmsgid); @@ -1938,7 +1996,7 @@ extern YYLTYPE yylloc; * the global token_location, which is passed to the diagnostic framework. The * original value is restored when the instantiated variable goes out of scope. */ -class temp_loc_t : protected YYLTYPE { +class temp_loc_t { location_t orig; public: temp_loc_t() : orig(token_location) { @@ -1946,10 +2004,10 @@ class temp_loc_t : protected YYLTYPE { gcc_location_set(yylloc); // use lookahead location } - temp_loc_t( const YYLTYPE& loc) : orig(token_location) { + explicit temp_loc_t( const YYLTYPE& loc) : orig(token_location) { gcc_location_set(loc); } - temp_loc_t( const YDFLTYPE& loc) : orig(token_location) { + explicit temp_loc_t( const YDFLTYPE& loc) : orig(token_location) { YYLTYPE lloc = { loc.first_line, loc.first_column, loc.last_line, loc.last_column }; @@ -1992,21 +2050,14 @@ void error_msg( const YYLTYPE& loc, const char gmsgid[], ... ) { ERROR_MSG_BODY } +void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... ) + ATTRIBUTE_GCOBOL_DIAG(2, 3); + void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... ) { ERROR_MSG_BODY } void -cdf_location_set(YYLTYPE loc) { - extern YDFLTYPE ydflloc; - - ydflloc.first_line = loc.first_line; - ydflloc.first_column = loc.first_column; - ydflloc.last_line = loc.last_line; - ydflloc.last_column = loc.last_column; -} - -void yyerror( const char gmsgid[], ... ) { temp_loc_t looker; verify_format(gmsgid); @@ -2059,7 +2110,7 @@ yyerrorvl( int line, const char *filename, const char fmt[], ... ) { static inline size_t matched_length( const regmatch_t& rm ) { return rm.rm_eo - rm.rm_so; } -const char * +int cobol_fileline_set( const char line[] ) { static const char pattern[] = "#line +([[:alnum:]]+) +[\"']([^\"']+). *\n"; static const int cflags = REG_EXTENDED | REG_ICASE; @@ -2072,7 +2123,7 @@ cobol_fileline_set( const char line[] ) { if( (erc = regcomp(&re, pattern, cflags)) != 0 ) { regerror(erc, &re, regexmsg, sizeof(regexmsg)); dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); - return line; + return 0; } preg = &re; } @@ -2080,10 +2131,10 @@ cobol_fileline_set( const char line[] ) { if( erc != REG_NOMATCH ) { regerror(erc, preg, regexmsg, sizeof(regexmsg)); dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); - return line; + return 0; } - error_msg(yylloc, "invalid #line directive: %s", line ); - return line; + error_msg(yylloc, "invalid %<#line%> directive: %s", line ); + return 0; } const char @@ -2092,40 +2143,39 @@ cobol_fileline_set( const char line[] ) { int fileline; if( 1 != sscanf(line_str, "%d", &fileline) ) - yywarn("could not parse line number %s from #line directive", line_str); + yywarn("could not parse line number %s from %<#line%> directive", line_str); input_file_t input_file( filename, ino_t(0), fileline ); // constructor sets inode - if( getenv(__func__) ) return filename; // ignore #line directive - if( input_filenames.empty() ) { - input_file.lines = linemap_add(line_table, LC_ENTER, sysp, filename, 1); input_filenames.push(input_file); } input_file_t& file = input_filenames.top(); file = input_file; - yylineno = file.lineno; - return file.name; + return file.lineno; } -class timespec_t { - struct timespec now; +//#define TIMING_PARSE +#ifdef TIMING_PARSE +class cbl_timespec { + uint64_t now; // Nanoseconds public: - timespec_t() { - clock_gettime(CLOCK_MONOTONIC, &now); + cbl_timespec() { + now = get_time_nanoseconds(); } double ns() const { - return now.tv_sec * 1000000000 + now.tv_nsec; + return now; } - friend double operator-( const timespec_t& now, const timespec_t& then ); + friend double operator-( const cbl_timespec& now, const cbl_timespec& then ); }; double -operator-( const timespec_t& then, const timespec_t& now ) { +operator-( const cbl_timespec& now, const cbl_timespec& then ) { return (now.ns() - then.ns()) / 1000000000; } +#endif static int parse_file( const char filename[] ) @@ -2136,15 +2186,25 @@ parse_file( const char filename[] ) parser_enter_file(filename); - timespec_t start; + if( input_filenames.option() == 'M' ) { + input_filenames.print(); + return 0; + } + +#ifdef TIMING_PARSE + cbl_timespec start; +#endif int erc = yyparse(); - timespec_t finish; +#ifdef TIMING_PARSE + cbl_timespec finish; double dt = finish - start; + printf("Overall parse & generate time is %.6f seconds\n", dt); +#endif + parser_leave_file(); - //printf("Overall parse & generate time is %.6f seconds\n", dt); fclose (yyin); @@ -2168,30 +2228,20 @@ cobol_set_debugging( bool flex, bool yacc, bool parser ) yy_flex_debug = flex? 1 : 0; ydfdebug = yydebug = yacc? 1 : 0; f_trace_debug = parser? 1 : 0; - - char *ind = getenv("INDICATOR_COLUMN"); - if( ind ) { - int col; - if( 1 != sscanf(ind, "%d", &col) ) { - yywarn("ignored non-integer value for INDICATOR_COLUMN=%s", ind); - } - cobol_set_indicator_column(col); - } } -os_locale_t os_locale = { "UTF-8", xstrdup("C.UTF-8") }; - +os_locale_t os_locale = { "UTF-8", "C.UTF-8" }; void cobol_parse_files (int nfile, const char **files) { - char * opaque = setlocale(LC_CTYPE, ""); + const char * opaque = setlocale(LC_CTYPE, ""); if( ! opaque ) { yywarn("setlocale: unable to initialize LOCALE"); } else { char *codeset = nl_langinfo(CODESET); if( ! codeset ) { - yywarn("nl_langinfo failed after setlocale succeeded"); + yywarn("%<nl_langinfo%> failed after %<setlocale()%> succeeded"); } else { os_locale.codeset = codeset; } @@ -2213,6 +2263,7 @@ cbl_message(int fd, const char *format_string, ...) char *ostring = xvasprintf(format_string, ap); va_end(ap); write(fd, ostring, strlen(ostring)); + write(fd, "\n", 1); free(ostring); } @@ -2302,7 +2353,7 @@ dbgmsg(const char *msg, ...) { void dialect_error( const YYLTYPE& loc, const char term[], const char dialect[] ) { - error_msg(loc, "%s is not ISO syntax, requires -dialect %s", + error_msg(loc, "%s is not ISO syntax, requires %<-dialect %s%>", term, dialect); } @@ -2313,12 +2364,621 @@ bool fisdigit(int c) bool fisspace(int c) { return ISSPACE(c); - }; + } int ftolower(int c) { return TOLOWER(c); } +int ftoupper(int c) + { + return TOUPPER(c); + } bool fisprint(int c) { return ISPRINT(c); - }; + } + +// 8.9 Reserved words +static const std::set<std::string> reserved_words = { + // GCC COBOL keywords + "COMMAND-LINE", + "COMMAND-LINE-COUNT", + + // GCC device names + "C01", + "C02", + "C03", + "C04", + "C05", + "C06", + "C07", + "C08", + "C09", + "C10", + "C11", + "C12", + "CONSOLE", + "S01", + "S02", + "S03", + "S04", + "S05", + "STDERR", + "STDIN", + "STDOUT", + "SYSIN", + "SYSIPT", + "SYSLIST", + "SYSLST", + "SYSOUT", + "SYSPCH", + "SYSPUNCH", + "UPSI", + + // IBM keywords that GCC recognizes + "BASIS", + "CBL", + "ENTER", + "READY", + "TITLE", + "TRACE", + "ALTER", + "COBOL", + "DATE-COMPILED", + "DATE-WRITTEN", + "DBCS", + "DEBUGGING", + "EGCS", + "ENTRY", + "EVERY", + "INSTALLATION", + "I-O-CONTROL", + "KANJI", + "LABEL", + "NULLS", + "PADDING", + "PROCEDURES", + "PROCEED", + "RECORDING", + "RERUN", + "REVERSED", + "SECURITY", + "TALLY", + "VOLATILE", + "XML", + "END-START", + + // ISO 2023 keywords + "ACCEPT", + "ACCESS", + "ACTIVE-CLASS", + "ADD", + "ADDRESS", + "ADVANCING", + "AFTER", + "ALIGNED", + "ALL", + "ALLOCATE", + "ALPHABET", + "ALPHABETIC", + "ALPHABETIC-LOWER", + "ALPHABETIC-UPPER", + "ALPHANUMERIC", + "ALPHANUMERIC-EDITED", + "ALSO", + "ALTERNATE", + "AND", + "ANY", + "ANYCASE", + "ARE", + "AREA", + "AREAS", + "AS", + "ASCENDING", + "ASSIGN", + "AT", + "B-AND", + "B-NOT", + "B-OR", + "B-SHIFT-L", + "B-SHIFT-LC", + "B-SHIFT-R", + "B-SHIFT-RC", + "B-XOR", + "BASED", + "BEFORE", + "BINARY", + "BINARY-CHAR", + "BINARY-DOUBLE", + "BINARY-LONG", + "BINARY-SHORT", + "BIT", + "BLANK", + "BLOCK", + "BOOLEAN", + "BOTTOM", + "BY", + "CALL", + "CANCEL", + "CF", + "CH", + "CHARACTER", + "CHARACTERS", + "CLASS", + "CLASS-ID", + "CLOSE", + "CODE", + "CODE-SET", + "COL", + "COLLATING", + "COLS", + "COLUMN", + "COLUMNS", + "COMMA", + "COMMIT", + "COMMON", + "COMP", + "COMPUTATIONAL", + "COMPUTE", + "CONDITION", + "CONFIGURATION", + "CONSTANT", + "CONTAINS", + "CONTENT", + "CONTINUE", + "CONTROL", + "CONTROLS", + "CONVERTING", + "COPY", + "CORR", + "CORRESPONDING", + "COUNT", + "CRT", + "CURRENCY", + "CURSOR", + "DATA", + "DATA-POINTER", + "DATE", + "DAY", + "DAY-OF-WEEK", + "DE", + "DECIMAL-POINT", + "DECLARATIVES", + "DEFAULT", + "DELETE", + "DELIMITED", + "DELIMITER", + "DEPENDING", + "DESCENDING", + "DESTINATION", + "DETAIL", + "DISPLAY", + "DIVIDE", + "DIVISION", + "DOWN", + "DUPLICATES", + "DYNAMIC", + "EC", + "EDITING", + "ELSE", + "EMD-START", + "END", + "END-ACCEPT", + "END-ADD", + "END-CALL", + "END-COMPUTE", + "END-DELETE", + "END-DISPLAY", + "END-DIVIDE", + "END-EVALUATE", + "END-IF", + "END-MULTIPLY", + "END-OF-PAGE", + "END-PERFORM", + "END-READ", + "END-RECEIVE", + "END-RETURN", + "END-REWRITE", + "END-SEARCH", + "END-SEND", + "END-STRING", + "END-SUBTRACT", + "END-UNSTRING", + "END-WRITE", + "ENVIRONMENT", + "EO", + "EOP", + "EQUAL", + "ERROR", + "EVALUATE", + "EXCEPTION", + "EXCEPTION-OBJECT", + "EXCLUSIVE-OR", + "EXIT", + "EXTEND", + "EXTERNAL", + "FACTORY", + "FALSE", + "FARTHEST-FROM-ZERO", + "FD", + "FILE", + "FILE-CONTROL", + "FILLER", + "FINAL", + "FINALLY", + "FIRST", + "FLOAT-BINARY-128", + "FLOAT-BINARY-32", + "FLOAT-BINARY-64", + "FLOAT-DECIMAL-16", + "FLOAT-DECIMAL-34", + "FLOAT-EXTENDED", + "FLOAT-INFINITY", + "FLOAT-LONG", + "FLOAT-NOT-A-NUMBER-", + "FLOAT-SHORT", + "FOOTING", + "FOR", + "FORMAT", + "FREE", + "FROM", + "FUNCTION", + "FUNCTION-ID", + "FUNCTION-POINTER", + "GENERATE", + "GET", + "GIVING", + "GLOBAL", + "GO", + "GOBACK", + "GREATER", + "GROUP", + "GROUP-USAGE", + "HEADING", + "HIGH-VALUE", + "HIGH-VALUES", + "I-O", + "I-OICONTROL", + "IDENTIFICATION", + "IF", + "IN", + "IN-ARITHMETIC-RANGE", + "INDEX", + "INDEXED", + "INDICATE", + "INHERITS", + "INITIAL", + "INITIALIZE", + "INITIATE", + "INPUT", + "INPUT-OUTPUT", + "INSPECT", + "INTERFACE", + "INTERFACE-ID", + "INTO", + "INVALID", + "INVOKE", + "IS", + "JUST", + "JUSTIFIED", + "KEY", + "LAST", + "LEADING", + "LEFT", + "LENGTH", + "LESS", + "LIMIT", + "LIMITS", + "LINAGE", + "LINAGE-COUNTER", + "LINE", + "LINE-COUNTER", + "LINES", + "LINKAGE", + "LOCAL-STORAGE", + "LOCALE", + "LOCATION", + "LOCK", + "LOW-VALUE", + "LOW-VALUES", + "MERGE", + "MESSAGE-TAG", + "METHOD-ID", + "MINUS", + "MODE", + "MOVE", + "MULTIPLY", + "NATIONAL", + "NATIONAL-EDITED", + "NATIVE", + "NEAREST-TO-ZERO", + "NEGATIVE", + "NESTED", + "NEXT", + "NO", + "NOT", + "NULL", + "NUMBER", + "NUMERIC", + "NUMERIC-EDITED", + "OBJECT", + "OBJECT-COMPUTER", + "OBJECT-REFERENCE", + "OCCURS", + "OF", + "OFF", + "OMITTED", + "ON", + "OPEN", + "OPTIONAL", + "OPTIONS", + "OR", + "ORDER", + "ORGANIZATION", + "OTHER", + "OUTPUT", + "OVERFLOW", + "OVERRIDE", + "PACKED-DECIMAL", + "PAGE", + "PAGE-COUNTER", + "PERFORM", + "PF", + "PH", + "PIC", + "PICTURE", + "PLUS", + "POINTER", + "POSITIVE", + "PRESENT", + "PRINTING", + "PROCEDURE", + "PROGRAM", + "PROGRAM-ID", + "PROGRAM-POINTER", + "PROPERTY", + "PROTOTYPE", + "QUIET", + "QUOTE", + "QUOTES", + "RAISE", + "RAISING", + "RANDOM", + "RD", + "READ", + "RECEIVE", + "RECORD", + "RECORDS", + "REDEFINES", + "REEL", + "REFERENCE", + "RELATIVE", + "RELEASE", + "REMAINDER", + "REMOVAL", + "RENAMES", + "REPLACE", + "REPLACING", + "REPORT", + "REPORTING", + "REPORTS", + "REPOSITORY", + "RESERVE", + "RESET", + "RESUME", + "RETRY", + "RETURN", + "RETURNING", + "REWIND", + "REWRITE", + "RF", + "RH", + "RIGHT", + "ROLLBACK", + "ROUNDED", + "RUN", + "SAME", + "SCREEN", + "SD", + "SEARCH", + "SECTION", + "SELECT", + "SELF", + "SEND", + "SENTENCE", + "SEPARATE", + "SEQUENCE", + "SEQUENTIAL", + "SET", + "SHARING", + "SIGN", + "SIGNALING", + "SIZE", + "SORT", + "SORT-MERGE", + "SOURCE", + "SOURCE-COMPUTER", + "SOURCES", + "SPACE", + "SPACES", + "SPECIAL-NAMES", + "STANDARD", + "STANDARD-1", + "STANDARD-2", + "START", + "STATUS", + "STOP", + "STRING", + "SUBTRACT", + "SUM", + "SUPER", + "SUPPRESS", + "SYMBOLIC", + "SYNC", + "SYNCHRONIZED", + "SYSTEM-DEFAULT", + "TABLE", + "TALLYING", + "TERMINATE", + "TEST", + "THAN", + "THEN", + "THROUGH", + "THRU", + "TIME", + "TIMES", + "TO", + "TOP", + "TRAILING", + "TRUE", + "TYPE", + "TYPEDEF", + "UNIT", + "UNIVERSAL", + "UNLOCK", + "UNSTRING", + "UNTIL", + "UP", + "UPON", + "USAGE", + "USE", + "USER-DEFAULT", + "USING", + "VAL-STATUS", + "VALID", + "VALIDATE", + "VALIDATE-STATUS", + "VALUE", + "VALUES", + "VARYING", + "WHEN", + "WITH", + "WORKING-STORAGE", + "WRITE", + "XOR", + "ZERO", + "ZEROES", + "ZEROS", + "+", + "-", + "*", + "/", + "**", + "<", + "<=", + "<>", + "=", + ">", + ">=", + "&", + "*>", + "::", + ">>", +}; + +// 8.10 Context-sensitive words +static const std::set<std::string> context_sensitive_words = { + "ACTIVATING", // MODULE-NAME intrinsic function + "ANUM", // CONVERT intrinsic function + "APPLY", // I-O-CONTROL paragraph + "ARITHMETIC", // OPTIONS paragraph + "ATTRIBUTE", // SET statement + "AUTO", // screen description entry + "AUTOMATIC", // LOCK MODE clause + "AWAY-FROM-ZERO", // ROUNDED phrase + "BACKGROUND-COLOR", // screen description entry + "BACKWARD", // INSPECT statement + "BELL", // screen description entry and SET attribute statement + "BINARY-ENCODING", // USAGE clause and FLOAT-DECIMAL clause + "BLINK", // screen description entry and SET attribute statement + "BYTE", // CONVERT intrinsic function + "BYTES", // RECORD clause + "BYTE-LENGTH", // constant entry + "CAPACITY", // OCCURS clause + "CENTER", // COLUMN clause + "CLASSIFICATION", // OBJECT-COMPUTER paragraph + "CURRENT", // MODULE-NAME intrinsic function + "CYCLE", // EXIT statement + "DECIMAL-ENCODING", // USAGE clause and FLOAT-DECIMAL clause + "EOL", // ERASE clause in a screen description entry + "EOS", // ERASE clause in a screen description entry + "ENTRY-CONVENTION", // OPTIONS paragraph + "ERASE", // screen description entry + "EXPANDS", // class-specifier and interface-specifier of the REPOSITORY paragraph + "FLOAT-BINARY", // OPTIONS paragraph + "FLOAT-DECIMAL", // OPTIONS paragraph + "FOREGROUND-COLOR", // screen description entry + "FOREVER", // RETRY phrase + "FULL", // screen description entry + "HEX", // CONVERT intrinsic function + "HIGH-ORDER-LEFT", // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause + "HIGH-ORDER-RIGHT", // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause + "HIGHLIGHT", // screen description entry and SET attribute statement + "IGNORING", // READ statement + "IMPLEMENTS", // FACTORY paragraph and OBJECT paragraph + "INITIALIZED", // ALLOCATE statement and OCCURS clause + "INTERMEDIATE", // OPTIONS paragraph + "INTRINSIC", // function-specifier of the REPOSITORY paragraph + "LC_ALL", // SET statement + "LC_COLLATE", // SET statement + "LC_CTYPE", // SET statement + "LC_MESSAGES", // SET statement + "LC_MONETARY", // SET statement + "LC_NUMERIC", // SET statement + "LC_TIME", // SET statement + "LOWLIGHT", // screen description entry and SET attribute statement + "MANUAL", // LOCK MODE clause + "MULTIPLE", // LOCK ON phrase + "NAT", // CONVERT intrinsic function + "NEAREST-AWAY-FROM-ZERO", // INTERMEDIATE ROUNDING clause and ROUNDED phrase + "NEAREST-EVEN", // INTERMEDIATE ROUNDING clause and ROUNDED phrase + "NEAREST-TOWARD-ZERO", // INTERMEDIATE ROUNDING clause and ROUNDED phrase + "NONE", // DEFAULT clause + "NORMAL", // STOP statement + "NUMBERS", // COLUMN clause and LINE clause + "ONLY", // Object-view, SHARING clause, SHARING phrase, and USAGE clause + "PARAGRAPH", // EXIT statement + "PREFIXED", // DYNAMIC LENGTH STRUCTURE clause + "PREVIOUS", // READ statement + "PROHIBITED", // INTERMEDIATE ROUNDING clause and ROUNDED phrase + "RECURSIVE", // PROGRAM-ID paragraph + "RELATION", // VALIDATE-STATUS clause + "REQUIRED", // screen description entry + "REVERSE-VIDEO", // screen description entry and SET attribute statement + "ROUNDING", // OPTIONS paragraph + "SECONDS", // RETRY phrase, CONTINUE statement + "SECURE", // screen description entry + "SHORT", // DYNAMIC LENGTH STRUCTURE clause + "SIGNED", // DYNAMIC LENGTH STRUCTURE clause and USAGE clause + "STACK", // MODULE-NAME intrinsic function + "STANDARD-BINARY", // ARITHMETIC clause + "STANDARD-DECIMAL", // ARITHMETIC clause + "STATEMENT", // RESUME statement + "STEP", // OCCURS clause + "STRONG", // TYPEDEF clause + "STRUCTURE", // DYNAMIC LENGTH STRUCTURE clause + "SYMBOL", // CURRENCY clause + "TOP-LEVEL", // MODULE-NAME intrinsic function + "TOWARD-GREATER", // ROUNDED phrase + "TOWARD-LESSER", // ROUNDED phrase + "TRUNCATION", // INTERMEDIATE ROUNDING clause and ROUNDED phrase + "UCS-4", // ALPHABET clause + "UNDERLINE", // screen description entry and SET attribute statement + "UNSIGNED", // USAGE clause + "UTF-8", // ALPHABET clause + "UTF-16", // ALPHABET clause + "YYYYDDD", // ACCEPT statement + "YYYYMMDD", // ACCEPT statement +}; + +// Is the input a COBOL word, per ISO/IEC 1989:2023 (E) ? +// We add a few GCC-specific keywords, and our supported IBM keywords. +bool +iso_cobol_word( const std::string& name, bool include_context ) { + auto ok = 1 == reserved_words.count(name); + if( include_context && !ok ) { + ok = 1 == context_sensitive_words.count(name); + } + return ok; +} + |