aboutsummaryrefslogtreecommitdiff
path: root/gcc/cobol/util.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/cobol/util.cc')
-rw-r--r--gcc/cobol/util.cc1078
1 files changed, 869 insertions, 209 deletions
diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc
index 62ecd98..23f605d 100644
--- a/gcc/cobol/util.cc
+++ b/gcc/cobol/util.cc
@@ -35,6 +35,10 @@
*/
#include "cobol-system.h"
+#include "coretypes.h"
+#include "tree.h"
+#undef yy_flex_debug
+
#include <langinfo.h>
#include "coretypes.h"
@@ -55,13 +59,13 @@
#include "cbldiag.h"
#include "lexio.h"
-#define HOWEVER_GCC_DEFINES_TREE
#include "../../libgcobol/ec.h"
#include "../../libgcobol/common-defs.h"
#include "symbols.h"
#include "inspect.h"
#include "../../libgcobol/io.h"
#include "genapi.h"
+#include "genutil.h"
#pragma GCC diagnostic ignored "-Wunused-result"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -72,14 +76,47 @@ extern int yyparse(void);
extern int demonstration_administrator(int N);
+#if !defined (HAVE_GET_CURRENT_DIR_NAME)
+/* Posix platforms might not have get_current_dir_name but should have
+ getcwd() and PATH_MAX. */
+#if __has_include (<limits.h>)
+# include <limits.h>
+#endif
+/* The Hurd doesn't define PATH_MAX. */
+#if !defined (PATH_MAX) && defined(__GNU__)
+# define PATH_MAX 4096
+#endif
+static inline char *
+get_current_dir_name ()
+{
+ /* Use libiberty's allocator here. */
+ char *buf = (char *) xmalloc (PATH_MAX);
+ return getcwd (buf, PATH_MAX);
+}
+#endif
+
+/*
+ * For printing messages, usually the size of the thing is some kind of string
+ * length, and doesn't really need a size_t. For message formatting, use a
+ * simple unsigned long, and warn if that's no good. "gb4" here stands for
+ * "4 Gigabytes".
+ */
+unsigned long
+gb4( size_t input ) {
+ if( input != static_cast<unsigned long>(input) ) {
+ yywarn("size too large to print: %lx:%lx",
+ (unsigned long)(input >> (4 * sizeof(unsigned long))),
+ static_cast<unsigned long>(input));
+ }
+ return input;
+}
+
const char *
symbol_type_str( enum symbol_type_t type )
{
switch(type) {
case SymFilename:
return "SymFilename";
- case SymFunction:
- return "SymFunction";
case SymField:
return "SymField";
case SymLabel:
@@ -93,7 +130,7 @@ symbol_type_str( enum symbol_type_t type )
case SymDataSection:
return "SymDataSection";
}
- dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type);
+ cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
return "???";
}
@@ -142,7 +179,7 @@ cbl_field_type_str( enum cbl_field_type_t type )
case FldBlob:
return "FldBlob";
}
- dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type);
+ cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
return "???";
}
@@ -302,8 +339,9 @@ is_numeric_edited( const char picture[] ) {
break;
default:
numed_message = xasprintf("invalid PICTURE character "
- "'%c' at offset %zu in '%s'",
- *p, p - picture, picture);
+ "'%c' at offset " HOST_SIZE_T_PRINT_UNSIGNED
+ " in '%s'",
+ *p, (fmt_size_t)(p - picture), picture);
break;
}
@@ -327,49 +365,50 @@ normalize_picture( char picture[] )
regmatch_t pmatch[4];
if( (erc = regcomp(preg, regex, cflags)) != 0 ) {
- regerror(erc, preg, regexmsg, sizeof(regexmsg));
- dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
- return picture;
+ regerror(erc, preg, regexmsg, sizeof(regexmsg));
+ dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
+ return picture;
}
while( (erc = regexec(preg, picture, COUNT_OF(pmatch), pmatch, 0)) == 0 ) {
- assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo);
- size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
- assert(len == 1);
- const char *start = picture + pmatch[1].rm_so;
-
- assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo);
- len = pmatch[2].rm_eo - pmatch[2].rm_so;
- assert(len > 0);
-
- /*
- * Overwrite e.g. A(4) with AAAA.
- */
- assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number
- p = picture + pmatch[2].rm_so;
- len = 0;
- if( 1 != sscanf(p, "%zu", &len) ) {
- dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p);
- goto irregular;
- }
- if( len == 0 ) {
- dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p);
- goto irregular;
- }
+ assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo);
+ size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
+ assert(len == 1);
+ const char *start = picture + pmatch[1].rm_so;
+
+ assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo);
+ len = pmatch[2].rm_eo - pmatch[2].rm_so;
+ assert(len > 0);
+
+ /*
+ * Overwrite e.g. A(4) with AAAA.
+ */
+ assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number
+ p = picture + pmatch[2].rm_so;
+ len = 0;
+ fmt_size_t lenf = 0;
+ if( 1 != sscanf(p, "%" GCC_PRISZ "u", &lenf) ) {
+ dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p);
+ goto irregular;
+ }
+ len = lenf;
+ if( len == 0 ) {
+ dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p);
+ goto irregular;
+ }
- std::vector <char> pic(len + 1, '\0');
- memset(pic.data(), *start, len);
- const char *finish = picture + pmatch[2].rm_eo,
- *eopicture = picture + strlen(picture);
+ std::vector <char> pic(len + 1, '\0');
+ memset(pic.data(), *start, len);
+ const char *finish = picture + pmatch[2].rm_eo,
+ *eopicture = picture + strlen(picture);
- p = xasprintf( "%*s%s%*s",
- (int)(start - picture), picture,
- pic.data(),
- (int)(eopicture - finish), finish );
+ p = xasprintf( "%*s%s%*s",
+ (int)(start - picture), picture,
+ pic.data(),
+ (int)(eopicture - finish), finish );
- free(picture);
- picture = p;
- continue;
+ free(picture);
+ picture = p;
}
assert(erc == REG_NOMATCH);
@@ -440,7 +479,7 @@ is_elementary( enum cbl_field_type_t type )
case FldFloat:
return true; // takes up space
}
- dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type);
+ cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
return false;
}
@@ -752,7 +791,7 @@ symbol_field_type_update( cbl_field_t *field,
bool
redefine_field( cbl_field_t *field ) {
- cbl_field_t *primary = symbol_redefines(field);
+ const cbl_field_t *primary = symbol_redefines(field);
bool fOK = true;
if( !primary ) return false;
@@ -800,7 +839,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
// 8 or more, we need do no further testing because we assume
// everything fits.
if( data.capacity < 8 ) {
- auto p = strchr(data.initial, symbol_decimal_point());
+ const auto p = strchr(data.initial, symbol_decimal_point());
if( p && atoll(p+1) != 0 ) {
error_msg(loc, "integer type %s VALUE '%s' "
"requires integer VALUE",
@@ -863,8 +902,8 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
return TOUPPER(ch) == 'E';
} );
if( !has_exponent && data.precision() < pend - p ) {
- error_msg(loc, "%s cannot represent VALUE '%s' exactly (max .%zu)",
- name, data.initial, pend - p);
+ error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%zu)",
+ name, data.initial, '.', pend - p);
}
}
}
@@ -922,8 +961,7 @@ const cbl_field_t *
literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */) {
// Verify literal subscripts if dimensions are correct.
size_t ndim(dimensions(r.field));
- if( ndim == 0 || ndim != r.nsubscript ) return NULL;
- cbl_refer_t *esub = r.subscripts + r.nsubscript;
+ if( ndim == 0 || ndim != r.nsubscript() ) return NULL;
std::vector<cbl_field_t *> dims( ndim, NULL );
auto pdim = dims.end();
@@ -941,29 +979,28 @@ literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */) {
* for the corresponding dimension. Return the first subscript not
* meeting those criteria, if any.
*/
- auto p = std::find_if( r.subscripts, esub,
- [&pdim]( const cbl_refer_t& r ) {
+ auto psub = std::find_if( r.subscripts.begin(), r.subscripts.end(),
+ [pdim]( const cbl_refer_t& r ) mutable {
const auto& occurs((*pdim)->occurs);
pdim++;
return ! occurs.subscript_ok(r.field);
} );
- isub = p - r.subscripts;
- return p == esub? NULL : dims[isub];
+ isub = psub - r.subscripts.begin();
+ return psub == r.subscripts.end()? NULL : dims[isub];
}
size_t
cbl_refer_t::subscripts_set( const std::list<cbl_refer_t>& subs ) {
- nsubscript = subs.size();
- subscripts = new cbl_refer_t[nsubscript];
- std::copy( subs.begin(), subs.end(), subscripts );
-
+ subscripts.clear();
+ std::copy( subs.begin(), subs.end(), std::back_inserter(subscripts) );
return dimensions(field);
}
const char *
cbl_refer_t::str() const {
static char subscripts[64];
- sprintf(subscripts, "(%u of %zu dimensions)", nsubscript, dimensions(field));
+ sprintf(subscripts, "(%u of " HOST_SIZE_T_PRINT_UNSIGNED " dimensions)",
+ nsubscript(), (fmt_size_t)dimensions(field));
char *output = xasprintf("%s %s %s",
field? field_str(field) : "(none)",
0 < dimensions(field)? subscripts : "",
@@ -979,18 +1016,18 @@ cbl_refer_t::name() const {
const char *
cbl_refer_t::deref_str() const {
- std::vector<char> dimstr(nsubscript * 16, '\0');
+ std::vector<char> dimstr(nsubscript() * 16, '\0');
dimstr.at(0) = '(';
auto p = dimstr.begin() + 1;
if( !field ) return name();
- for( auto sub = subscripts; sub < subscripts + nsubscript; sub++ ) {
- auto initial = sub->field->data.initial ? sub->field->data.initial : "?";
+ for( const auto& sub : subscripts ) {
+ auto initial = sub.field->data.initial ? sub.field->data.initial : "?";
size_t len = dimstr.end() - p;
p += snprintf( &*p, len, "%s ", initial );
}
- if( 0 < nsubscript ) {
+ if( ! subscripts.empty() ) {
*--p = ')';
}
char *output = xasprintf("%s%s", field->name, dimstr.data());
@@ -1009,10 +1046,10 @@ struct move_corresponding_field {
tgt.field = cbl_field_of(symbol_at(elem.second));
if( yydebug ) {
- dbgmsg("move_corresponding:%d: SRC: %3zu %s", __LINE__,
- elem.first, src.str());
- dbgmsg("move_corresponding:%d: to %3zu %s", __LINE__,
- elem.second, tgt.str());
+ dbgmsg("move_corresponding:%d: SRC: %3" GCC_PRISZ "u %s", __LINE__,
+ (fmt_size_t)elem.first, src.str());
+ dbgmsg("move_corresponding:%d: to %3" GCC_PRISZ "u %s", __LINE__,
+ (fmt_size_t)elem.second, tgt.str());
}
parser_move(tgt, src);
@@ -1067,10 +1104,8 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
static_assert(sizeof(matrix[0]) == COUNT_OF(matrix[0]),
"matrix should be square");
- for( const cbl_field_t *args[] = {tgt, src}, **p=args;
- p < args + COUNT_OF(args); p++ ) {
- auto& f(**p);
- switch(f.type) {
+ for( auto field : { src, tgt } ) {
+ switch(field->type) {
case FldClass:
case FldConditional:
case FldIndex:
@@ -1082,9 +1117,9 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
case FldForward:
case FldBlob:
default:
- if( sizeof(matrix[0]) < f.type ) {
+ if( sizeof(matrix[0]) < field->type ) {
cbl_internal_error("logic error: MOVE %s %s invalid type:",
- cbl_field_type_str(f.type), f.name);
+ cbl_field_type_str(field->type), field->name);
}
break;
}
@@ -1116,8 +1151,9 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
if( yydebug && ! retval ) {
auto bad = std::find_if( p, pend,
[]( char ch ) { return ! ISDIGIT(ch); } );
- dbgmsg("%s:%d: offending character '%c' at position %zu",
- __func__, __LINE__, *bad, bad - p);
+ dbgmsg("%s:%d: offending character '%c' at position "
+ HOST_SIZE_T_PRINT_UNSIGNED,
+ __func__, __LINE__, *bad, (fmt_size_t)(bad - p));
}
}
break;
@@ -1150,12 +1186,6 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
}
}
- if( yydebug && getenv(__func__) ) {
- dbgmsg("%s:%d: ok to move %s to %s (0x%02x)", __func__, __LINE__,
- cbl_field_type_str(src->type), cbl_field_type_str(tgt->type),
- retval);
- }
-
return retval;
}
@@ -1347,7 +1377,7 @@ public:
{
assert(isym);
}
- procdef_t( const procref_base_t& ref )
+ explicit procdef_t( const procref_base_t& ref )
: procref_base_t(ref)
, isym(0)
{}
@@ -1356,13 +1386,6 @@ public:
return procref_base_t(*this) < procref_base_t(that);
}
- bool operator<( const procref_base_t& that ) const {
- if( that.has_section() ) {
- return procref_base_t(*this) < that;
- }
- return strcasecmp(paragraph(), that.paragraph()) < 0;
- }
-
cbl_label_t * label_of() const {
return isym == 0? NULL : cbl_label_of(symbol_at(isym));
}
@@ -1393,7 +1416,7 @@ static procedures_t::iterator current_procedure = programs.end()->second.end();
class procedure_match {
const procref_base_t& ref;
public:
- procedure_match( const procref_base_t& ref ) : ref(ref) {}
+ explicit procedure_match( const procref_base_t& ref ) : ref(ref) {}
// Match a 2-name reference to section & paragraph, else to one or the other.
bool operator()( procedures_t::const_reference elem ) {
const procdef_t& key = elem.first;
@@ -1421,16 +1444,7 @@ locally_unique( size_t program, const procdef_t& key, const procref_t& ref ) {
const char *section_name = ref.has_section()? ref.section() : key.section();
procref_base_t full_ref(section_name, ref.paragraph());
- if( getenv(__func__) ) {
- dbgmsg("%s: %zu for ref %s of '%s' (line %d) "
- "in %s of '%s' (as %s of '%s')", __func__,
- procedures.count(full_ref),
- ref.paragraph(), ref.section(), ref.line_number(),
- key.paragraph(), key.section(),
- full_ref.paragraph(), full_ref.section() );
- }
-
- return 1 == procedures.count(full_ref);
+ return 1 == procedures.count(procdef_t(full_ref));
}
// Add each section and paragraph to the map as it occurs in the Cobol text.
@@ -1451,9 +1465,6 @@ procedure_definition_add( size_t program, const cbl_label_t *procedure ) {
}
procdef_t key( section_name, paragraph_name, isym );
- if( getenv(__func__) ) {
- dbgmsg("%s: #%3zu %s of %s", __func__, isym, paragraph_name, section_name);
- }
current_procedure =
programs[program].insert( make_pair(key, procedures_t::mapped_type()) );
}
@@ -1463,9 +1474,6 @@ void
procedure_reference_add( const char *section, const char *paragraph,
int line, size_t context )
{
- if( getenv(__func__) ) {
- dbgmsg("%s: line %3d %s of %s", __func__, line, paragraph, section);
- }
current_procedure->second.push_back( procref_t(section, paragraph,
line, context) );
}
@@ -1496,10 +1504,11 @@ ambiguous_reference( size_t program ) {
ambiguous = find_if_not( proc.second.begin(), proc.second.end(),
is_unique(program, proc.first) );
if( proc.second.end() != ambiguous ) {
- if( yydebug || getenv("symbol_label_add")) {
- dbgmsg("%s: %s of '%s' has %zu potential matches", __func__,
- ambiguous->paragraph(), ambiguous->section(),
- procedures.count(*ambiguous));
+ if( yydebug ) {
+ dbgmsg("%s: %s of '%s' has " HOST_SIZE_T_PRINT_UNSIGNED
+ "potential matches", __func__,
+ ambiguous->paragraph(), ambiguous->section(),
+ (fmt_size_t)procedures.count(procdef_t(*ambiguous)));
}
return new procref_t(*ambiguous);
}
@@ -1526,7 +1535,7 @@ intradeclarative_reference() {
class next_group {
size_t isym;
public:
- next_group( symbol_elem_t *group ) : isym(symbol_index(group)) {}
+ explicit next_group( const symbol_elem_t *group ) : isym(symbol_index(group)) {}
// return true if elem is not a member of the group
bool operator()( const symbol_elem_t& elem ) {
@@ -1542,9 +1551,9 @@ parent_names( const symbol_elem_t *elem,
if( is_filler(cbl_field_of(elem)) ) return;
- // dbgmsg("%s: asked about %s of %s (%zu away)", __func__,
+ // dbgmsg("%s: asked about %s of %s (" HOST_SIZE_T_PRINT_UNSIGNED " away)", __func__,
// cbl_field_of(elem)->name,
- // cbl_field_of(group)->name, elem - group);
+ // cbl_field_of(group)->name, (fmt_size_t)(elem - group));
for( const symbol_elem_t *e=elem; e && group < e; e = symbol_parent(e) ) {
names.push_front( cbl_field_of(e)->name );
@@ -1563,15 +1572,17 @@ public:
symbol_elem_t *rgroup, type_t type )
: lgroup(lgroup), rgroup(rgroup), type(type)
{
- dbgmsg( "%s:%d: for #%zu %s and #%zu %s on line %d", __func__, __LINE__,
- symbol_index(lgroup), cbl_field_of(lgroup)->name,
- symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno );
+ dbgmsg( "%s:%d: for #" HOST_SIZE_T_PRINT_UNSIGNED
+ " %s and #" HOST_SIZE_T_PRINT_UNSIGNED " %s on line %d",
+ __func__, __LINE__,
+ (fmt_size_t)symbol_index(lgroup), cbl_field_of(lgroup)->name,
+ (fmt_size_t)symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno );
}
static bool
any_redefines( const cbl_field_t& field, const symbol_elem_t *group ) {
for( const cbl_field_t *f = &field; f && f->parent > 0; f = parent_of(f) ) {
- symbol_elem_t *e = symbol_at(f->parent);
+ const symbol_elem_t *e = symbol_at(f->parent);
if( e == group || e->type != SymField ) break;
if( symbol_redefines(f) ) return true;
}
@@ -1642,8 +1653,9 @@ corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs,
lhsg.a = symbols_begin(field_index(lhs));
lhsg.z = std::find_if( lhsg.a, symbols_end(), next_group(lhsg.a) );
- dbgmsg("%s:%d: examining %zu symbols after %s", __func__, __LINE__,
- lhsg.z - lhsg.a, lhs->name);
+ dbgmsg("%s:%d: examining " HOST_SIZE_T_PRINT_UNSIGNED " symbols after %s",
+ __func__, __LINE__,
+ (fmt_size_t)(lhsg.z - lhsg.a), lhs->name);
find_corresponding finder( symbol_at(field_index(lhs)),
symbol_at(field_index(rhs)), type );
@@ -1651,8 +1663,9 @@ corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs,
output.erase(0);
- dbgmsg( "%s:%d: %s and %s have %zu corresponding fields",
- __func__, __LINE__, lhs->name, rhs->name, output.size() );
+ dbgmsg( "%s:%d: %s and %s have " HOST_SIZE_T_PRINT_UNSIGNED
+ " corresponding fields",
+ __func__, __LINE__, lhs->name, rhs->name, (fmt_size_t)output.size() );
return output;
}
@@ -1730,11 +1743,10 @@ struct input_file_t {
ino_t inode;
int lineno;
const char *name;
- const line_map *lines;
input_file_t( const char *name, ino_t inode,
- int lineno=1, const line_map *lines = NULL )
- : inode(inode), lineno(lineno), name(name), lines(lines)
+ int lineno=1 )
+ : inode(inode), lineno(lineno), name(name)
{
if( inode == 0 ) inode_set();
}
@@ -1753,14 +1765,29 @@ struct input_file_t {
class unique_stack : public std::stack<input_file_t>
{
+ friend void cobol_set_pp_option(int opt);
+ bool option_m;
+ std::set<std::string> all_names;
+
+ const char *
+ no_wd( const char *wd, const char *name ) {
+ int i;
+ for( i=0; wd[i] == name[i]; i++ ) i++;
+ if( wd[i] == '\0' && name[i] == '/' ) i++;
+ return yydebug? name : name + i;
+ }
+
public:
+ unique_stack() : option_m(false) {}
+
bool push( const value_type& value ) {
auto ok = std::none_of( c.cbegin(), c.cend(),
- [value]( auto& that ) {
+ [value]( const auto& that ) {
return value == that;
} );
if( ok ) {
std::stack<input_file_t>::push(value);
+ all_names.insert(value.name);
return true;
}
size_t n = c.size();
@@ -1771,21 +1798,39 @@ class unique_stack : public std::stack<input_file_t>
"----- ---- --------"
"----------------------------------------");
for( const auto& v : c ) {
- dbgmsg( " %4zu %4d %s", c.size() - --n, v.lineno, no_wd(wd, v.name) );
+ dbgmsg( " %4" GCC_PRISZ "u %4d %s",
+ (fmt_size_t)(c.size() - --n), v.lineno, no_wd(wd, v.name) );
}
} else {
- dbgmsg("unable to get current working directory: %m");
+ dbgmsg("unable to get current working directory: %s", xstrerror(errno));
}
free(wd);
}
return false;
}
- const char *
- no_wd( const char *wd, const char *name ) {
- int i;
- for( i=0; wd[i] == name[i]; i++ ) i++;
- if( wd[i] == '\0' && name[i] == '/' ) i++;
- return yydebug? name : name + i;
+
+ // Look down into the stack. peek(0) == top()
+ const input_file_t& peek( size_t n ) const {
+ gcc_assert( n < size() );
+ return c.at(size() - ++n);
+ }
+
+ void option( int opt ) { // capture other preprocessor options eventually
+ assert(opt == 'M');
+ option_m = true;
+ }
+ int option() const {
+ return option_m? 'M' : 0;
+ }
+
+ void print() const {
+ std::string input( top().name );
+ printf( "%s: ", input.c_str() );
+ for( const auto& name : all_names ) {
+ if( name != input )
+ printf( "\\\n\t%s ", name.c_str() );
+ }
+ printf("\n");
}
};
@@ -1794,6 +1839,12 @@ static unique_stack input_filenames;
static std::map<std::string, ino_t> old_filenames;
static const unsigned int sysp = 0; // not a C header file, cf. line-map.h
+void cobol_set_pp_option(int opt) {
+ // capture other preprocessor options eventually
+ assert(opt == 'M');
+ input_filenames.option_m = true;
+}
+
/*
* Maintain a stack of input filenames. Ensure the files are unique (by
* inode), to prevent copybook cycles. Before pushing a new name, Record the
@@ -1804,12 +1855,13 @@ static const unsigned int sysp = 0; // not a C header file, cf. line-map.h
* to enforce uniqueness, and the scanner to maintain line numbers.
*/
bool cobol_filename( const char *name, ino_t inode ) {
- line_map *lines = NULL;
+ const line_map *lines = NULL;
if( inode == 0 ) {
auto p = old_filenames.find(name);
if( p == old_filenames.end() ) {
for( auto& elem : old_filenames ) {
- dbgmsg("%6zu %-30s", elem.second, elem.first.c_str());
+ dbgmsg("%6" GCC_PRISZ "u %-30s",
+ (fmt_size_t)elem.second, elem.first.c_str());
}
cbl_errx( "logic error: missing inode for %s", name);
}
@@ -1818,32 +1870,42 @@ bool cobol_filename( const char *name, ino_t inode ) {
}
linemap_add(line_table, LC_ENTER, sysp, name, 1);
input_filename_vestige = name;
- bool pushed = input_filenames.push( input_file_t(name, inode, 1, lines) );
- input_filenames.top().lineno = yylineno = 1;
- if( getenv(__func__) ) {
- dbgmsg(" saving %s with lineno as %d",
- input_filenames.top().name, input_filenames.top().lineno);
- }
+ bool pushed = input_filenames.push( input_file_t(name, inode, 1) );
return pushed;
}
const char *
-cobol_lineno_save() {
+cobol_lineno( int lineno ) {
if( input_filenames.empty() ) return NULL;
auto& input( input_filenames.top() );
- input.lineno = yylineno;
- if( getenv(__func__) ) {
- dbgmsg(" setting %s with lineno as %d", input.name, input.lineno);
- }
+ input.lineno = lineno;
return input.name;
}
+/*
+ * This function is called from the scanner, usually when a copybook is on top
+ * of the input stack, before the parser retrieves the token and resets the
+ * current filename. For that reason, we normaly want to line number of the
+ * file that is about to become the current one, which is the one behind top().
+ *
+ * If somehow we arrive here when there is nothing underneath, we return the
+ * current line nubmer, or zero if there's no input. The only consequence is
+ * that the reported line number might be wrong.
+ */
+int
+cobol_lineno() {
+ if( input_filenames.empty() ) return 0;
+ size_t n = input_filenames.size() < 2? 0 : 1;
+ const auto& input( input_filenames.peek(n) );
+ return input.lineno;
+}
+
const char *
cobol_filename() {
return input_filenames.empty()? input_filename_vestige : input_filenames.top().name;
}
-const char *
+void
cobol_filename_restore() {
assert(!input_filenames.empty());
const input_file_t& top( input_filenames.top() );
@@ -1851,21 +1913,17 @@ cobol_filename_restore() {
input_filename_vestige = top.name;
input_filenames.pop();
- if( input_filenames.empty() ) return NULL;
+ if( input_filenames.empty() ) return;
auto& input = input_filenames.top();
- input.lines = linemap_add(line_table, LC_LEAVE, sysp, NULL, 0);
-
- yylineno = input.lineno;
- if( getenv("cobol_filename") ) {
- dbgmsg("restoring %s with lineno to %d", input.name, input.lineno);
- }
- return input.name;
+ linemap_add(line_table, LC_LEAVE, sysp, NULL, 0);
}
static location_t token_location;
+location_t location_from_lineno() { return token_location; }
+
template <typename LOC>
static void
gcc_location_set_impl( const LOC& loc ) {
@@ -1893,11 +1951,9 @@ verify_format( const char gmsgid[] ) {
static regex_t re;
static int cflags = REG_EXTENDED;
static int status = regcomp( &re, pattern, cflags );
- static char errbuf[80];
-
-
if( status != 0 ) {
+ static char errbuf[80];
int n = regerror(status, &re, errbuf, sizeof(errbuf));
gcc_assert(size_t(n) < sizeof(errbuf));
fprintf(stderr, "%s:%d: %s", __func__, __LINE__, errbuf);
@@ -1916,6 +1972,8 @@ verify_format( const char gmsgid[] ) {
static const diagnostic_option_id option_zero;
size_t parse_error_inc();
+void ydferror( const char gmsgid[], ... ) ATTRIBUTE_GCOBOL_DIAG(1, 2);
+
void
ydferror( const char gmsgid[], ... ) {
verify_format(gmsgid);
@@ -1938,7 +1996,7 @@ extern YYLTYPE yylloc;
* the global token_location, which is passed to the diagnostic framework. The
* original value is restored when the instantiated variable goes out of scope.
*/
-class temp_loc_t : protected YYLTYPE {
+class temp_loc_t {
location_t orig;
public:
temp_loc_t() : orig(token_location) {
@@ -1946,10 +2004,10 @@ class temp_loc_t : protected YYLTYPE {
gcc_location_set(yylloc); // use lookahead location
}
- temp_loc_t( const YYLTYPE& loc) : orig(token_location) {
+ explicit temp_loc_t( const YYLTYPE& loc) : orig(token_location) {
gcc_location_set(loc);
}
- temp_loc_t( const YDFLTYPE& loc) : orig(token_location) {
+ explicit temp_loc_t( const YDFLTYPE& loc) : orig(token_location) {
YYLTYPE lloc = {
loc.first_line, loc.first_column,
loc.last_line, loc.last_column };
@@ -1992,21 +2050,14 @@ void error_msg( const YYLTYPE& loc, const char gmsgid[], ... ) {
ERROR_MSG_BODY
}
+void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... )
+ ATTRIBUTE_GCOBOL_DIAG(2, 3);
+
void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... ) {
ERROR_MSG_BODY
}
void
-cdf_location_set(YYLTYPE loc) {
- extern YDFLTYPE ydflloc;
-
- ydflloc.first_line = loc.first_line;
- ydflloc.first_column = loc.first_column;
- ydflloc.last_line = loc.last_line;
- ydflloc.last_column = loc.last_column;
-}
-
-void
yyerror( const char gmsgid[], ... ) {
temp_loc_t looker;
verify_format(gmsgid);
@@ -2059,7 +2110,7 @@ yyerrorvl( int line, const char *filename, const char fmt[], ... ) {
static inline size_t
matched_length( const regmatch_t& rm ) { return rm.rm_eo - rm.rm_so; }
-const char *
+int
cobol_fileline_set( const char line[] ) {
static const char pattern[] = "#line +([[:alnum:]]+) +[\"']([^\"']+). *\n";
static const int cflags = REG_EXTENDED | REG_ICASE;
@@ -2072,7 +2123,7 @@ cobol_fileline_set( const char line[] ) {
if( (erc = regcomp(&re, pattern, cflags)) != 0 ) {
regerror(erc, &re, regexmsg, sizeof(regexmsg));
dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
- return line;
+ return 0;
}
preg = &re;
}
@@ -2080,10 +2131,10 @@ cobol_fileline_set( const char line[] ) {
if( erc != REG_NOMATCH ) {
regerror(erc, preg, regexmsg, sizeof(regexmsg));
dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
- return line;
+ return 0;
}
- error_msg(yylloc, "invalid #line directive: %s", line );
- return line;
+ error_msg(yylloc, "invalid %<#line%> directive: %s", line );
+ return 0;
}
const char
@@ -2092,40 +2143,39 @@ cobol_fileline_set( const char line[] ) {
int fileline;
if( 1 != sscanf(line_str, "%d", &fileline) )
- yywarn("could not parse line number %s from #line directive", line_str);
+ yywarn("could not parse line number %s from %<#line%> directive", line_str);
input_file_t input_file( filename, ino_t(0), fileline ); // constructor sets inode
- if( getenv(__func__) ) return filename; // ignore #line directive
-
if( input_filenames.empty() ) {
- input_file.lines = linemap_add(line_table, LC_ENTER, sysp, filename, 1);
input_filenames.push(input_file);
}
input_file_t& file = input_filenames.top();
file = input_file;
- yylineno = file.lineno;
- return file.name;
+ return file.lineno;
}
-class timespec_t {
- struct timespec now;
+//#define TIMING_PARSE
+#ifdef TIMING_PARSE
+class cbl_timespec {
+ uint64_t now; // Nanoseconds
public:
- timespec_t() {
- clock_gettime(CLOCK_MONOTONIC, &now);
+ cbl_timespec() {
+ now = get_time_nanoseconds();
}
double ns() const {
- return now.tv_sec * 1000000000 + now.tv_nsec;
+ return now;
}
- friend double operator-( const timespec_t& now, const timespec_t& then );
+ friend double operator-( const cbl_timespec& now, const cbl_timespec& then );
};
double
-operator-( const timespec_t& then, const timespec_t& now ) {
+operator-( const cbl_timespec& now, const cbl_timespec& then ) {
return (now.ns() - then.ns()) / 1000000000;
}
+#endif
static int
parse_file( const char filename[] )
@@ -2136,15 +2186,25 @@ parse_file( const char filename[] )
parser_enter_file(filename);
- timespec_t start;
+ if( input_filenames.option() == 'M' ) {
+ input_filenames.print();
+ return 0;
+ }
+
+#ifdef TIMING_PARSE
+ cbl_timespec start;
+#endif
int erc = yyparse();
- timespec_t finish;
+#ifdef TIMING_PARSE
+ cbl_timespec finish;
double dt = finish - start;
+ printf("Overall parse & generate time is %.6f seconds\n", dt);
+#endif
+
parser_leave_file();
- //printf("Overall parse & generate time is %.6f seconds\n", dt);
fclose (yyin);
@@ -2168,30 +2228,20 @@ cobol_set_debugging( bool flex, bool yacc, bool parser )
yy_flex_debug = flex? 1 : 0;
ydfdebug = yydebug = yacc? 1 : 0;
f_trace_debug = parser? 1 : 0;
-
- char *ind = getenv("INDICATOR_COLUMN");
- if( ind ) {
- int col;
- if( 1 != sscanf(ind, "%d", &col) ) {
- yywarn("ignored non-integer value for INDICATOR_COLUMN=%s", ind);
- }
- cobol_set_indicator_column(col);
- }
}
-os_locale_t os_locale = { "UTF-8", xstrdup("C.UTF-8") };
-
+os_locale_t os_locale = { "UTF-8", "C.UTF-8" };
void
cobol_parse_files (int nfile, const char **files)
{
- char * opaque = setlocale(LC_CTYPE, "");
+ const char * opaque = setlocale(LC_CTYPE, "");
if( ! opaque ) {
yywarn("setlocale: unable to initialize LOCALE");
} else {
char *codeset = nl_langinfo(CODESET);
if( ! codeset ) {
- yywarn("nl_langinfo failed after setlocale succeeded");
+ yywarn("%<nl_langinfo%> failed after %<setlocale()%> succeeded");
} else {
os_locale.codeset = codeset;
}
@@ -2213,6 +2263,7 @@ cbl_message(int fd, const char *format_string, ...)
char *ostring = xvasprintf(format_string, ap);
va_end(ap);
write(fd, ostring, strlen(ostring));
+ write(fd, "\n", 1);
free(ostring);
}
@@ -2302,7 +2353,7 @@ dbgmsg(const char *msg, ...) {
void
dialect_error( const YYLTYPE& loc, const char term[], const char dialect[] ) {
- error_msg(loc, "%s is not ISO syntax, requires -dialect %s",
+ error_msg(loc, "%s is not ISO syntax, requires %<-dialect %s%>",
term, dialect);
}
@@ -2313,12 +2364,621 @@ bool fisdigit(int c)
bool fisspace(int c)
{
return ISSPACE(c);
- };
+ }
int ftolower(int c)
{
return TOLOWER(c);
}
+int ftoupper(int c)
+ {
+ return TOUPPER(c);
+ }
bool fisprint(int c)
{
return ISPRINT(c);
- };
+ }
+
+// 8.9 Reserved words
+static const std::set<std::string> reserved_words = {
+ // GCC COBOL keywords
+ "COMMAND-LINE",
+ "COMMAND-LINE-COUNT",
+
+ // GCC device names
+ "C01",
+ "C02",
+ "C03",
+ "C04",
+ "C05",
+ "C06",
+ "C07",
+ "C08",
+ "C09",
+ "C10",
+ "C11",
+ "C12",
+ "CONSOLE",
+ "S01",
+ "S02",
+ "S03",
+ "S04",
+ "S05",
+ "STDERR",
+ "STDIN",
+ "STDOUT",
+ "SYSIN",
+ "SYSIPT",
+ "SYSLIST",
+ "SYSLST",
+ "SYSOUT",
+ "SYSPCH",
+ "SYSPUNCH",
+ "UPSI",
+
+ // IBM keywords that GCC recognizes
+ "BASIS",
+ "CBL",
+ "ENTER",
+ "READY",
+ "TITLE",
+ "TRACE",
+ "ALTER",
+ "COBOL",
+ "DATE-COMPILED",
+ "DATE-WRITTEN",
+ "DBCS",
+ "DEBUGGING",
+ "EGCS",
+ "ENTRY",
+ "EVERY",
+ "INSTALLATION",
+ "I-O-CONTROL",
+ "KANJI",
+ "LABEL",
+ "NULLS",
+ "PADDING",
+ "PROCEDURES",
+ "PROCEED",
+ "RECORDING",
+ "RERUN",
+ "REVERSED",
+ "SECURITY",
+ "TALLY",
+ "VOLATILE",
+ "XML",
+ "END-START",
+
+ // ISO 2023 keywords
+ "ACCEPT",
+ "ACCESS",
+ "ACTIVE-CLASS",
+ "ADD",
+ "ADDRESS",
+ "ADVANCING",
+ "AFTER",
+ "ALIGNED",
+ "ALL",
+ "ALLOCATE",
+ "ALPHABET",
+ "ALPHABETIC",
+ "ALPHABETIC-LOWER",
+ "ALPHABETIC-UPPER",
+ "ALPHANUMERIC",
+ "ALPHANUMERIC-EDITED",
+ "ALSO",
+ "ALTERNATE",
+ "AND",
+ "ANY",
+ "ANYCASE",
+ "ARE",
+ "AREA",
+ "AREAS",
+ "AS",
+ "ASCENDING",
+ "ASSIGN",
+ "AT",
+ "B-AND",
+ "B-NOT",
+ "B-OR",
+ "B-SHIFT-L",
+ "B-SHIFT-LC",
+ "B-SHIFT-R",
+ "B-SHIFT-RC",
+ "B-XOR",
+ "BASED",
+ "BEFORE",
+ "BINARY",
+ "BINARY-CHAR",
+ "BINARY-DOUBLE",
+ "BINARY-LONG",
+ "BINARY-SHORT",
+ "BIT",
+ "BLANK",
+ "BLOCK",
+ "BOOLEAN",
+ "BOTTOM",
+ "BY",
+ "CALL",
+ "CANCEL",
+ "CF",
+ "CH",
+ "CHARACTER",
+ "CHARACTERS",
+ "CLASS",
+ "CLASS-ID",
+ "CLOSE",
+ "CODE",
+ "CODE-SET",
+ "COL",
+ "COLLATING",
+ "COLS",
+ "COLUMN",
+ "COLUMNS",
+ "COMMA",
+ "COMMIT",
+ "COMMON",
+ "COMP",
+ "COMPUTATIONAL",
+ "COMPUTE",
+ "CONDITION",
+ "CONFIGURATION",
+ "CONSTANT",
+ "CONTAINS",
+ "CONTENT",
+ "CONTINUE",
+ "CONTROL",
+ "CONTROLS",
+ "CONVERTING",
+ "COPY",
+ "CORR",
+ "CORRESPONDING",
+ "COUNT",
+ "CRT",
+ "CURRENCY",
+ "CURSOR",
+ "DATA",
+ "DATA-POINTER",
+ "DATE",
+ "DAY",
+ "DAY-OF-WEEK",
+ "DE",
+ "DECIMAL-POINT",
+ "DECLARATIVES",
+ "DEFAULT",
+ "DELETE",
+ "DELIMITED",
+ "DELIMITER",
+ "DEPENDING",
+ "DESCENDING",
+ "DESTINATION",
+ "DETAIL",
+ "DISPLAY",
+ "DIVIDE",
+ "DIVISION",
+ "DOWN",
+ "DUPLICATES",
+ "DYNAMIC",
+ "EC",
+ "EDITING",
+ "ELSE",
+ "EMD-START",
+ "END",
+ "END-ACCEPT",
+ "END-ADD",
+ "END-CALL",
+ "END-COMPUTE",
+ "END-DELETE",
+ "END-DISPLAY",
+ "END-DIVIDE",
+ "END-EVALUATE",
+ "END-IF",
+ "END-MULTIPLY",
+ "END-OF-PAGE",
+ "END-PERFORM",
+ "END-READ",
+ "END-RECEIVE",
+ "END-RETURN",
+ "END-REWRITE",
+ "END-SEARCH",
+ "END-SEND",
+ "END-STRING",
+ "END-SUBTRACT",
+ "END-UNSTRING",
+ "END-WRITE",
+ "ENVIRONMENT",
+ "EO",
+ "EOP",
+ "EQUAL",
+ "ERROR",
+ "EVALUATE",
+ "EXCEPTION",
+ "EXCEPTION-OBJECT",
+ "EXCLUSIVE-OR",
+ "EXIT",
+ "EXTEND",
+ "EXTERNAL",
+ "FACTORY",
+ "FALSE",
+ "FARTHEST-FROM-ZERO",
+ "FD",
+ "FILE",
+ "FILE-CONTROL",
+ "FILLER",
+ "FINAL",
+ "FINALLY",
+ "FIRST",
+ "FLOAT-BINARY-128",
+ "FLOAT-BINARY-32",
+ "FLOAT-BINARY-64",
+ "FLOAT-DECIMAL-16",
+ "FLOAT-DECIMAL-34",
+ "FLOAT-EXTENDED",
+ "FLOAT-INFINITY",
+ "FLOAT-LONG",
+ "FLOAT-NOT-A-NUMBER-",
+ "FLOAT-SHORT",
+ "FOOTING",
+ "FOR",
+ "FORMAT",
+ "FREE",
+ "FROM",
+ "FUNCTION",
+ "FUNCTION-ID",
+ "FUNCTION-POINTER",
+ "GENERATE",
+ "GET",
+ "GIVING",
+ "GLOBAL",
+ "GO",
+ "GOBACK",
+ "GREATER",
+ "GROUP",
+ "GROUP-USAGE",
+ "HEADING",
+ "HIGH-VALUE",
+ "HIGH-VALUES",
+ "I-O",
+ "I-OICONTROL",
+ "IDENTIFICATION",
+ "IF",
+ "IN",
+ "IN-ARITHMETIC-RANGE",
+ "INDEX",
+ "INDEXED",
+ "INDICATE",
+ "INHERITS",
+ "INITIAL",
+ "INITIALIZE",
+ "INITIATE",
+ "INPUT",
+ "INPUT-OUTPUT",
+ "INSPECT",
+ "INTERFACE",
+ "INTERFACE-ID",
+ "INTO",
+ "INVALID",
+ "INVOKE",
+ "IS",
+ "JUST",
+ "JUSTIFIED",
+ "KEY",
+ "LAST",
+ "LEADING",
+ "LEFT",
+ "LENGTH",
+ "LESS",
+ "LIMIT",
+ "LIMITS",
+ "LINAGE",
+ "LINAGE-COUNTER",
+ "LINE",
+ "LINE-COUNTER",
+ "LINES",
+ "LINKAGE",
+ "LOCAL-STORAGE",
+ "LOCALE",
+ "LOCATION",
+ "LOCK",
+ "LOW-VALUE",
+ "LOW-VALUES",
+ "MERGE",
+ "MESSAGE-TAG",
+ "METHOD-ID",
+ "MINUS",
+ "MODE",
+ "MOVE",
+ "MULTIPLY",
+ "NATIONAL",
+ "NATIONAL-EDITED",
+ "NATIVE",
+ "NEAREST-TO-ZERO",
+ "NEGATIVE",
+ "NESTED",
+ "NEXT",
+ "NO",
+ "NOT",
+ "NULL",
+ "NUMBER",
+ "NUMERIC",
+ "NUMERIC-EDITED",
+ "OBJECT",
+ "OBJECT-COMPUTER",
+ "OBJECT-REFERENCE",
+ "OCCURS",
+ "OF",
+ "OFF",
+ "OMITTED",
+ "ON",
+ "OPEN",
+ "OPTIONAL",
+ "OPTIONS",
+ "OR",
+ "ORDER",
+ "ORGANIZATION",
+ "OTHER",
+ "OUTPUT",
+ "OVERFLOW",
+ "OVERRIDE",
+ "PACKED-DECIMAL",
+ "PAGE",
+ "PAGE-COUNTER",
+ "PERFORM",
+ "PF",
+ "PH",
+ "PIC",
+ "PICTURE",
+ "PLUS",
+ "POINTER",
+ "POSITIVE",
+ "PRESENT",
+ "PRINTING",
+ "PROCEDURE",
+ "PROGRAM",
+ "PROGRAM-ID",
+ "PROGRAM-POINTER",
+ "PROPERTY",
+ "PROTOTYPE",
+ "QUIET",
+ "QUOTE",
+ "QUOTES",
+ "RAISE",
+ "RAISING",
+ "RANDOM",
+ "RD",
+ "READ",
+ "RECEIVE",
+ "RECORD",
+ "RECORDS",
+ "REDEFINES",
+ "REEL",
+ "REFERENCE",
+ "RELATIVE",
+ "RELEASE",
+ "REMAINDER",
+ "REMOVAL",
+ "RENAMES",
+ "REPLACE",
+ "REPLACING",
+ "REPORT",
+ "REPORTING",
+ "REPORTS",
+ "REPOSITORY",
+ "RESERVE",
+ "RESET",
+ "RESUME",
+ "RETRY",
+ "RETURN",
+ "RETURNING",
+ "REWIND",
+ "REWRITE",
+ "RF",
+ "RH",
+ "RIGHT",
+ "ROLLBACK",
+ "ROUNDED",
+ "RUN",
+ "SAME",
+ "SCREEN",
+ "SD",
+ "SEARCH",
+ "SECTION",
+ "SELECT",
+ "SELF",
+ "SEND",
+ "SENTENCE",
+ "SEPARATE",
+ "SEQUENCE",
+ "SEQUENTIAL",
+ "SET",
+ "SHARING",
+ "SIGN",
+ "SIGNALING",
+ "SIZE",
+ "SORT",
+ "SORT-MERGE",
+ "SOURCE",
+ "SOURCE-COMPUTER",
+ "SOURCES",
+ "SPACE",
+ "SPACES",
+ "SPECIAL-NAMES",
+ "STANDARD",
+ "STANDARD-1",
+ "STANDARD-2",
+ "START",
+ "STATUS",
+ "STOP",
+ "STRING",
+ "SUBTRACT",
+ "SUM",
+ "SUPER",
+ "SUPPRESS",
+ "SYMBOLIC",
+ "SYNC",
+ "SYNCHRONIZED",
+ "SYSTEM-DEFAULT",
+ "TABLE",
+ "TALLYING",
+ "TERMINATE",
+ "TEST",
+ "THAN",
+ "THEN",
+ "THROUGH",
+ "THRU",
+ "TIME",
+ "TIMES",
+ "TO",
+ "TOP",
+ "TRAILING",
+ "TRUE",
+ "TYPE",
+ "TYPEDEF",
+ "UNIT",
+ "UNIVERSAL",
+ "UNLOCK",
+ "UNSTRING",
+ "UNTIL",
+ "UP",
+ "UPON",
+ "USAGE",
+ "USE",
+ "USER-DEFAULT",
+ "USING",
+ "VAL-STATUS",
+ "VALID",
+ "VALIDATE",
+ "VALIDATE-STATUS",
+ "VALUE",
+ "VALUES",
+ "VARYING",
+ "WHEN",
+ "WITH",
+ "WORKING-STORAGE",
+ "WRITE",
+ "XOR",
+ "ZERO",
+ "ZEROES",
+ "ZEROS",
+ "+",
+ "-",
+ "*",
+ "/",
+ "**",
+ "<",
+ "<=",
+ "<>",
+ "=",
+ ">",
+ ">=",
+ "&",
+ "*>",
+ "::",
+ ">>",
+};
+
+// 8.10 Context-sensitive words
+static const std::set<std::string> context_sensitive_words = {
+ "ACTIVATING", // MODULE-NAME intrinsic function
+ "ANUM", // CONVERT intrinsic function
+ "APPLY", // I-O-CONTROL paragraph
+ "ARITHMETIC", // OPTIONS paragraph
+ "ATTRIBUTE", // SET statement
+ "AUTO", // screen description entry
+ "AUTOMATIC", // LOCK MODE clause
+ "AWAY-FROM-ZERO", // ROUNDED phrase
+ "BACKGROUND-COLOR", // screen description entry
+ "BACKWARD", // INSPECT statement
+ "BELL", // screen description entry and SET attribute statement
+ "BINARY-ENCODING", // USAGE clause and FLOAT-DECIMAL clause
+ "BLINK", // screen description entry and SET attribute statement
+ "BYTE", // CONVERT intrinsic function
+ "BYTES", // RECORD clause
+ "BYTE-LENGTH", // constant entry
+ "CAPACITY", // OCCURS clause
+ "CENTER", // COLUMN clause
+ "CLASSIFICATION", // OBJECT-COMPUTER paragraph
+ "CURRENT", // MODULE-NAME intrinsic function
+ "CYCLE", // EXIT statement
+ "DECIMAL-ENCODING", // USAGE clause and FLOAT-DECIMAL clause
+ "EOL", // ERASE clause in a screen description entry
+ "EOS", // ERASE clause in a screen description entry
+ "ENTRY-CONVENTION", // OPTIONS paragraph
+ "ERASE", // screen description entry
+ "EXPANDS", // class-specifier and interface-specifier of the REPOSITORY paragraph
+ "FLOAT-BINARY", // OPTIONS paragraph
+ "FLOAT-DECIMAL", // OPTIONS paragraph
+ "FOREGROUND-COLOR", // screen description entry
+ "FOREVER", // RETRY phrase
+ "FULL", // screen description entry
+ "HEX", // CONVERT intrinsic function
+ "HIGH-ORDER-LEFT", // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause
+ "HIGH-ORDER-RIGHT", // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause
+ "HIGHLIGHT", // screen description entry and SET attribute statement
+ "IGNORING", // READ statement
+ "IMPLEMENTS", // FACTORY paragraph and OBJECT paragraph
+ "INITIALIZED", // ALLOCATE statement and OCCURS clause
+ "INTERMEDIATE", // OPTIONS paragraph
+ "INTRINSIC", // function-specifier of the REPOSITORY paragraph
+ "LC_ALL", // SET statement
+ "LC_COLLATE", // SET statement
+ "LC_CTYPE", // SET statement
+ "LC_MESSAGES", // SET statement
+ "LC_MONETARY", // SET statement
+ "LC_NUMERIC", // SET statement
+ "LC_TIME", // SET statement
+ "LOWLIGHT", // screen description entry and SET attribute statement
+ "MANUAL", // LOCK MODE clause
+ "MULTIPLE", // LOCK ON phrase
+ "NAT", // CONVERT intrinsic function
+ "NEAREST-AWAY-FROM-ZERO", // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+ "NEAREST-EVEN", // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+ "NEAREST-TOWARD-ZERO", // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+ "NONE", // DEFAULT clause
+ "NORMAL", // STOP statement
+ "NUMBERS", // COLUMN clause and LINE clause
+ "ONLY", // Object-view, SHARING clause, SHARING phrase, and USAGE clause
+ "PARAGRAPH", // EXIT statement
+ "PREFIXED", // DYNAMIC LENGTH STRUCTURE clause
+ "PREVIOUS", // READ statement
+ "PROHIBITED", // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+ "RECURSIVE", // PROGRAM-ID paragraph
+ "RELATION", // VALIDATE-STATUS clause
+ "REQUIRED", // screen description entry
+ "REVERSE-VIDEO", // screen description entry and SET attribute statement
+ "ROUNDING", // OPTIONS paragraph
+ "SECONDS", // RETRY phrase, CONTINUE statement
+ "SECURE", // screen description entry
+ "SHORT", // DYNAMIC LENGTH STRUCTURE clause
+ "SIGNED", // DYNAMIC LENGTH STRUCTURE clause and USAGE clause
+ "STACK", // MODULE-NAME intrinsic function
+ "STANDARD-BINARY", // ARITHMETIC clause
+ "STANDARD-DECIMAL", // ARITHMETIC clause
+ "STATEMENT", // RESUME statement
+ "STEP", // OCCURS clause
+ "STRONG", // TYPEDEF clause
+ "STRUCTURE", // DYNAMIC LENGTH STRUCTURE clause
+ "SYMBOL", // CURRENCY clause
+ "TOP-LEVEL", // MODULE-NAME intrinsic function
+ "TOWARD-GREATER", // ROUNDED phrase
+ "TOWARD-LESSER", // ROUNDED phrase
+ "TRUNCATION", // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+ "UCS-4", // ALPHABET clause
+ "UNDERLINE", // screen description entry and SET attribute statement
+ "UNSIGNED", // USAGE clause
+ "UTF-8", // ALPHABET clause
+ "UTF-16", // ALPHABET clause
+ "YYYYDDD", // ACCEPT statement
+ "YYYYMMDD", // ACCEPT statement
+};
+
+// Is the input a COBOL word, per ISO/IEC 1989:2023 (E) ?
+// We add a few GCC-specific keywords, and our supported IBM keywords.
+bool
+iso_cobol_word( const std::string& name, bool include_context ) {
+ auto ok = 1 == reserved_words.count(name);
+ if( include_context && !ok ) {
+ ok = 1 == context_sensitive_words.count(name);
+ }
+ return ok;
+}
+