1 files changed, 869 insertions, 209 deletions
diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc
index 62ecd98..23f605d 100644
--- a/gcc/cobol/util.cc
+++ b/gcc/cobol/util.cc
@@ -35,6 +35,10 @@
  */
 
 #include "cobol-system.h"
+#include "coretypes.h"
+#include "tree.h"
+#undef yy_flex_debug
+
 #include <langinfo.h>
 
 #include "coretypes.h"
@@ -55,13 +59,13 @@
 #include "cbldiag.h"
 #include "lexio.h"
 
-#define HOWEVER_GCC_DEFINES_TREE
 #include "../../libgcobol/ec.h"
 #include "../../libgcobol/common-defs.h"
 #include "symbols.h"
 #include "inspect.h"
 #include "../../libgcobol/io.h"
 #include "genapi.h"
+#include "genutil.h"
 
 #pragma GCC diagnostic ignored "-Wunused-result"
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -72,14 +76,47 @@ extern int yyparse(void);
 
 extern int demonstration_administrator(int N);
 
+#if !defined (HAVE_GET_CURRENT_DIR_NAME)
+/* Posix platforms might not have get_current_dir_name but should have
+   getcwd() and PATH_MAX.  */
+#if __has_include (<limits.h>)
+# include <limits.h>
+#endif
+/* The Hurd doesn't define PATH_MAX.  */
+#if !defined (PATH_MAX) && defined(__GNU__)
+# define PATH_MAX 4096
+#endif
+static inline char *
+get_current_dir_name ()
+{
+  /* Use libiberty's allocator here.  */
+  char *buf = (char *) xmalloc (PATH_MAX);
+  return getcwd (buf, PATH_MAX);
+}
+#endif
+
+/*
+ * For printing messages, usually the size of the thing is some kind of string
+ * length, and doesn't really need a size_t.  For message formatting, use a
+ * simple unsigned long, and warn if that's no good.  "gb4" here stands for 
+ * "4 Gigabytes".
+ */
+unsigned long
+gb4( size_t input ) {
+  if( input != static_cast<unsigned long>(input) ) {
+    yywarn("size too large to print: %lx:%lx",
+	   (unsigned long)(input >> (4 * sizeof(unsigned long))),
+	   static_cast<unsigned long>(input));
+  }
+  return input;
+}
+  
 const char *
 symbol_type_str( enum symbol_type_t type )
 {
     switch(type) {
     case SymFilename:
         return "SymFilename";
-    case SymFunction:
-        return "SymFunction";
     case SymField:
         return "SymField";
     case SymLabel:
@@ -93,7 +130,7 @@ symbol_type_str( enum symbol_type_t type )
     case SymDataSection:
         return "SymDataSection";
     }
-    dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type);
+    cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
     return "???";
 }
 
@@ -142,7 +179,7 @@ cbl_field_type_str( enum cbl_field_type_t type )
   case FldBlob:
     return "FldBlob";
  }
-  dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type);
+  cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
   return "???";
 }
 
@@ -302,8 +339,9 @@ is_numeric_edited( const char picture[] ) {
       break;
     default:
       numed_message = xasprintf("invalid PICTURE character "
-                                "'%c' at offset %zu in '%s'",
-                                *p, p - picture, picture);
+                                "'%c' at offset " HOST_SIZE_T_PRINT_UNSIGNED
+                                " in '%s'",
+                                *p, (fmt_size_t)(p - picture), picture);
       break;
     }
 
@@ -327,49 +365,50 @@ normalize_picture( char picture[] )
     regmatch_t pmatch[4];
 
     if( (erc = regcomp(preg, regex, cflags)) != 0 ) {
-        regerror(erc, preg, regexmsg, sizeof(regexmsg));
-        dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
-        return picture;
+      regerror(erc, preg, regexmsg, sizeof(regexmsg));
+      dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
+      return picture;
     }
 
     while( (erc = regexec(preg, picture, COUNT_OF(pmatch), pmatch, 0)) == 0 ) {
-        assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo);
-        size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
-        assert(len == 1);
-        const char *start = picture + pmatch[1].rm_so;
-
-        assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo);
-        len = pmatch[2].rm_eo - pmatch[2].rm_so;
-        assert(len > 0);
-
-        /*
-         * Overwrite e.g. A(4) with AAAA.
-         */
-        assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number
-        p = picture + pmatch[2].rm_so;
-        len = 0;
-        if( 1 != sscanf(p, "%zu", &len) ) {
-            dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p);
-            goto irregular;
-        }
-        if( len == 0 ) {
-            dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p);
-            goto irregular;
-        }
+      assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo);
+      size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
+      assert(len == 1);
+      const char *start = picture + pmatch[1].rm_so;
+
+      assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo);
+      len = pmatch[2].rm_eo - pmatch[2].rm_so;
+      assert(len > 0);
+
+      /*
+       * Overwrite e.g. A(4) with AAAA.
+       */
+      assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number
+      p = picture + pmatch[2].rm_so;
+      len = 0;
+      fmt_size_t lenf = 0;
+      if( 1 != sscanf(p, "%" GCC_PRISZ "u", &lenf) ) {
+        dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p);
+        goto irregular;
+      }
+      len = lenf;
+      if( len == 0 ) {
+        dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p);
+        goto irregular;
+      }
 
-	std::vector <char> pic(len + 1, '\0');
-        memset(pic.data(), *start, len);
-        const char *finish = picture + pmatch[2].rm_eo,
-                    *eopicture = picture + strlen(picture);
+      std::vector <char> pic(len + 1, '\0');
+      memset(pic.data(), *start, len);
+      const char *finish = picture + pmatch[2].rm_eo,
+        *eopicture = picture + strlen(picture);
 
-        p = xasprintf( "%*s%s%*s",
-		       (int)(start - picture), picture,
-		       pic.data(),
-		       (int)(eopicture - finish), finish );
+      p = xasprintf( "%*s%s%*s",
+                     (int)(start - picture), picture,
+                     pic.data(),
+                     (int)(eopicture - finish), finish );
 
-        free(picture);
-        picture = p;
-        continue;
+      free(picture);
+      picture = p;
     }
     assert(erc == REG_NOMATCH);
 
@@ -440,7 +479,7 @@ is_elementary( enum cbl_field_type_t type )
     case FldFloat:
       return true; // takes up space
     }
-    dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type);
+    cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
     return false;
 }
 
@@ -752,7 +791,7 @@ symbol_field_type_update( cbl_field_t *field,
 
 bool
 redefine_field( cbl_field_t *field ) {
-  cbl_field_t *primary = symbol_redefines(field);
+  const cbl_field_t *primary = symbol_redefines(field);
   bool fOK = true;
 
   if( !primary ) return false;
@@ -800,7 +839,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
         // 8 or more, we need do no further testing because we assume
         // everything fits.
         if( data.capacity < 8 ) {
-          auto p = strchr(data.initial, symbol_decimal_point());
+          const auto p = strchr(data.initial, symbol_decimal_point());
           if( p && atoll(p+1) != 0 ) {
             error_msg(loc, "integer type %s VALUE '%s' "
                      "requires integer VALUE",
@@ -863,8 +902,8 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
                                                  return TOUPPER(ch) == 'E';
                                                } );
               if( !has_exponent && data.precision() < pend - p ) {
-                error_msg(loc, "%s cannot represent  VALUE '%s' exactly (max .%zu)",
-                         name, data.initial, pend - p);
+                error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%zu)",
+                          name, data.initial, '.', pend - p);
               }
             }
           }
@@ -922,8 +961,7 @@ const cbl_field_t *
 literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */)  {
   // Verify literal subscripts if dimensions are correct.
   size_t ndim(dimensions(r.field));
-  if( ndim == 0 || ndim != r.nsubscript ) return NULL;
-  cbl_refer_t *esub = r.subscripts + r.nsubscript;
+  if( ndim == 0 || ndim != r.nsubscript() ) return NULL;
   std::vector<cbl_field_t *> dims( ndim, NULL );
   auto pdim = dims.end();
 
@@ -941,29 +979,28 @@ literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */)  {
    * for the corresponding dimension.  Return the first subscript not
    * meeting those criteria, if any.
    */
-  auto p = std::find_if( r.subscripts, esub,
-                         [&pdim]( const cbl_refer_t& r ) {
+  auto psub = std::find_if( r.subscripts.begin(), r.subscripts.end(),
+                         [pdim]( const cbl_refer_t& r ) mutable {
                            const auto& occurs((*pdim)->occurs);
                            pdim++;
                            return ! occurs.subscript_ok(r.field);
                          } );
-  isub = p - r.subscripts;
-  return p == esub? NULL : dims[isub];
+  isub = psub - r.subscripts.begin(); 
+  return psub == r.subscripts.end()? NULL : dims[isub];
 }
 
 size_t
 cbl_refer_t::subscripts_set( const std::list<cbl_refer_t>& subs ) {
-  nsubscript = subs.size();
-  subscripts = new cbl_refer_t[nsubscript];
-  std::copy( subs.begin(), subs.end(), subscripts );
-
+  subscripts.clear();
+  std::copy( subs.begin(), subs.end(), std::back_inserter(subscripts) );
   return dimensions(field);
 }
 
 const char *
 cbl_refer_t::str() const {
   static char subscripts[64];
-  sprintf(subscripts, "(%u of %zu dimensions)", nsubscript, dimensions(field));
+  sprintf(subscripts, "(%u of " HOST_SIZE_T_PRINT_UNSIGNED " dimensions)",
+          nsubscript(), (fmt_size_t)dimensions(field));
   char *output = xasprintf("%s %s %s",
                            field? field_str(field) : "(none)",
                            0 < dimensions(field)? subscripts : "",
@@ -979,18 +1016,18 @@ cbl_refer_t::name() const {
 
 const char *
 cbl_refer_t::deref_str() const {
-  std::vector<char> dimstr(nsubscript * 16, '\0');
+  std::vector<char> dimstr(nsubscript() * 16, '\0');
   dimstr.at(0) = '(';
   auto p = dimstr.begin() + 1;
 
   if( !field ) return name();
 
-  for( auto sub = subscripts; sub <  subscripts + nsubscript; sub++ ) {
-    auto initial = sub->field->data.initial ? sub->field->data.initial : "?";
+  for( const auto& sub : subscripts ) {
+    auto initial = sub.field->data.initial ? sub.field->data.initial : "?";
     size_t len = dimstr.end() - p;
     p += snprintf( &*p, len, "%s ", initial );
   }
-  if( 0 < nsubscript ) {
+  if( ! subscripts.empty() ) {
     *--p = ')';
   }
   char *output = xasprintf("%s%s", field->name, dimstr.data());
@@ -1009,10 +1046,10 @@ struct move_corresponding_field {
     tgt.field = cbl_field_of(symbol_at(elem.second));
 
     if( yydebug ) {
-      dbgmsg("move_corresponding:%d: SRC: %3zu %s", __LINE__,
-            elem.first, src.str());
-      dbgmsg("move_corresponding:%d:   to %3zu %s", __LINE__,
-            elem.second, tgt.str());
+      dbgmsg("move_corresponding:%d: SRC: %3" GCC_PRISZ "u %s", __LINE__,
+            (fmt_size_t)elem.first, src.str());
+      dbgmsg("move_corresponding:%d:   to %3" GCC_PRISZ "u %s", __LINE__,
+            (fmt_size_t)elem.second, tgt.str());
     }
 
     parser_move(tgt, src);
@@ -1067,10 +1104,8 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
   static_assert(sizeof(matrix[0]) == COUNT_OF(matrix[0]),
                 "matrix should be square");
 
-  for( const cbl_field_t *args[] = {tgt, src}, **p=args;
-       p < args + COUNT_OF(args); p++ ) {
-    auto& f(**p);
-    switch(f.type) {
+  for( auto field : { src, tgt } ) {
+    switch(field->type) {
     case FldClass:
     case FldConditional:
     case FldIndex:
@@ -1082,9 +1117,9 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
     case FldForward:
     case FldBlob:
     default:
-      if( sizeof(matrix[0]) < f.type ) {
+      if( sizeof(matrix[0]) < field->type ) {
         cbl_internal_error("logic error: MOVE %s %s invalid type:",
-                           cbl_field_type_str(f.type), f.name);
+                           cbl_field_type_str(field->type), field->name);
       }
       break;
     }
@@ -1116,8 +1151,9 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
         if( yydebug && ! retval ) {
           auto bad = std::find_if( p, pend,
                                    []( char ch ) { return ! ISDIGIT(ch); } );
-          dbgmsg("%s:%d: offending character '%c' at position %zu",
-                __func__, __LINE__, *bad, bad - p);
+          dbgmsg("%s:%d: offending character '%c' at position "
+                 HOST_SIZE_T_PRINT_UNSIGNED,
+                 __func__, __LINE__, *bad, (fmt_size_t)(bad - p));
         }
       }
       break;
@@ -1150,12 +1186,6 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
     }
   }
 
-  if( yydebug && getenv(__func__) ) {
-    dbgmsg("%s:%d: ok to move %s to %s (0x%02x)", __func__, __LINE__,
-          cbl_field_type_str(src->type), cbl_field_type_str(tgt->type),
-          retval);
-  }
-
   return retval;
 }
 
@@ -1347,7 +1377,7 @@ public:
   {
     assert(isym);
   }
-  procdef_t( const procref_base_t& ref )
+  explicit procdef_t( const procref_base_t& ref )
     : procref_base_t(ref)
     , isym(0)
   {}
@@ -1356,13 +1386,6 @@ public:
     return procref_base_t(*this) < procref_base_t(that);
   }
 
-  bool operator<( const procref_base_t& that ) const {
-    if( that.has_section() ) {
-      return procref_base_t(*this) < that;
-    }
-    return strcasecmp(paragraph(), that.paragraph()) < 0;
-  }
-
   cbl_label_t * label_of() const {
     return isym == 0? NULL : cbl_label_of(symbol_at(isym));
   }
@@ -1393,7 +1416,7 @@ static procedures_t::iterator current_procedure = programs.end()->second.end();
 class procedure_match {
   const procref_base_t& ref;
 public:
-  procedure_match( const procref_base_t& ref ) : ref(ref) {}
+  explicit procedure_match( const procref_base_t& ref ) : ref(ref) {}
   // Match a 2-name reference to section & paragraph, else to one or the other.
   bool operator()( procedures_t::const_reference elem ) {
     const procdef_t& key = elem.first;
@@ -1421,16 +1444,7 @@ locally_unique( size_t program, const procdef_t& key, const procref_t& ref ) {
   const char *section_name = ref.has_section()? ref.section() : key.section();
   procref_base_t full_ref(section_name, ref.paragraph());
 
-  if( getenv(__func__) ) {
-    dbgmsg("%s: %zu for ref %s of '%s' (line %d) "
-          "in %s of '%s' (as %s of '%s')", __func__,
-          procedures.count(full_ref),
-          ref.paragraph(), ref.section(), ref.line_number(),
-          key.paragraph(), key.section(),
-          full_ref.paragraph(), full_ref.section() );
-  }
-
-  return 1 == procedures.count(full_ref);
+  return 1 == procedures.count(procdef_t(full_ref));
 }
 
 // Add each section and paragraph to the map as it occurs in the Cobol text.
@@ -1451,9 +1465,6 @@ procedure_definition_add( size_t program, const cbl_label_t *procedure ) {
   }
 
   procdef_t key( section_name, paragraph_name, isym );
-  if( getenv(__func__) ) {
-    dbgmsg("%s: #%3zu %s of %s", __func__, isym, paragraph_name, section_name);
-  }
   current_procedure =
     programs[program].insert( make_pair(key, procedures_t::mapped_type()) );
 }
@@ -1463,9 +1474,6 @@ void
 procedure_reference_add( const char *section, const char *paragraph,
                          int line, size_t context )
 {
-  if( getenv(__func__) ) {
-    dbgmsg("%s: line %3d %s of %s", __func__, line, paragraph, section);
-  }
   current_procedure->second.push_back( procref_t(section, paragraph,
                                                  line, context) );
 }
@@ -1496,10 +1504,11 @@ ambiguous_reference( size_t program ) {
       ambiguous = find_if_not( proc.second.begin(), proc.second.end(),
                                is_unique(program, proc.first) );
     if( proc.second.end() != ambiguous ) {
-      if( yydebug || getenv("symbol_label_add")) {
-        dbgmsg("%s: %s of '%s' has %zu potential matches", __func__,
-              ambiguous->paragraph(), ambiguous->section(),
-              procedures.count(*ambiguous));
+      if( yydebug ) {
+        dbgmsg("%s: %s of '%s' has " HOST_SIZE_T_PRINT_UNSIGNED
+               "potential matches", __func__,
+               ambiguous->paragraph(), ambiguous->section(),
+               (fmt_size_t)procedures.count(procdef_t(*ambiguous)));
       }
       return new procref_t(*ambiguous);
     }
@@ -1526,7 +1535,7 @@ intradeclarative_reference() {
 class next_group {
   size_t isym;
 public:
-  next_group( symbol_elem_t *group ) : isym(symbol_index(group)) {}
+ explicit next_group( const symbol_elem_t *group ) : isym(symbol_index(group)) {}
 
   // return true if elem is not a member of the group
   bool operator()( const symbol_elem_t& elem ) {
@@ -1542,9 +1551,9 @@ parent_names( const symbol_elem_t *elem,
 
   if( is_filler(cbl_field_of(elem)) ) return;
 
-  // dbgmsg("%s: asked about %s of %s (%zu away)", __func__,
+  // dbgmsg("%s: asked about %s of %s (" HOST_SIZE_T_PRINT_UNSIGNED " away)", __func__,
   //       cbl_field_of(elem)->name,
-  //       cbl_field_of(group)->name, elem - group);
+  //       cbl_field_of(group)->name, (fmt_size_t)(elem - group));
 
   for( const symbol_elem_t *e=elem; e && group < e; e = symbol_parent(e) ) {
     names.push_front( cbl_field_of(e)->name );
@@ -1563,15 +1572,17 @@ public:
                       symbol_elem_t *rgroup, type_t type )
     : lgroup(lgroup), rgroup(rgroup), type(type)
   {
-    dbgmsg( "%s:%d: for #%zu %s and #%zu %s on line %d", __func__, __LINE__,
-             symbol_index(lgroup), cbl_field_of(lgroup)->name,
-             symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno );
+    dbgmsg( "%s:%d: for #" HOST_SIZE_T_PRINT_UNSIGNED
+            " %s and #" HOST_SIZE_T_PRINT_UNSIGNED " %s on line %d",
+            __func__, __LINE__,
+            (fmt_size_t)symbol_index(lgroup), cbl_field_of(lgroup)->name,
+            (fmt_size_t)symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno );
   }
 
   static bool
   any_redefines( const cbl_field_t& field, const symbol_elem_t *group ) {
     for( const cbl_field_t *f = &field; f && f->parent > 0; f = parent_of(f) ) {
-      symbol_elem_t *e = symbol_at(f->parent);
+      const symbol_elem_t *e = symbol_at(f->parent);
       if( e == group || e->type != SymField ) break;
       if( symbol_redefines(f) ) return true;
     }
@@ -1642,8 +1653,9 @@ corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs,
   lhsg.a = symbols_begin(field_index(lhs));
   lhsg.z = std::find_if( lhsg.a, symbols_end(), next_group(lhsg.a) );
 
-  dbgmsg("%s:%d: examining %zu symbols after %s", __func__, __LINE__,
-          lhsg.z - lhsg.a, lhs->name);
+  dbgmsg("%s:%d: examining " HOST_SIZE_T_PRINT_UNSIGNED " symbols after %s",
+          __func__, __LINE__,
+          (fmt_size_t)(lhsg.z - lhsg.a), lhs->name);
 
   find_corresponding finder( symbol_at(field_index(lhs)),
                              symbol_at(field_index(rhs)), type );
@@ -1651,8 +1663,9 @@ corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs,
 
   output.erase(0);
 
-  dbgmsg( "%s:%d: %s and %s have %zu corresponding fields",
-          __func__, __LINE__, lhs->name, rhs->name, output.size() );
+  dbgmsg( "%s:%d: %s and %s have " HOST_SIZE_T_PRINT_UNSIGNED
+          " corresponding fields",
+          __func__, __LINE__, lhs->name, rhs->name, (fmt_size_t)output.size() );
 
   return output;
 }
@@ -1730,11 +1743,10 @@ struct input_file_t {
   ino_t inode;
   int lineno;
   const char *name;
-  const line_map *lines;
 
   input_file_t( const char *name, ino_t inode,
-                int lineno=1, const line_map *lines = NULL )
-    : inode(inode), lineno(lineno), name(name), lines(lines)
+                int lineno=1 )
+    : inode(inode), lineno(lineno), name(name)
   {
     if( inode == 0 ) inode_set();
   }
@@ -1753,14 +1765,29 @@ struct input_file_t {
 
 class unique_stack : public std::stack<input_file_t>
 {
+  friend void cobol_set_pp_option(int opt);
+  bool option_m;
+  std::set<std::string> all_names;
+  
+  const char *
+  no_wd( const char *wd, const char *name ) {
+    int i;
+    for( i=0; wd[i] == name[i]; i++ ) i++;
+    if( wd[i] == '\0' && name[i] == '/' ) i++;
+    return yydebug? name : name + i;
+  }
+
  public:
+  unique_stack() : option_m(false) {}
+  
   bool push( const value_type& value ) {
     auto ok = std::none_of( c.cbegin(), c.cend(),
-                            [value]( auto& that ) {
+                            [value]( const auto& that ) {
                               return value == that;
                                 } );
     if( ok ) {
       std::stack<input_file_t>::push(value);
+      all_names.insert(value.name);
       return true;
     }
     size_t n = c.size();
@@ -1771,21 +1798,39 @@ class unique_stack : public std::stack<input_file_t>
                 "----- ---- --------"
                 "----------------------------------------");
         for( const auto& v : c ) {
-          dbgmsg( " %4zu %4d %s", c.size() - --n, v.lineno, no_wd(wd, v.name) );
+          dbgmsg( " %4" GCC_PRISZ "u %4d %s",
+                  (fmt_size_t)(c.size() - --n), v.lineno, no_wd(wd, v.name) );
         }
       } else {
-        dbgmsg("unable to get current working directory: %m");
+        dbgmsg("unable to get current working directory: %s", xstrerror(errno));
       }
       free(wd);
     }
     return false;
   }
-  const char *
-  no_wd( const char *wd, const char *name ) {
-    int i;
-    for( i=0; wd[i] == name[i]; i++ ) i++;
-    if( wd[i] == '\0' && name[i] == '/' ) i++;
-    return yydebug? name : name + i;
+
+  // Look down into the stack. peek(0) == top()
+  const input_file_t& peek( size_t n ) const {
+    gcc_assert( n < size() );
+    return c.at(size() - ++n);
+  } 
+  
+  void option( int opt ) { // capture other preprocessor options eventually
+    assert(opt == 'M');
+    option_m = true;
+  }
+  int option() const {
+    return option_m?  'M' : 0;
+  }
+
+  void print() const {
+    std::string input( top().name );
+    printf( "%s: ", input.c_str() );
+    for( const auto& name : all_names ) {
+      if( name != input ) 
+	printf( "\\\n\t%s ", name.c_str() );
+    }
+    printf("\n");
   }
 };
 
@@ -1794,6 +1839,12 @@ static unique_stack input_filenames;
 static std::map<std::string, ino_t> old_filenames;
 static const unsigned int sysp = 0;  // not a C header file, cf. line-map.h
 
+void cobol_set_pp_option(int opt) {
+  // capture other preprocessor options eventually
+  assert(opt == 'M');
+  input_filenames.option_m = true;
+}
+				  
 /*
  * Maintain a stack of input filenames.  Ensure the files are unique (by
  * inode), to prevent copybook cycles. Before pushing a new name, Record the
@@ -1804,12 +1855,13 @@ static const unsigned int sysp = 0;  // not a C header file, cf. line-map.h
  * to enforce uniqueness, and the scanner to maintain line numbers.
  */
 bool cobol_filename( const char *name, ino_t inode ) {
-  line_map *lines = NULL;
+  const line_map *lines = NULL;
   if( inode == 0 ) {
     auto p = old_filenames.find(name);
     if( p == old_filenames.end() ) {
       for( auto& elem : old_filenames ) {
-        dbgmsg("%6zu %-30s", elem.second, elem.first.c_str());
+        dbgmsg("%6" GCC_PRISZ "u %-30s",
+               (fmt_size_t)elem.second, elem.first.c_str());
       }
       cbl_errx( "logic error: missing inode for %s", name);
     }
@@ -1818,32 +1870,42 @@ bool cobol_filename( const char *name, ino_t inode ) {
   }
   linemap_add(line_table, LC_ENTER, sysp, name, 1);
   input_filename_vestige = name;
-  bool pushed = input_filenames.push( input_file_t(name, inode, 1, lines) );
-  input_filenames.top().lineno = yylineno = 1;
-  if( getenv(__func__) ) {
-    dbgmsg("   saving %s with lineno as %d",
-          input_filenames.top().name, input_filenames.top().lineno);
-  }
+  bool pushed = input_filenames.push( input_file_t(name, inode, 1) );
   return pushed;
 }
 
 const char *
-cobol_lineno_save() {
+cobol_lineno( int lineno ) {
   if( input_filenames.empty() ) return NULL;
   auto& input( input_filenames.top() );
-  input.lineno = yylineno;
-  if( getenv(__func__) ) {
-    dbgmsg("  setting %s with lineno as %d", input.name, input.lineno);
-  }
+  input.lineno = lineno;
   return input.name;
 }
 
+/*
+ * This function is called from the scanner, usually when a copybook is on top
+ * of the input stack, before the parser retrieves the token and resets the
+ * current filename.  For that reason, we normaly want to line number of the
+ * file that is about to become the current one, which is the one behind top().
+ *
+ * If somehow we arrive here when there is nothing underneath, we return the
+ * current line nubmer, or zero if there's no input.  The only consequence is
+ * that the reported line number might be wrong.
+ */
+int
+cobol_lineno() {
+  if( input_filenames.empty() ) return 0;
+  size_t n = input_filenames.size() < 2? 0 : 1;
+  const auto& input( input_filenames.peek(n) );
+  return input.lineno;
+}
+
 const char *
 cobol_filename() {
   return input_filenames.empty()? input_filename_vestige : input_filenames.top().name;
 }
 
-const char *
+void
 cobol_filename_restore() {
   assert(!input_filenames.empty());
   const input_file_t& top( input_filenames.top() );
@@ -1851,21 +1913,17 @@ cobol_filename_restore() {
   input_filename_vestige = top.name;
 
   input_filenames.pop();
-  if( input_filenames.empty() ) return NULL;
+  if( input_filenames.empty() ) return;
 
   auto& input = input_filenames.top();
 
-  input.lines = linemap_add(line_table, LC_LEAVE, sysp, NULL, 0);
-
-  yylineno = input.lineno;
-  if( getenv("cobol_filename") ) {
-    dbgmsg("restoring %s with lineno to %d", input.name, input.lineno);
-  }
-  return input.name;
+  linemap_add(line_table, LC_LEAVE, sysp, NULL, 0);
 }
 
 static location_t token_location;
 
+location_t location_from_lineno() { return token_location; }
+
 template <typename LOC>
 static void
 gcc_location_set_impl( const LOC& loc ) {
@@ -1893,11 +1951,9 @@ verify_format( const char gmsgid[] ) {
   static regex_t re;
   static int cflags = REG_EXTENDED;
   static int status = regcomp( &re, pattern, cflags );
-  static char errbuf[80];
-
-
 
   if( status != 0 ) {
+    static char errbuf[80];
     int n = regerror(status, &re, errbuf, sizeof(errbuf));
     gcc_assert(size_t(n) < sizeof(errbuf));
     fprintf(stderr, "%s:%d: %s", __func__, __LINE__, errbuf);
@@ -1916,6 +1972,8 @@ verify_format( const char gmsgid[] ) {
 static const diagnostic_option_id option_zero;
 size_t parse_error_inc();
 
+void ydferror( const char gmsgid[], ... ) ATTRIBUTE_GCOBOL_DIAG(1, 2);
+
 void
 ydferror( const char gmsgid[], ... ) {
   verify_format(gmsgid);
@@ -1938,7 +1996,7 @@ extern YYLTYPE yylloc;
  * the global token_location, which is passed to the diagnostic framework. The
  * original value is restored when the instantiated variable goes out of scope.
  */
-class temp_loc_t : protected YYLTYPE {
+class temp_loc_t {
   location_t orig;
  public:
   temp_loc_t() : orig(token_location) {
@@ -1946,10 +2004,10 @@ class temp_loc_t : protected YYLTYPE {
 
     gcc_location_set(yylloc); // use lookahead location
   }
-  temp_loc_t( const YYLTYPE& loc) : orig(token_location) {
+  explicit temp_loc_t( const YYLTYPE& loc) : orig(token_location) {
     gcc_location_set(loc);
   }
-  temp_loc_t( const YDFLTYPE& loc) : orig(token_location) {
+  explicit temp_loc_t( const YDFLTYPE& loc) : orig(token_location) {
     YYLTYPE lloc = {
       loc.first_line, loc.first_column,
       loc.last_line,  loc.last_column };
@@ -1992,21 +2050,14 @@ void error_msg( const YYLTYPE& loc, const char gmsgid[], ... ) {
   ERROR_MSG_BODY
 }
 
+void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... )
+  ATTRIBUTE_GCOBOL_DIAG(2, 3);
+
 void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... ) {
   ERROR_MSG_BODY
 }
 
 void
-cdf_location_set(YYLTYPE loc) {
-  extern YDFLTYPE ydflloc;
-
-  ydflloc.first_line =   loc.first_line;
-  ydflloc.first_column = loc.first_column;
-  ydflloc.last_line =    loc.last_line;
-  ydflloc.last_column =  loc.last_column;
-}
-
-void
 yyerror( const char gmsgid[], ... ) {
   temp_loc_t looker;
   verify_format(gmsgid);
@@ -2059,7 +2110,7 @@ yyerrorvl( int line, const char *filename, const char fmt[], ... ) {
 static inline size_t
 matched_length( const regmatch_t& rm ) { return rm.rm_eo - rm.rm_so; }
 
-const char *
+int
 cobol_fileline_set( const char line[] ) {
   static const char pattern[] = "#line +([[:alnum:]]+) +[\"']([^\"']+). *\n";
   static const int cflags = REG_EXTENDED | REG_ICASE;
@@ -2072,7 +2123,7 @@ cobol_fileline_set( const char line[] ) {
     if( (erc = regcomp(&re, pattern, cflags)) != 0 ) {
         regerror(erc, &re, regexmsg, sizeof(regexmsg));
         dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
-        return line;
+        return 0;
     }
     preg = &re;
   }
@@ -2080,10 +2131,10 @@ cobol_fileline_set( const char line[] ) {
     if( erc != REG_NOMATCH ) {
       regerror(erc, preg, regexmsg, sizeof(regexmsg));
       dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg );
-      return line;
+      return 0;
     }
-    error_msg(yylloc, "invalid #line directive: %s", line );
-    return line;
+    error_msg(yylloc, "invalid %<#line%> directive: %s", line );
+    return 0;
   }
 
   const char
@@ -2092,40 +2143,39 @@ cobol_fileline_set( const char line[] ) {
   int fileline;
 
   if( 1 != sscanf(line_str, "%d", &fileline) )
-    yywarn("could not parse line number %s from #line directive", line_str);
+    yywarn("could not parse line number %s from %<#line%> directive", line_str);
 
   input_file_t input_file( filename, ino_t(0), fileline ); // constructor sets inode
 
-  if( getenv(__func__) ) return filename; // ignore #line directive
-
   if( input_filenames.empty() ) {
-    input_file.lines = linemap_add(line_table, LC_ENTER, sysp, filename, 1);
     input_filenames.push(input_file);
   }
 
   input_file_t& file = input_filenames.top();
   file = input_file;
-  yylineno = file.lineno;
 
-  return file.name;
+  return file.lineno;
 }
 
-class timespec_t {
-  struct timespec now;
+//#define TIMING_PARSE
+#ifdef TIMING_PARSE
+class cbl_timespec {
+  uint64_t now; // Nanoseconds
  public:
-  timespec_t() {
-    clock_gettime(CLOCK_MONOTONIC, &now);
+  cbl_timespec() {
+    now = get_time_nanoseconds();
   }
   double ns() const {
-    return now.tv_sec * 1000000000 + now.tv_nsec;
+    return now;
   }
-  friend double operator-( const timespec_t& now, const timespec_t& then );
+  friend double operator-( const cbl_timespec& now, const cbl_timespec& then );
 };
 
 double
-operator-( const timespec_t& then, const timespec_t& now ) {
+operator-( const cbl_timespec& now, const cbl_timespec& then ) {
   return (now.ns() - then.ns()) / 1000000000;
 }
+#endif
 
 static int
 parse_file( const char filename[] )
@@ -2136,15 +2186,25 @@ parse_file( const char filename[] )
 
   parser_enter_file(filename);
 
-  timespec_t start;
+  if( input_filenames.option() == 'M' ) {
+    input_filenames.print();
+    return 0;
+  }
+
+#ifdef TIMING_PARSE
+  cbl_timespec start;
+#endif
 
   int erc = yyparse();
 
-  timespec_t finish;
+#ifdef TIMING_PARSE
+  cbl_timespec finish;
   double dt  = finish - start;
+  printf("Overall parse & generate time is %.6f seconds\n", dt);
+#endif
+
   parser_leave_file();
 
-  //printf("Overall parse & generate time is %.6f seconds\n", dt);
 
   fclose (yyin);
 
@@ -2168,30 +2228,20 @@ cobol_set_debugging( bool flex, bool yacc, bool parser )
   yy_flex_debug = flex? 1 : 0;
   ydfdebug = yydebug = yacc? 1 : 0;
   f_trace_debug = parser? 1 : 0;
-
-  char *ind = getenv("INDICATOR_COLUMN");
-  if( ind ) {
-    int col;
-    if( 1 != sscanf(ind, "%d", &col) ) {
-      yywarn("ignored non-integer value for INDICATOR_COLUMN=%s", ind);
-    }
-    cobol_set_indicator_column(col);
-  }
 }
 
-os_locale_t os_locale = { "UTF-8", xstrdup("C.UTF-8") };
-
+os_locale_t os_locale = { "UTF-8", "C.UTF-8" };
 
 void
 cobol_parse_files (int nfile, const char **files)
 {
-  char * opaque = setlocale(LC_CTYPE, "");
+  const char * opaque = setlocale(LC_CTYPE, "");
   if( ! opaque ) {
     yywarn("setlocale: unable to initialize LOCALE");
   } else {
     char *codeset = nl_langinfo(CODESET);
     if( ! codeset ) {
-      yywarn("nl_langinfo failed after setlocale succeeded");
+      yywarn("%<nl_langinfo%> failed after %<setlocale()%> succeeded");
     } else {
       os_locale.codeset = codeset;
     }
@@ -2213,6 +2263,7 @@ cbl_message(int fd, const char *format_string, ...)
   char *ostring = xvasprintf(format_string, ap);
   va_end(ap);
   write(fd, ostring, strlen(ostring));
+  write(fd, "\n", 1);
   free(ostring);
   }
 
@@ -2302,7 +2353,7 @@ dbgmsg(const char *msg, ...) {
 
 void
 dialect_error( const YYLTYPE& loc, const char term[], const char dialect[] ) {
-  error_msg(loc, "%s is not ISO syntax, requires -dialect %s",
+  error_msg(loc, "%s is not ISO syntax, requires %<-dialect %s%>",
            term, dialect);
 }
 
@@ -2313,12 +2364,621 @@ bool fisdigit(int c)
 bool fisspace(int c)
   {
   return ISSPACE(c);
-  };
+  }
 int  ftolower(int c)
   {
   return TOLOWER(c);
   }
+int  ftoupper(int c)
+  {
+  return TOUPPER(c);
+  }
 bool fisprint(int c)
   {
   return ISPRINT(c);
-  };
+  }
+
+// 8.9 Reserved words
+static const std::set<std::string> reserved_words = {
+  // GCC COBOL keywords
+  "COMMAND-LINE",
+  "COMMAND-LINE-COUNT",
+
+  // GCC device names
+  "C01",
+  "C02",
+  "C03",
+  "C04",
+  "C05",
+  "C06",
+  "C07",
+  "C08",
+  "C09",
+  "C10",
+  "C11",
+  "C12",
+  "CONSOLE",
+  "S01",
+  "S02",
+  "S03",
+  "S04",
+  "S05",
+  "STDERR",
+  "STDIN",
+  "STDOUT",
+  "SYSIN",
+  "SYSIPT",
+  "SYSLIST",
+  "SYSLST",
+  "SYSOUT",
+  "SYSPCH",
+  "SYSPUNCH",
+  "UPSI",
+
+  // IBM keywords that GCC recognizes
+  "BASIS",
+  "CBL",
+  "ENTER",
+  "READY",
+  "TITLE",
+  "TRACE",
+  "ALTER",
+  "COBOL",
+  "DATE-COMPILED",
+  "DATE-WRITTEN",
+  "DBCS",
+  "DEBUGGING",
+  "EGCS",
+  "ENTRY",
+  "EVERY",
+  "INSTALLATION",
+  "I-O-CONTROL",
+  "KANJI",
+  "LABEL",
+  "NULLS",
+  "PADDING",
+  "PROCEDURES",
+  "PROCEED",
+  "RECORDING",
+  "RERUN",
+  "REVERSED",
+  "SECURITY",
+  "TALLY",
+  "VOLATILE",
+  "XML",
+  "END-START",
+  
+  // ISO 2023 keywords
+  "ACCEPT",
+  "ACCESS",
+  "ACTIVE-CLASS",
+  "ADD",
+  "ADDRESS",
+  "ADVANCING",
+  "AFTER",
+  "ALIGNED",
+  "ALL",
+  "ALLOCATE",
+  "ALPHABET",
+  "ALPHABETIC",
+  "ALPHABETIC-LOWER",
+  "ALPHABETIC-UPPER",
+  "ALPHANUMERIC",
+  "ALPHANUMERIC-EDITED",
+  "ALSO",
+  "ALTERNATE",
+  "AND",
+  "ANY",
+  "ANYCASE",
+  "ARE",
+  "AREA",
+  "AREAS",
+  "AS",
+  "ASCENDING",
+  "ASSIGN",
+  "AT",
+  "B-AND",
+  "B-NOT",
+  "B-OR",
+  "B-SHIFT-L",
+  "B-SHIFT-LC",
+  "B-SHIFT-R",
+  "B-SHIFT-RC",
+  "B-XOR",
+  "BASED",
+  "BEFORE",
+  "BINARY",
+  "BINARY-CHAR",
+  "BINARY-DOUBLE",
+  "BINARY-LONG",
+  "BINARY-SHORT",
+  "BIT",
+  "BLANK",
+  "BLOCK",
+  "BOOLEAN",
+  "BOTTOM",
+  "BY",
+  "CALL",
+  "CANCEL",
+  "CF",
+  "CH",
+  "CHARACTER",
+  "CHARACTERS",
+  "CLASS",
+  "CLASS-ID",
+  "CLOSE",
+  "CODE",
+  "CODE-SET",
+  "COL",
+  "COLLATING",
+  "COLS",
+  "COLUMN",
+  "COLUMNS",
+  "COMMA",
+  "COMMIT",
+  "COMMON",
+  "COMP",
+  "COMPUTATIONAL",
+  "COMPUTE",
+  "CONDITION",
+  "CONFIGURATION",
+  "CONSTANT",
+  "CONTAINS",
+  "CONTENT",
+  "CONTINUE",
+  "CONTROL",
+  "CONTROLS",
+  "CONVERTING",
+  "COPY",
+  "CORR",
+  "CORRESPONDING",
+  "COUNT",
+  "CRT",
+  "CURRENCY",
+  "CURSOR",
+  "DATA",
+  "DATA-POINTER",
+  "DATE",
+  "DAY",
+  "DAY-OF-WEEK",
+  "DE",
+  "DECIMAL-POINT",
+  "DECLARATIVES",
+  "DEFAULT",
+  "DELETE",
+  "DELIMITED",
+  "DELIMITER",
+  "DEPENDING",
+  "DESCENDING",
+  "DESTINATION",
+  "DETAIL",
+  "DISPLAY",
+  "DIVIDE",
+  "DIVISION",
+  "DOWN",
+  "DUPLICATES",
+  "DYNAMIC",
+  "EC",
+  "EDITING",
+  "ELSE",
+  "EMD-START",
+  "END",
+  "END-ACCEPT",
+  "END-ADD",
+  "END-CALL",
+  "END-COMPUTE",
+  "END-DELETE",
+  "END-DISPLAY",
+  "END-DIVIDE",
+  "END-EVALUATE",
+  "END-IF",
+  "END-MULTIPLY",
+  "END-OF-PAGE",
+  "END-PERFORM",
+  "END-READ",
+  "END-RECEIVE",
+  "END-RETURN",
+  "END-REWRITE",
+  "END-SEARCH",
+  "END-SEND",
+  "END-STRING",
+  "END-SUBTRACT",
+  "END-UNSTRING",
+  "END-WRITE",
+  "ENVIRONMENT",
+  "EO",
+  "EOP",
+  "EQUAL",
+  "ERROR",
+  "EVALUATE",
+  "EXCEPTION",
+  "EXCEPTION-OBJECT",
+  "EXCLUSIVE-OR",
+  "EXIT",
+  "EXTEND",
+  "EXTERNAL",
+  "FACTORY",
+  "FALSE",
+  "FARTHEST-FROM-ZERO",
+  "FD",
+  "FILE",
+  "FILE-CONTROL",
+  "FILLER",
+  "FINAL",
+  "FINALLY",
+  "FIRST",
+  "FLOAT-BINARY-128",
+  "FLOAT-BINARY-32",
+  "FLOAT-BINARY-64",
+  "FLOAT-DECIMAL-16",
+  "FLOAT-DECIMAL-34",
+  "FLOAT-EXTENDED",
+  "FLOAT-INFINITY",
+  "FLOAT-LONG",
+  "FLOAT-NOT-A-NUMBER-",
+  "FLOAT-SHORT",
+  "FOOTING",
+  "FOR",
+  "FORMAT",
+  "FREE",
+  "FROM",
+  "FUNCTION",
+  "FUNCTION-ID",
+  "FUNCTION-POINTER",
+  "GENERATE",
+  "GET",
+  "GIVING",
+  "GLOBAL",
+  "GO",
+  "GOBACK",
+  "GREATER",
+  "GROUP",
+  "GROUP-USAGE",
+  "HEADING",
+  "HIGH-VALUE",
+  "HIGH-VALUES",
+  "I-O",
+  "I-OICONTROL",
+  "IDENTIFICATION",
+  "IF",
+  "IN",
+  "IN-ARITHMETIC-RANGE",
+  "INDEX",
+  "INDEXED",
+  "INDICATE",
+  "INHERITS",
+  "INITIAL",
+  "INITIALIZE",
+  "INITIATE",
+  "INPUT",
+  "INPUT-OUTPUT",
+  "INSPECT",
+  "INTERFACE",
+  "INTERFACE-ID",
+  "INTO",
+  "INVALID",
+  "INVOKE",
+  "IS",
+  "JUST",
+  "JUSTIFIED",
+  "KEY",
+  "LAST",
+  "LEADING",
+  "LEFT",
+  "LENGTH",
+  "LESS",
+  "LIMIT",
+  "LIMITS",
+  "LINAGE",
+  "LINAGE-COUNTER",
+  "LINE",
+  "LINE-COUNTER",
+  "LINES",
+  "LINKAGE",
+  "LOCAL-STORAGE",
+  "LOCALE",
+  "LOCATION",
+  "LOCK",
+  "LOW-VALUE",
+  "LOW-VALUES",
+  "MERGE",
+  "MESSAGE-TAG",
+  "METHOD-ID",
+  "MINUS",
+  "MODE",
+  "MOVE",
+  "MULTIPLY",
+  "NATIONAL",
+  "NATIONAL-EDITED",
+  "NATIVE",
+  "NEAREST-TO-ZERO",
+  "NEGATIVE",
+  "NESTED",
+  "NEXT",
+  "NO",
+  "NOT",
+  "NULL",
+  "NUMBER",
+  "NUMERIC",
+  "NUMERIC-EDITED",
+  "OBJECT",
+  "OBJECT-COMPUTER",
+  "OBJECT-REFERENCE",
+  "OCCURS",
+  "OF",
+  "OFF",
+  "OMITTED",
+  "ON",
+  "OPEN",
+  "OPTIONAL",
+  "OPTIONS",
+  "OR",
+  "ORDER",
+  "ORGANIZATION",
+  "OTHER",
+  "OUTPUT",
+  "OVERFLOW",
+  "OVERRIDE",
+  "PACKED-DECIMAL",
+  "PAGE",
+  "PAGE-COUNTER",
+  "PERFORM",
+  "PF",
+  "PH",
+  "PIC",
+  "PICTURE",
+  "PLUS",
+  "POINTER",
+  "POSITIVE",
+  "PRESENT",
+  "PRINTING",
+  "PROCEDURE",
+  "PROGRAM",
+  "PROGRAM-ID",
+  "PROGRAM-POINTER",
+  "PROPERTY",
+  "PROTOTYPE",
+  "QUIET",
+  "QUOTE",
+  "QUOTES",
+  "RAISE",
+  "RAISING",
+  "RANDOM",
+  "RD",
+  "READ",
+  "RECEIVE",
+  "RECORD",
+  "RECORDS",
+  "REDEFINES",
+  "REEL",
+  "REFERENCE",
+  "RELATIVE",
+  "RELEASE",
+  "REMAINDER",
+  "REMOVAL",
+  "RENAMES",
+  "REPLACE",
+  "REPLACING",
+  "REPORT",
+  "REPORTING",
+  "REPORTS",
+  "REPOSITORY",
+  "RESERVE",
+  "RESET",
+  "RESUME",
+  "RETRY",
+  "RETURN",
+  "RETURNING",
+  "REWIND",
+  "REWRITE",
+  "RF",
+  "RH",
+  "RIGHT",
+  "ROLLBACK",
+  "ROUNDED",
+  "RUN",
+  "SAME",
+  "SCREEN",
+  "SD",
+  "SEARCH",
+  "SECTION",
+  "SELECT",
+  "SELF",
+  "SEND",
+  "SENTENCE",
+  "SEPARATE",
+  "SEQUENCE",
+  "SEQUENTIAL",
+  "SET",
+  "SHARING",
+  "SIGN",
+  "SIGNALING",
+  "SIZE",
+  "SORT",
+  "SORT-MERGE",
+  "SOURCE",
+  "SOURCE-COMPUTER",
+  "SOURCES",
+  "SPACE",
+  "SPACES",
+  "SPECIAL-NAMES",
+  "STANDARD",
+  "STANDARD-1",
+  "STANDARD-2",
+  "START",
+  "STATUS",
+  "STOP",
+  "STRING",
+  "SUBTRACT",
+  "SUM",
+  "SUPER",
+  "SUPPRESS",
+  "SYMBOLIC",
+  "SYNC",
+  "SYNCHRONIZED",
+  "SYSTEM-DEFAULT",
+  "TABLE",
+  "TALLYING",
+  "TERMINATE",
+  "TEST",
+  "THAN",
+  "THEN",
+  "THROUGH",
+  "THRU",
+  "TIME",
+  "TIMES",
+  "TO",
+  "TOP",
+  "TRAILING",
+  "TRUE",
+  "TYPE",
+  "TYPEDEF",
+  "UNIT",
+  "UNIVERSAL",
+  "UNLOCK",
+  "UNSTRING",
+  "UNTIL",
+  "UP",
+  "UPON",
+  "USAGE",
+  "USE",
+  "USER-DEFAULT",
+  "USING",
+  "VAL-STATUS",
+  "VALID",
+  "VALIDATE",
+  "VALIDATE-STATUS",
+  "VALUE",
+  "VALUES",
+  "VARYING",
+  "WHEN",
+  "WITH",
+  "WORKING-STORAGE",
+  "WRITE",
+  "XOR",
+  "ZERO",
+  "ZEROES",
+  "ZEROS",
+  "+",
+  "-",
+  "*",
+  "/",
+  "**",
+  "<",
+  "<=",
+  "<>",
+  "=",
+  ">",
+  ">=",
+  "&",
+  "*>",
+  "::",
+  ">>",
+};
+
+// 8.10 Context-sensitive words
+static const std::set<std::string> context_sensitive_words = {
+  "ACTIVATING",              // MODULE-NAME intrinsic function
+  "ANUM",                    // CONVERT intrinsic function
+  "APPLY",                   // I-O-CONTROL paragraph
+  "ARITHMETIC",              // OPTIONS paragraph
+  "ATTRIBUTE",               // SET statement
+  "AUTO",                    // screen description entry
+  "AUTOMATIC",               // LOCK MODE clause
+  "AWAY-FROM-ZERO",          // ROUNDED phrase
+  "BACKGROUND-COLOR",        // screen description entry
+  "BACKWARD",                // INSPECT statement
+  "BELL",                    // screen description entry and SET attribute statement
+  "BINARY-ENCODING",         // USAGE clause and FLOAT-DECIMAL clause
+  "BLINK",                   // screen description entry and SET attribute statement
+  "BYTE",                    // CONVERT intrinsic function
+  "BYTES",                   // RECORD clause
+  "BYTE-LENGTH",             // constant entry
+  "CAPACITY",                // OCCURS clause
+  "CENTER",                  // COLUMN clause
+  "CLASSIFICATION",          // OBJECT-COMPUTER paragraph
+  "CURRENT",                 // MODULE-NAME intrinsic function
+  "CYCLE",                   // EXIT statement
+  "DECIMAL-ENCODING",        // USAGE clause and FLOAT-DECIMAL clause
+  "EOL",                     // ERASE clause in a screen description entry
+  "EOS",                     // ERASE clause in a screen description entry
+  "ENTRY-CONVENTION",        // OPTIONS paragraph
+  "ERASE",                   // screen description entry
+  "EXPANDS",                 // class-specifier and interface-specifier of the REPOSITORY paragraph
+  "FLOAT-BINARY",            // OPTIONS paragraph
+  "FLOAT-DECIMAL",           // OPTIONS paragraph
+  "FOREGROUND-COLOR",        // screen description entry
+  "FOREVER",                 // RETRY phrase
+  "FULL",                    // screen description entry
+  "HEX",                     // CONVERT intrinsic function
+  "HIGH-ORDER-LEFT",         // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause
+  "HIGH-ORDER-RIGHT",        // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause
+  "HIGHLIGHT",               // screen description entry and SET attribute statement
+  "IGNORING",                // READ statement
+  "IMPLEMENTS",              // FACTORY paragraph and OBJECT paragraph
+  "INITIALIZED",             // ALLOCATE statement and OCCURS clause
+  "INTERMEDIATE",            // OPTIONS paragraph
+  "INTRINSIC",               // function-specifier of the REPOSITORY paragraph
+  "LC_ALL",                  // SET statement
+  "LC_COLLATE",              // SET statement
+  "LC_CTYPE",                // SET statement
+  "LC_MESSAGES",             // SET statement
+  "LC_MONETARY",             // SET statement
+  "LC_NUMERIC",              // SET statement
+  "LC_TIME",                 // SET statement
+  "LOWLIGHT",                // screen description entry and SET attribute statement
+  "MANUAL",                  // LOCK MODE clause
+  "MULTIPLE",                // LOCK ON phrase
+  "NAT",                     // CONVERT intrinsic function
+  "NEAREST-AWAY-FROM-ZERO",  // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+  "NEAREST-EVEN",            // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+  "NEAREST-TOWARD-ZERO",     // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+  "NONE",                    // DEFAULT clause
+  "NORMAL",                  // STOP statement
+  "NUMBERS",                 // COLUMN clause and LINE clause
+  "ONLY",                    // Object-view, SHARING clause, SHARING phrase, and USAGE clause
+  "PARAGRAPH",               // EXIT statement
+  "PREFIXED",                // DYNAMIC LENGTH STRUCTURE clause
+  "PREVIOUS",                // READ statement
+  "PROHIBITED",              // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+  "RECURSIVE",               // PROGRAM-ID paragraph
+  "RELATION",                // VALIDATE-STATUS clause
+  "REQUIRED",                // screen description entry
+  "REVERSE-VIDEO",           // screen description entry and SET attribute statement
+  "ROUNDING",                // OPTIONS paragraph
+  "SECONDS",                 // RETRY phrase, CONTINUE statement
+  "SECURE",                  // screen description entry
+  "SHORT",                   // DYNAMIC LENGTH STRUCTURE clause
+  "SIGNED",                  // DYNAMIC LENGTH STRUCTURE clause and USAGE clause
+  "STACK",                   // MODULE-NAME intrinsic function
+  "STANDARD-BINARY",         // ARITHMETIC clause
+  "STANDARD-DECIMAL",        // ARITHMETIC clause
+  "STATEMENT",               // RESUME statement
+  "STEP",                    // OCCURS clause
+  "STRONG",                  // TYPEDEF clause
+  "STRUCTURE",               // DYNAMIC LENGTH STRUCTURE clause
+  "SYMBOL",                  // CURRENCY clause
+  "TOP-LEVEL",               // MODULE-NAME intrinsic function
+  "TOWARD-GREATER",          // ROUNDED phrase
+  "TOWARD-LESSER",           // ROUNDED phrase
+  "TRUNCATION",              // INTERMEDIATE ROUNDING clause and ROUNDED phrase
+  "UCS-4",                   // ALPHABET clause
+  "UNDERLINE",               // screen description entry and SET attribute statement
+  "UNSIGNED",                // USAGE clause
+  "UTF-8",                   // ALPHABET clause
+  "UTF-16",                  // ALPHABET clause
+  "YYYYDDD",                 // ACCEPT statement
+  "YYYYMMDD",                // ACCEPT statement
+};
+
+// Is the input a COBOL word, per ISO/IEC 1989:2023 (E) ?
+// We add a few GCC-specific keywords, and our supported IBM keywords.
+bool
+iso_cobol_word( const std::string& name, bool include_context ) {
+  auto ok = 1 == reserved_words.count(name);
+  if( include_context && !ok ) {
+    ok = 1 == context_sensitive_words.count(name);
+  }
+  return ok;
+}
+