1 files changed, 202 insertions, 120 deletions
diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc
index 82bacf2..2d9fb72 100644
--- a/gcc/cobol/lexio.cc
+++ b/gcc/cobol/lexio.cc
@@ -28,6 +28,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "config.h"
 #include <ext/stdio_filebuf.h>
 #include "cobol-system.h"
 #include "cbldiag.h"
@@ -45,8 +46,22 @@ static struct {
     first_file = false;
     return tf;
   }
+  inline bool is_fixed() const { return column == 7; }
+  inline bool is_reffmt() const { return is_fixed() && right_margin == 73; }
+  inline bool is_free() const { return ! is_fixed(); }
+  
+  const char * description() const {
+    if( is_reffmt() ) return "REFERENCE";
+    if( is_fixed() ) return "FIXED";
+    if( is_free() ) return "FREE";
+    gcc_unreachable();
+  }    
 } indicator = { true, false, 0, 0 };
 
+// public source format test functions
+bool is_fixed_format() { return indicator.is_fixed(); }
+bool is_reference_format() { return indicator.is_reffmt(); }
+
 static bool debug_mode = false;
 
 /*
@@ -85,10 +100,6 @@ cobol_set_indicator_column( int column )
   indicator.column = column;
 }
 
-bool is_fixed_format() { return indicator.column == 7; }
-bool is_reference_format() {
-  return indicator.column == 7 && indicator.right_margin == 73;
-}
 bool include_debug()      { return indicator.column == 7 && debug_mode; }
 bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); }
 
@@ -112,7 +123,7 @@ continues_at( char *bol, char *eol ) {
 // Return pointer to indicator column. Test ch if provided.
 // NULL means no indicator column or tested value not present.
 static inline char *
-indicated( char *bol, char *eol, char ch = '\0' ) {
+indicated( char *bol, const char *eol, char ch = '\0' ) {
   if( indicator.column == 0 && *bol != '*' ) {
     return NULL;  // no indicator column in free format, except for comments
   }
@@ -129,10 +140,10 @@ indicated( char *bol, char *eol, char ch = '\0' ) {
 
 static char *
 remove_inline_comment( char *bol, char *eol ) {
-  static char ends = '\0';
   char *nl = std::find(bol, eol, '\n');
 
   if( bol < nl ) {
+    static char ends = '\0';
     std::swap(*nl, ends);
     char *comment = strstr(bol, "*>");
     if( comment ) {
@@ -197,10 +208,10 @@ maybe_add_space(const span_t& pattern, replace_t& recognized) {
   }
 
   if( befter[0] == blank || befter[1] == blank ) {
-    char *s = xasprintf( "%s%.*s%s",
-                         befter[0],
-                         recognized.after.size(), recognized.after.p,
-                         befter[1] );
+    const char *s = xasprintf( "%s%.*s%s",
+                               befter[0],
+                               recognized.after.size(), recognized.after.p,
+                               befter[1] );
     recognized.after = span_t(s, s + strlen(s));
   }
 }
@@ -255,14 +266,17 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
 
       span_t found(mfile.eodata, mfile.eodata);
 
-      if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) {
+      if( regex_search( mfile.ccur(),
+                        const_cast<const char *>(mfile.eodata),
+                        cm, re) ) {
         gcc_assert(cm[1].matched);
         found = span_t( cm[1].first, cm[1].second );
         if( yy_flex_debug ) {
           size_t n = count_newlines(mfile.data, found.p);
-          dbgmsg("%s:%d first '%.*s' is on line %zu (offset %zu)", __func__, __LINE__,
+          dbgmsg("%s:%d first '%.*s' is on line " HOST_SIZE_T_PRINT_UNSIGNED
+                 " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
                  directive.before.size(), directive.before.p,
-                 ++n, found.p - mfile.data);
+                 (fmt_size_t)++n, (fmt_size_t)(found.p - mfile.data));
         }
       } else {
         dbgmsg("%s:%d not found: '%s' in \n'%.*s'", __func__, __LINE__,
@@ -289,9 +303,12 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
     bol = next.found.pend;
 
     if( yy_flex_debug ) {
-      size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n');
-      dbgmsg( "%s:%d: line %zu @ %zu: '%s'\n/%.*s/%.*s/", __func__, __LINE__,
-              ++n, next.found.p - mfile.data,
+      size_t n = std::count(const_cast<const char *>(mfile.data),
+                            recognized.before.p, '\n');
+      dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+              " @ " HOST_SIZE_T_PRINT_UNSIGNED ": '%s'\n/%.*s/%.*s/",
+              __func__, __LINE__,
+              (fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data),
               next.directive.before.p,
               int(recognized.before.size()), recognized.before.p,
               int(recognized.after.size()), recognized.after.p );
@@ -303,14 +320,16 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
     next.found = span_t(mfile.eodata, mfile.eodata);
 
     regex re(next.directive.before.p, extended_icase);
-    if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) {
+    if( regex_search(bol, const_cast<const char *>(mfile.eodata), cm, re) ) {
       gcc_assert(cm[1].matched);
       next.found = span_t( cm[1].first, cm[1].second );
-      size_t n = std::count((const char *)mfile.data, next.found.p, '\n');
+      size_t n = std::count(const_cast<const char *>(mfile.data),
+                            next.found.p, '\n');
       if( false )
-        dbgmsg("%s:%d next '%.*s' will be on line %zu (offset %zu)", __func__, __LINE__,
+        dbgmsg("%s:%d next '%.*s' will be on line " HOST_SIZE_T_PRINT_UNSIGNED
+               " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
                next.directive.before.size(), next.directive.before.p,
-               ++n, next.found.p - mfile.data);
+               (fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data));
     }
     pnext = std::min_element(futures.begin(), futures.end());
   }
@@ -330,7 +349,7 @@ check_source_format_directive( filespan_t& mfile ) {
 
   // show contents of marked subexpressions within each match
   cmatch cm;
-  if( regex_search(p, (const char *)mfile.eol, cm, re) ) {
+  if( regex_search(p, const_cast<const char *>(mfile.eol), cm, re) ) {
     gcc_assert(cm.size() > 1);
     switch( cm[3].length() ) {
     case 4:
@@ -343,11 +362,14 @@ check_source_format_directive( filespan_t& mfile ) {
       gcc_assert(cm[3].length() == 4 || cm[3].length() == 5);
       break;
     }
+
+    dbgmsg( "%s:%d: %s format set, on line " HOST_SIZE_T_PRINT_UNSIGNED,
+            __func__, __LINE__,
+            indicator.column == 7? "FIXED" : "FREE",
+            (fmt_size_t)mfile.lineno() );
+    char *bol = indicator.is_fixed()? mfile.cur : const_cast<char*>(cm[0].first);
+    erase_line(bol, const_cast<char*>(cm[0].second));
     mfile.cur = const_cast<char*>(cm[0].second);
-    dbgmsg( "%s:%d: %s format set, on line %zu", __func__, __LINE__,
-            indicator.column == 7? "FIXED" : "FREE", mfile.lineno() );
-    erase_line(const_cast<char*>(cm[0].first),
-               const_cast<char*>(cm[0].second));
   }
 }
 
@@ -380,21 +402,28 @@ struct buffer_t : public bytespan_t {
     dbgmsg("flex input buffer: '%.*s'\n[xelf]", int(pos - data), data);
   }
   void dump() const {
+#ifdef GETENV_OK
     if( getenv("lexer_input") ) show();
+#endif
   }
 };
 
-static bool
-valid_sequence_area( const char *p, const char *eodata ) {
-  const char *pend = p + 6;
-  if ( eodata < pend ) return false;
+static inline bool is_p( char ch ) { return TOUPPER(ch) == 'P'; }
 
-  for( ; p < pend; p++ ) {
-    if( ! (ISDIGIT(*p) || *p == SPACE) ) {
-      return false;
+static bool
+is_program_id( const char *p, const char *eol ) {
+  static const std::string program_id("PROGRAM-ID");
+  auto eop = p + program_id.size();
+  if( eop < eol ) {
+    // PROGRAM-ID must be followed by a dot, perhaps with intervening whitespace.
+    for( const char *dot=eop; dot < eol && *dot != '.'; dot++ ) {
+      if( !ISSPACE(*dot) ) return false;
     }
+    std::string line (p, eop);
+    std::transform(line.begin(), line.end(), line.begin(), ::toupper);
+    return line == program_id;
   }
-  return true; // characters either digits or blanks
+  return false;
 }
 
 const char * esc( size_t len, const char input[] );
@@ -434,9 +463,9 @@ struct replacing_term_t {
   bool matched, done;
   span_t leading_trailing, term, stmt;
 
-  replacing_term_t(const char input[]) : matched(false), done(false) {
-    stmt = span_t(input, input);
-  }
+  explicit replacing_term_t(const char input[])
+    : matched(false), done(false), stmt(span_t(input, input))
+    {}
 };
 
 extern YYLTYPE yylloc;
@@ -457,11 +486,11 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) {
   class dump_loc_on_exit {
    public:
     dump_loc_on_exit() {
-      if( getenv( "update_yylloc" ) )
+      if( gcobol_getenv( "update_yylloc" ) )
         location_dump( "update_yylloc", __LINE__, "begin", yylloc);
     }
     ~dump_loc_on_exit() {
-      if( getenv( "update_yylloc" ) )
+      if( gcobol_getenv( "update_yylloc" ) )
         location_dump( "update_yylloc", __LINE__, "end  ", yylloc);
     }
   } dloe;
@@ -506,7 +535,7 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) {
 
 static replacing_term_t
 parse_replacing_term( const char *stmt, const char *estmt ) {
-  gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt);
+  gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt <= estmt);
   replacing_term_t output(stmt);
 
   static const char pattern[] =
@@ -716,13 +745,13 @@ parse_replacing_pair( const char *stmt, const char *estmt ) {
       }
     }
     if( pair.stmt.p ) {
-      yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p);
+      yywarn("CDF syntax error '%.*s'", (int)pair.stmt.size(), pair.stmt.p);
     }
     else {
       // This eliminated a compiler warning about "format-overflow"
       yywarn("CDF syntax error");
     }
-    pair.stmt = span_t(0UL, stmt);
+    pair.stmt = span_t(size_t(0), stmt);
     pair.replace = replace_t();
   }
   return pair;
@@ -762,9 +791,9 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
       // Report findings.
       if( false && yy_flex_debug ) {
         for( size_t i=0; i < cm.size(); i++ ) {
-          dbgmsg("%s: %s %zu: '%.*s'", __func__,
+          dbgmsg("%s: %s " HOST_SIZE_T_PRINT_UNSIGNED ": '%.*s'", __func__,
                  cm[i].matched? "Pair" : "pair",
-                 i,
+                 (fmt_size_t)i,
                  cm[i].matched? int(cm[i].length()) : 0,
                  cm[i].matched? cm[i].first : "");
         }
@@ -784,7 +813,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
     }
 
     span_t& before(parsed.replace.before);
-    span_t& after(parsed.replace.after);
+    const span_t& after(parsed.replace.after);
 
     const char *befter[2] = { nonword_ch, nonword_ch };
     gcc_assert(before.p < before.pend);
@@ -823,9 +852,10 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
   }
 
   if( yy_flex_debug ) {
-    dbgmsg( "%s:%d: %s: %zu pairs parsed from  '%.*s'", __func__, __LINE__,
-            parsed.done()? "done" : "not done",
-            pairs.size(), parsed.stmt.size(), parsed.stmt.p );
+    dbgmsg( "%s:%d: %s: " HOST_SIZE_T_PRINT_UNSIGNED " pairs parsed from  '%.*s'",
+            __func__, __LINE__,
+            parsed.done() ? "done" : "not done",
+            (fmt_size_t)pairs.size(), parsed.stmt.size(), parsed.stmt.p );
     int i = 0;
     for( const auto& replace : pairs ) {
       dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__,
@@ -851,7 +881,7 @@ struct copy_descr_t {
 };
 
 static YYLTYPE
-location_in( const filespan_t& mfile, const csub_match cm ) {
+location_in( const filespan_t& mfile, const csub_match& cm ) {
   YYLTYPE loc {
     int(mfile.lineno() + 1), int(mfile.colno() + 1),
     int(mfile.lineno() + 1), int(mfile.colno() + 1)
@@ -902,14 +932,15 @@ parse_copy_directive( filespan_t& mfile ) {
     copy_stmt.p = mfile.eodata;
 
     if( regex_search(mfile.ccur(),
-                          (const char *)mfile.eodata, cm, re) ) {
+                     const_cast<const char *>(mfile.eodata), cm, re) ) {
       copy_stmt = span_t( cm[0].first, cm[0].second );
       if( yy_flex_debug ) {
         size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p);
         size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend);
-        dbgmsg("%s:%d: line %zu: COPY directive is %zu lines '%.*s'",
+        dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+               ": COPY directive is " HOST_SIZE_T_PRINT_UNSIGNED " lines '%.*s'",
                __func__, __LINE__,
-               nnl, nst, copy_stmt.size(), copy_stmt.p);
+               (fmt_size_t)nnl, (fmt_size_t)nst, copy_stmt.size(), copy_stmt.p);
       }
     }
   }
@@ -922,7 +953,8 @@ parse_copy_directive( filespan_t& mfile ) {
     outcome.partial_line = span_t(mfile.cur, copy_stmt.p);
 
     if( yy_flex_debug ) {
-      dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '('));
+      dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions",
+             (fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '('));
       int i = 0;
       for( const auto& m : cm ) {
         if( m.matched )
@@ -953,7 +985,7 @@ parse_copy_directive( filespan_t& mfile ) {
       std::pair<std::list<replace_t>, char*>
         result = parse_replace_pairs( cm[0].second, mfile.eodata, true );
 
-      std::list<replace_t>& replacements(result.first);
+      const std::list<replace_t>& replacements(result.first);
       outcome.parsed = (outcome.nreplace = replacements.size()) > 0;
       if( outcome.parsed ) {
         replace_directives.push(replacements);
@@ -980,7 +1012,7 @@ parse_copy_directive( filespan_t& mfile ) {
 }
 
 static char *
-parse_replace_last_off( filespan_t& mfile ) {
+parse_replace_last_off( const filespan_t& mfile ) {
   static const char pattern[] =
     "REPLACE" "[[:space:]]+"
     "(LAST[[:space:]]+)?OFF[[:space:]]*[.]"
@@ -990,7 +1022,7 @@ parse_replace_last_off( filespan_t& mfile ) {
 
   // REPLACE [LAST] OFF?
   bool found = regex_search(mfile.ccur(),
-                                 (const char *)mfile.eodata, cm, re);
+                            const_cast<const char *>(mfile.eodata), cm, re);
   gcc_assert(found); // caller ensures
 
   gcc_assert(cm.size() == 2);
@@ -1006,8 +1038,9 @@ parse_replace_last_off( filespan_t& mfile ) {
     }
   }
 
-  dbgmsg( "%s:%d: line %zu: parsed '%.*s', ", __func__, __LINE__,
-          mfile.lineno(), int(cm[0].length()), cm[0].first );
+  dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsed '%.*s', ",
+          __func__, __LINE__,
+          (fmt_size_t)mfile.lineno(), int(cm[0].length()), cm[0].first );
 
   // Remove statement from input
   erase_line(const_cast<char*>(cm[0].first),
@@ -1039,20 +1072,23 @@ parse_replace_text( filespan_t& mfile ) {
     gcc_assert(mfile.line_length() > 2);
     if( pend[-1] == '\n' ) pend -= 2;
     auto len = int(pend - mfile.cur);
-    dbgmsg("%s:%d: line %zu: parsing '%.*s", __func__, __LINE__,
-          current_lineno, len, mfile.cur);
+    dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsing '%.*s",
+           __func__, __LINE__,
+           (fmt_size_t)current_lineno, len, mfile.cur);
   }
 
-  if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) {
-    dbgmsg( "%s:%d: line %zu: not a REPLACE statement:\n'%.*s'",
-            __func__, __LINE__, current_lineno,
+  if( ! regex_search(mfile.ccur(), mfile.eodata, cm, re) ) {
+    dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+            ": not a REPLACE statement:\n'%.*s'",
+            __func__, __LINE__, (fmt_size_t)current_lineno,
             int(mfile.line_length()), mfile.cur );
     return span_t();
   }
 
   // Report findings.
     if( yy_flex_debug ) {
-      dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '('));
+      dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions",
+             (fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '('));
       int i = 0;
       for( const auto& m : cm ) {
         if( m.matched )
@@ -1077,12 +1113,14 @@ parse_replace_text( filespan_t& mfile ) {
 
   std::pair<std::list<replace_t>, char*>
         result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false);
-  std::list<replace_t>& replacements(result.first);
+  const std::list<replace_t>& replacements(result.first);
   replace_directives.push( replacements );
 
   if( yy_flex_debug ) {
-    dbgmsg( "%s:%d: line %zu: %zu pairs parsed from  '%.*s'", __func__, __LINE__,
-           current_lineno, replacements.size(), int(replace_stmt.size()), replace_stmt.p );
+    dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": " HOST_SIZE_T_PRINT_UNSIGNED
+            " pairs parsed from  '%.*s'", __func__, __LINE__,
+            (fmt_size_t)current_lineno, (fmt_size_t)replacements.size(),
+            int(replace_stmt.size()), replace_stmt.p );
     for( const auto& replace : replacements ) {
       int i = 0;
       dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__,
@@ -1113,7 +1151,7 @@ parse_replace_directive( filespan_t& mfile ) {
     next_directive = mfile.eodata;
 
     if( regex_search(mfile.ccur(),
-                          (const char *)mfile.eodata, cm, re) ) {
+                     const_cast<const char *>(mfile.eodata), cm, re) ) {
       gcc_assert(cm[1].matched);
       next_directive = cm[0].first;
 
@@ -1162,8 +1200,9 @@ bytespan_t::append( const char *input, const char *eoinput ) {
 #if LEXIO
   auto nq = std::count_if(data, eodata, isquote);
   dbgmsg("%s:%3d:  input ------ '%.*s'", __func__, __LINE__, int(eoinput - input), input);
-  dbgmsg("%s:%3d:  precondition '%.*s' (%zu: %s)", __func__, __LINE__,
-        int(size()), data, nq, in_string()? "in string" : "not in string");
+  dbgmsg("%s:%3d:  precondition '%.*s' (" HOST_SIZE_T_PRINT_UNSIGNED ": %s)",
+         __func__, __LINE__,
+         int(size()), data, (fmt_size_t)nq, in_string()? "in string" : "not in string");
 #endif
   if( !in_string() ) { // Remove trailing space unless it's part of a literal.
     while(data < eodata && ISSPACE(eodata[-1])) eodata--;
@@ -1374,7 +1413,7 @@ preprocess_filter_add( const char input[] ) {
 
   auto filename = find_filter(filter.c_str());
   if( !filename ) {
-    yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter);
+    yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter.c_str());
     return false;
   }
   preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) );
@@ -1420,7 +1459,7 @@ cdftext::lex_open( const char filename[] ) {
 
   int output = open_output();
 
-  // Process any files supplied by the -include comamnd-line option.
+  // Process any files supplied by the -include command-line option.
   for( auto name : included_files ) {
     int input;
     if( -1 == (input = open(name, O_RDONLY)) ) {
@@ -1431,7 +1470,10 @@ cdftext::lex_open( const char filename[] ) {
     filespan_t mfile( free_form_reference_format( input ) );
 
     process_file( mfile, output );
+
+    cobol_filename_restore(); // process_file restores only for COPY
   }
+  included_files.clear();
 
   cobol_filename(filename, inode_of(input));
   filespan_t mfile( free_form_reference_format( input ) );
@@ -1453,7 +1495,7 @@ cdftext::lex_open( const char filename[] ) {
     argv[0] = filter;
 
     auto last_argv = std::transform( options.begin(), options.end(), argv.begin() + 1,
-                                     []( std::string& opt ) {
+                                     []( const std::string& opt ) {
                                        return xstrdup(opt.c_str());
                                      } );
     *last_argv = NULL;
@@ -1482,11 +1524,11 @@ cdftext::lex_open( const char filename[] ) {
     int status;
     auto kid = wait(&status);
     gcc_assert(pid == kid);
-    if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid);
+    if( kid == -1 ) cbl_err( "failed waiting for pid %ld", static_cast<long>(pid));
 
     if( WIFSIGNALED(status) ) {
-      cbl_errx( "%s pid %d terminated by %s",
-           filter, kid, strsignal(WTERMSIG(status)) );
+      cbl_errx( "%s pid %ld terminated by %s",
+                filter, static_cast<long>(kid), strsignal(WTERMSIG(status)) );
     }
     if( WIFEXITED(status) ) {
       if( (status = WEXITSTATUS(status)) != 0 ) {
@@ -1504,7 +1546,7 @@ int
 cdftext::open_input( const char filename[] ) {
   int fd = open(filename, O_RDONLY);
   if( fd == -1 ) {
-    dbgmsg( "could not open '%s': %m", filename );
+    dbgmsg( "could not open '%s': %s", filename, xstrerror(errno) );
   }
 
   verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR");
@@ -1518,9 +1560,9 @@ cdftext::open_input( const char filename[] ) {
 int
 cdftext::open_output() {
   char *name = getenv("GCOBOL_TEMPDIR");
-  int fd;
 
   if( name && 0 != strcmp(name, "/") ) {
+    int fd;
     char * stem = xasprintf("%sXXXXXX", name);
     if( -1 == (fd = mkstemp(stem)) ) {
       cbl_err( "could not open temporary file '%s' (%s)",
@@ -1561,8 +1603,8 @@ cdftext::map_file( int fd ) {
           cbl_err( "%s: could not prepare map file from FIFO %d",
               __func__, input);
         }
-        if( false ) dbgmsg("%s: copied %ld bytes from FIFO",
-                                __func__, nout);
+        if( false ) dbgmsg("%s: copied " HOST_SIZE_T_PRINT_DEC " bytes from FIFO",
+                                __func__, (fmt_size_t)nout);
       }
     }
   } while( S_ISFIFO(sb.st_mode) );
@@ -1585,6 +1627,54 @@ cdftext::map_file( int fd ) {
 
 bool lexio_dialect_mf();
 
+/*
+ * A valid sequence area is 6 digits or blanks at the begining of the line that
+ * contains PROGRAM-ID. Return NULL if no valid sequence area, else return
+ * pointer to BOL.
+ */
+static const char *
+valid_sequence_area( const char *data, const char *eodata ) {
+  
+  for( const char *p = data;
+       (p = std::find_if(p, eodata, is_p)) != eodata;
+       p++ )
+  {
+    auto eol = std::find(p, eodata, '\n');
+    if( p == data || ISSPACE(p[-1]) ) {
+      if( is_program_id(p, eol) ) {  // found program-id token
+	const char *bol = p;
+	for( ; data <= bol-1 && bol[-1] != '\n'; --bol )
+	  ;
+	if( 6 < p - bol ) {
+	  if( std::all_of(bol, bol+6, ::isdigit) ) {
+	    return bol;
+	  }
+	  if( std::all_of(bol, bol+6, ::isblank) ) {
+	    return bol;
+	  }
+	  break;
+	}
+      }
+    }
+  }
+  return nullptr;  
+}
+
+/*
+ * Reference Format -- valid COBOL between columns 8 and 72 -- has data after
+ * column 72 on the PROGRAM-ID line. Extended Reference Format (that allows
+ * longer lines) has no reason to follow the PROGRAM-ID with more stuff.
+ */
+static bool
+infer_reference_format( const char *bol, const char *eodata ) {
+  assert(bol);
+  auto eol = std::find(bol, eodata, '\n');
+  if( 72 < eol - bol ) {
+    return ! std::all_of(bol + 72, eol, ::isspace);
+  }
+  return false;
+}
+
 filespan_t
 cdftext::free_form_reference_format( int input ) {
   filespan_t source_buffer = map_file(input);
@@ -1599,29 +1689,23 @@ cdftext::free_form_reference_format( int input ) {
     size_t lineno;
     bytespan_t line;
     // construct with length zero
-    current_line_t( char data[] ) : lineno(0), line(data, data) {}
+    explicit current_line_t( char data[] ) : lineno(0), line(data, data) {}
   } current( mfile.data );
 
   /*
-   * If the format is not explicitly set on the command line, test the
-   * first 6 bytes of the first file to determine the format
-   * heuristically. If the first 6 characters are only digits or
-   * blanks, then the file is in fixed format.
+   * Infer source code format. 
    */
-
   if( indicator.inference_pending()  ) {
-    const char *p = mfile.data;
-    while( p < mfile.eodata ) {
-      const char * pend =
-        std::find(p, const_cast<const char *>(mfile.eodata), '\n');
-      if( 6 < pend - p ) break;
-      p = pend;
-      if( p < mfile.eodata) p++;
+    const char *bol = valid_sequence_area(mfile.data, mfile.eodata);
+    if( bol ) {
+      indicator.column = 7;
+      if( infer_reference_format(bol, mfile.eodata) ) {
+	indicator.right_margin = 73;
+      }
     }
-    if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7;
 
     dbgmsg("%s:%d: %s format detected", __func__, __LINE__,
-           indicator.column == 7? "FIXED" : "FREE");
+           indicator.description());
   }
 
   while( mfile.next_line() ) {
@@ -1745,15 +1829,15 @@ cdftext::free_form_reference_format( int input ) {
 void
 cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
   static size_t nfiles = 0;
-  std::list<replace_t> replacements;
 
-  __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out);
+  __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "a"), std::ios::out);
   std::ostream out(&outbuf);
   std::ostream_iterator<char> ofs(out);
 
   // indicate current file
   static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f";
 
+  if( !included_files.empty() ) { ++nfiles; }; // force push/pop of included filename
   if( !second_pass && nfiles++ ) {
     static const char delimiter[] = "\f";
     const char *filename = cobol_filename();
@@ -1772,8 +1856,8 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
       std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs,
                    []( char ch ) { return ch == '\n'; } );
       struct { int in, out; filespan_t mfile; } copy;
-      dbgmsg("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__,
-             mfile.lineno(),
+      dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ", opening %s on fd %d",
+             __func__, __LINE__,mfile.lineno(),
              copybook.source(), copybook.current()->fd);
       copy.in = copybook.current()->fd;
       copy.mfile = free_form_reference_format( copy.in );
@@ -1809,31 +1893,12 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
       continue; // No active REPLACE directive.
     }
 
-    std::list<span_t> segments = segment_line(mfile); // no replace yields
-    //                                                // 1 segment
+    std::list<span_t> segments = segment_line(mfile);
 
     for( const auto& segment : segments ) {
       std::copy(segment.p, segment.pend, ofs);
     }
 
-    if( segments.size() == 2 ) {
-      struct {
-        size_t before, after;
-        int delta() const { return before - after; } } nlines;
-      nlines.before = std::count(segments.front().p,
-                                 segments.front().pend, '\n');
-      nlines.after  = std::count(segments.back().p, segments.back().pend, '\n');
-      if( nlines.delta() < 0 ) {
-        yywarn("line %zu: REPLACED %zu lines with %zu lines, "
-              "line count off by %d", mfile.lineno(),
-              nlines.before, nlines.after, nlines.delta());
-      }
-      int nnl = nlines.delta();
-      while( nnl-- > 0 ) {
-        static const char nl[] = "\n";
-        std::copy(nl, nl + 1, ofs);
-      }
-    }
     out.flush();
   }
   // end of file
@@ -1841,6 +1906,7 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
     std::copy(file_pop, file_pop + strlen(file_pop), ofs);
     out.flush();
   }
+  if( !included_files.empty() ) { --nfiles; };
 }
 
 std::list<span_t>
@@ -1856,12 +1922,30 @@ cdftext::segment_line( filespan_t& mfile ) {
     return output;
   }
 
+  /*
+   * If the replacement changes the number of lines in the replaced text, we
+   * need to reset the line number, because the next statement is on a
+   * different line in the manipulated text than in the original.  Before each
+   * replacement, set the original line number.  After each replacement, set
+   * the line number after the elided text on the next line.
+   */
   for( const replace_t& segment : pending ) {
     gcc_assert(mfile.cur <= segment.before.p);
     gcc_assert(segment.before.pend <= mfile.eodata);
 
+    struct { unsigned long ante, post; } lineno = {
+      gb4(mfile.lineno()), gb4(mfile.lineno() + segment.after.nlines())
+    };
+    char *directive = lineno.ante == lineno.post?
+      nullptr : xasprintf("\n#line %lu \"%s\"\n",
+                          lineno.ante, cobol_filename());
+
+    if( directive ) 
+      output.push_back( span_t(strlen(directive), directive) );
     output.push_back( span_t(mfile.cur, segment.before.p) );
     output.push_back( span_t(segment.after.p, segment.after.pend ) );
+    if( directive ) 
+      output.push_back( span_t(strlen(directive), directive) );
 
     mfile.cur = const_cast<char*>(segment.before.pend);
   }
@@ -1877,5 +1961,3 @@ cdftext::segment_line( filespan_t& mfile ) {
 
   return output;
 }
-
-//////// End of the cdf_text.h file