aboutsummaryrefslogtreecommitdiff
path: root/gcc/cobol/lexio.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/cobol/lexio.cc')
-rw-r--r--gcc/cobol/lexio.cc322
1 files changed, 202 insertions, 120 deletions
diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc
index 82bacf2..2d9fb72 100644
--- a/gcc/cobol/lexio.cc
+++ b/gcc/cobol/lexio.cc
@@ -28,6 +28,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <ext/stdio_filebuf.h>
#include "cobol-system.h"
#include "cbldiag.h"
@@ -45,8 +46,22 @@ static struct {
first_file = false;
return tf;
}
+ inline bool is_fixed() const { return column == 7; }
+ inline bool is_reffmt() const { return is_fixed() && right_margin == 73; }
+ inline bool is_free() const { return ! is_fixed(); }
+
+ const char * description() const {
+ if( is_reffmt() ) return "REFERENCE";
+ if( is_fixed() ) return "FIXED";
+ if( is_free() ) return "FREE";
+ gcc_unreachable();
+ }
} indicator = { true, false, 0, 0 };
+// public source format test functions
+bool is_fixed_format() { return indicator.is_fixed(); }
+bool is_reference_format() { return indicator.is_reffmt(); }
+
static bool debug_mode = false;
/*
@@ -85,10 +100,6 @@ cobol_set_indicator_column( int column )
indicator.column = column;
}
-bool is_fixed_format() { return indicator.column == 7; }
-bool is_reference_format() {
- return indicator.column == 7 && indicator.right_margin == 73;
-}
bool include_debug() { return indicator.column == 7 && debug_mode; }
bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); }
@@ -112,7 +123,7 @@ continues_at( char *bol, char *eol ) {
// Return pointer to indicator column. Test ch if provided.
// NULL means no indicator column or tested value not present.
static inline char *
-indicated( char *bol, char *eol, char ch = '\0' ) {
+indicated( char *bol, const char *eol, char ch = '\0' ) {
if( indicator.column == 0 && *bol != '*' ) {
return NULL; // no indicator column in free format, except for comments
}
@@ -129,10 +140,10 @@ indicated( char *bol, char *eol, char ch = '\0' ) {
static char *
remove_inline_comment( char *bol, char *eol ) {
- static char ends = '\0';
char *nl = std::find(bol, eol, '\n');
if( bol < nl ) {
+ static char ends = '\0';
std::swap(*nl, ends);
char *comment = strstr(bol, "*>");
if( comment ) {
@@ -197,10 +208,10 @@ maybe_add_space(const span_t& pattern, replace_t& recognized) {
}
if( befter[0] == blank || befter[1] == blank ) {
- char *s = xasprintf( "%s%.*s%s",
- befter[0],
- recognized.after.size(), recognized.after.p,
- befter[1] );
+ const char *s = xasprintf( "%s%.*s%s",
+ befter[0],
+ recognized.after.size(), recognized.after.p,
+ befter[1] );
recognized.after = span_t(s, s + strlen(s));
}
}
@@ -255,14 +266,17 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
span_t found(mfile.eodata, mfile.eodata);
- if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) {
+ if( regex_search( mfile.ccur(),
+ const_cast<const char *>(mfile.eodata),
+ cm, re) ) {
gcc_assert(cm[1].matched);
found = span_t( cm[1].first, cm[1].second );
if( yy_flex_debug ) {
size_t n = count_newlines(mfile.data, found.p);
- dbgmsg("%s:%d first '%.*s' is on line %zu (offset %zu)", __func__, __LINE__,
+ dbgmsg("%s:%d first '%.*s' is on line " HOST_SIZE_T_PRINT_UNSIGNED
+ " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
directive.before.size(), directive.before.p,
- ++n, found.p - mfile.data);
+ (fmt_size_t)++n, (fmt_size_t)(found.p - mfile.data));
}
} else {
dbgmsg("%s:%d not found: '%s' in \n'%.*s'", __func__, __LINE__,
@@ -289,9 +303,12 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
bol = next.found.pend;
if( yy_flex_debug ) {
- size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n');
- dbgmsg( "%s:%d: line %zu @ %zu: '%s'\n/%.*s/%.*s/", __func__, __LINE__,
- ++n, next.found.p - mfile.data,
+ size_t n = std::count(const_cast<const char *>(mfile.data),
+ recognized.before.p, '\n');
+ dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+ " @ " HOST_SIZE_T_PRINT_UNSIGNED ": '%s'\n/%.*s/%.*s/",
+ __func__, __LINE__,
+ (fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data),
next.directive.before.p,
int(recognized.before.size()), recognized.before.p,
int(recognized.after.size()), recognized.after.p );
@@ -303,14 +320,16 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
next.found = span_t(mfile.eodata, mfile.eodata);
regex re(next.directive.before.p, extended_icase);
- if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) {
+ if( regex_search(bol, const_cast<const char *>(mfile.eodata), cm, re) ) {
gcc_assert(cm[1].matched);
next.found = span_t( cm[1].first, cm[1].second );
- size_t n = std::count((const char *)mfile.data, next.found.p, '\n');
+ size_t n = std::count(const_cast<const char *>(mfile.data),
+ next.found.p, '\n');
if( false )
- dbgmsg("%s:%d next '%.*s' will be on line %zu (offset %zu)", __func__, __LINE__,
+ dbgmsg("%s:%d next '%.*s' will be on line " HOST_SIZE_T_PRINT_UNSIGNED
+ " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
next.directive.before.size(), next.directive.before.p,
- ++n, next.found.p - mfile.data);
+ (fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data));
}
pnext = std::min_element(futures.begin(), futures.end());
}
@@ -330,7 +349,7 @@ check_source_format_directive( filespan_t& mfile ) {
// show contents of marked subexpressions within each match
cmatch cm;
- if( regex_search(p, (const char *)mfile.eol, cm, re) ) {
+ if( regex_search(p, const_cast<const char *>(mfile.eol), cm, re) ) {
gcc_assert(cm.size() > 1);
switch( cm[3].length() ) {
case 4:
@@ -343,11 +362,14 @@ check_source_format_directive( filespan_t& mfile ) {
gcc_assert(cm[3].length() == 4 || cm[3].length() == 5);
break;
}
+
+ dbgmsg( "%s:%d: %s format set, on line " HOST_SIZE_T_PRINT_UNSIGNED,
+ __func__, __LINE__,
+ indicator.column == 7? "FIXED" : "FREE",
+ (fmt_size_t)mfile.lineno() );
+ char *bol = indicator.is_fixed()? mfile.cur : const_cast<char*>(cm[0].first);
+ erase_line(bol, const_cast<char*>(cm[0].second));
mfile.cur = const_cast<char*>(cm[0].second);
- dbgmsg( "%s:%d: %s format set, on line %zu", __func__, __LINE__,
- indicator.column == 7? "FIXED" : "FREE", mfile.lineno() );
- erase_line(const_cast<char*>(cm[0].first),
- const_cast<char*>(cm[0].second));
}
}
@@ -380,21 +402,28 @@ struct buffer_t : public bytespan_t {
dbgmsg("flex input buffer: '%.*s'\n[xelf]", int(pos - data), data);
}
void dump() const {
+#ifdef GETENV_OK
if( getenv("lexer_input") ) show();
+#endif
}
};
-static bool
-valid_sequence_area( const char *p, const char *eodata ) {
- const char *pend = p + 6;
- if ( eodata < pend ) return false;
+static inline bool is_p( char ch ) { return TOUPPER(ch) == 'P'; }
- for( ; p < pend; p++ ) {
- if( ! (ISDIGIT(*p) || *p == SPACE) ) {
- return false;
+static bool
+is_program_id( const char *p, const char *eol ) {
+ static const std::string program_id("PROGRAM-ID");
+ auto eop = p + program_id.size();
+ if( eop < eol ) {
+ // PROGRAM-ID must be followed by a dot, perhaps with intervening whitespace.
+ for( const char *dot=eop; dot < eol && *dot != '.'; dot++ ) {
+ if( !ISSPACE(*dot) ) return false;
}
+ std::string line (p, eop);
+ std::transform(line.begin(), line.end(), line.begin(), ::toupper);
+ return line == program_id;
}
- return true; // characters either digits or blanks
+ return false;
}
const char * esc( size_t len, const char input[] );
@@ -434,9 +463,9 @@ struct replacing_term_t {
bool matched, done;
span_t leading_trailing, term, stmt;
- replacing_term_t(const char input[]) : matched(false), done(false) {
- stmt = span_t(input, input);
- }
+ explicit replacing_term_t(const char input[])
+ : matched(false), done(false), stmt(span_t(input, input))
+ {}
};
extern YYLTYPE yylloc;
@@ -457,11 +486,11 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) {
class dump_loc_on_exit {
public:
dump_loc_on_exit() {
- if( getenv( "update_yylloc" ) )
+ if( gcobol_getenv( "update_yylloc" ) )
location_dump( "update_yylloc", __LINE__, "begin", yylloc);
}
~dump_loc_on_exit() {
- if( getenv( "update_yylloc" ) )
+ if( gcobol_getenv( "update_yylloc" ) )
location_dump( "update_yylloc", __LINE__, "end ", yylloc);
}
} dloe;
@@ -506,7 +535,7 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) {
static replacing_term_t
parse_replacing_term( const char *stmt, const char *estmt ) {
- gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt);
+ gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt <= estmt);
replacing_term_t output(stmt);
static const char pattern[] =
@@ -716,13 +745,13 @@ parse_replacing_pair( const char *stmt, const char *estmt ) {
}
}
if( pair.stmt.p ) {
- yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p);
+ yywarn("CDF syntax error '%.*s'", (int)pair.stmt.size(), pair.stmt.p);
}
else {
// This eliminated a compiler warning about "format-overflow"
yywarn("CDF syntax error");
}
- pair.stmt = span_t(0UL, stmt);
+ pair.stmt = span_t(size_t(0), stmt);
pair.replace = replace_t();
}
return pair;
@@ -762,9 +791,9 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
// Report findings.
if( false && yy_flex_debug ) {
for( size_t i=0; i < cm.size(); i++ ) {
- dbgmsg("%s: %s %zu: '%.*s'", __func__,
+ dbgmsg("%s: %s " HOST_SIZE_T_PRINT_UNSIGNED ": '%.*s'", __func__,
cm[i].matched? "Pair" : "pair",
- i,
+ (fmt_size_t)i,
cm[i].matched? int(cm[i].length()) : 0,
cm[i].matched? cm[i].first : "");
}
@@ -784,7 +813,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
}
span_t& before(parsed.replace.before);
- span_t& after(parsed.replace.after);
+ const span_t& after(parsed.replace.after);
const char *befter[2] = { nonword_ch, nonword_ch };
gcc_assert(before.p < before.pend);
@@ -823,9 +852,10 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
}
if( yy_flex_debug ) {
- dbgmsg( "%s:%d: %s: %zu pairs parsed from '%.*s'", __func__, __LINE__,
- parsed.done()? "done" : "not done",
- pairs.size(), parsed.stmt.size(), parsed.stmt.p );
+ dbgmsg( "%s:%d: %s: " HOST_SIZE_T_PRINT_UNSIGNED " pairs parsed from '%.*s'",
+ __func__, __LINE__,
+ parsed.done() ? "done" : "not done",
+ (fmt_size_t)pairs.size(), parsed.stmt.size(), parsed.stmt.p );
int i = 0;
for( const auto& replace : pairs ) {
dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__,
@@ -851,7 +881,7 @@ struct copy_descr_t {
};
static YYLTYPE
-location_in( const filespan_t& mfile, const csub_match cm ) {
+location_in( const filespan_t& mfile, const csub_match& cm ) {
YYLTYPE loc {
int(mfile.lineno() + 1), int(mfile.colno() + 1),
int(mfile.lineno() + 1), int(mfile.colno() + 1)
@@ -902,14 +932,15 @@ parse_copy_directive( filespan_t& mfile ) {
copy_stmt.p = mfile.eodata;
if( regex_search(mfile.ccur(),
- (const char *)mfile.eodata, cm, re) ) {
+ const_cast<const char *>(mfile.eodata), cm, re) ) {
copy_stmt = span_t( cm[0].first, cm[0].second );
if( yy_flex_debug ) {
size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p);
size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend);
- dbgmsg("%s:%d: line %zu: COPY directive is %zu lines '%.*s'",
+ dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+ ": COPY directive is " HOST_SIZE_T_PRINT_UNSIGNED " lines '%.*s'",
__func__, __LINE__,
- nnl, nst, copy_stmt.size(), copy_stmt.p);
+ (fmt_size_t)nnl, (fmt_size_t)nst, copy_stmt.size(), copy_stmt.p);
}
}
}
@@ -922,7 +953,8 @@ parse_copy_directive( filespan_t& mfile ) {
outcome.partial_line = span_t(mfile.cur, copy_stmt.p);
if( yy_flex_debug ) {
- dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '('));
+ dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions",
+ (fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '('));
int i = 0;
for( const auto& m : cm ) {
if( m.matched )
@@ -953,7 +985,7 @@ parse_copy_directive( filespan_t& mfile ) {
std::pair<std::list<replace_t>, char*>
result = parse_replace_pairs( cm[0].second, mfile.eodata, true );
- std::list<replace_t>& replacements(result.first);
+ const std::list<replace_t>& replacements(result.first);
outcome.parsed = (outcome.nreplace = replacements.size()) > 0;
if( outcome.parsed ) {
replace_directives.push(replacements);
@@ -980,7 +1012,7 @@ parse_copy_directive( filespan_t& mfile ) {
}
static char *
-parse_replace_last_off( filespan_t& mfile ) {
+parse_replace_last_off( const filespan_t& mfile ) {
static const char pattern[] =
"REPLACE" "[[:space:]]+"
"(LAST[[:space:]]+)?OFF[[:space:]]*[.]"
@@ -990,7 +1022,7 @@ parse_replace_last_off( filespan_t& mfile ) {
// REPLACE [LAST] OFF?
bool found = regex_search(mfile.ccur(),
- (const char *)mfile.eodata, cm, re);
+ const_cast<const char *>(mfile.eodata), cm, re);
gcc_assert(found); // caller ensures
gcc_assert(cm.size() == 2);
@@ -1006,8 +1038,9 @@ parse_replace_last_off( filespan_t& mfile ) {
}
}
- dbgmsg( "%s:%d: line %zu: parsed '%.*s', ", __func__, __LINE__,
- mfile.lineno(), int(cm[0].length()), cm[0].first );
+ dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsed '%.*s', ",
+ __func__, __LINE__,
+ (fmt_size_t)mfile.lineno(), int(cm[0].length()), cm[0].first );
// Remove statement from input
erase_line(const_cast<char*>(cm[0].first),
@@ -1039,20 +1072,23 @@ parse_replace_text( filespan_t& mfile ) {
gcc_assert(mfile.line_length() > 2);
if( pend[-1] == '\n' ) pend -= 2;
auto len = int(pend - mfile.cur);
- dbgmsg("%s:%d: line %zu: parsing '%.*s", __func__, __LINE__,
- current_lineno, len, mfile.cur);
+ dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsing '%.*s",
+ __func__, __LINE__,
+ (fmt_size_t)current_lineno, len, mfile.cur);
}
- if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) {
- dbgmsg( "%s:%d: line %zu: not a REPLACE statement:\n'%.*s'",
- __func__, __LINE__, current_lineno,
+ if( ! regex_search(mfile.ccur(), mfile.eodata, cm, re) ) {
+ dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+ ": not a REPLACE statement:\n'%.*s'",
+ __func__, __LINE__, (fmt_size_t)current_lineno,
int(mfile.line_length()), mfile.cur );
return span_t();
}
// Report findings.
if( yy_flex_debug ) {
- dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '('));
+ dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions",
+ (fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '('));
int i = 0;
for( const auto& m : cm ) {
if( m.matched )
@@ -1077,12 +1113,14 @@ parse_replace_text( filespan_t& mfile ) {
std::pair<std::list<replace_t>, char*>
result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false);
- std::list<replace_t>& replacements(result.first);
+ const std::list<replace_t>& replacements(result.first);
replace_directives.push( replacements );
if( yy_flex_debug ) {
- dbgmsg( "%s:%d: line %zu: %zu pairs parsed from '%.*s'", __func__, __LINE__,
- current_lineno, replacements.size(), int(replace_stmt.size()), replace_stmt.p );
+ dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": " HOST_SIZE_T_PRINT_UNSIGNED
+ " pairs parsed from '%.*s'", __func__, __LINE__,
+ (fmt_size_t)current_lineno, (fmt_size_t)replacements.size(),
+ int(replace_stmt.size()), replace_stmt.p );
for( const auto& replace : replacements ) {
int i = 0;
dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__,
@@ -1113,7 +1151,7 @@ parse_replace_directive( filespan_t& mfile ) {
next_directive = mfile.eodata;
if( regex_search(mfile.ccur(),
- (const char *)mfile.eodata, cm, re) ) {
+ const_cast<const char *>(mfile.eodata), cm, re) ) {
gcc_assert(cm[1].matched);
next_directive = cm[0].first;
@@ -1162,8 +1200,9 @@ bytespan_t::append( const char *input, const char *eoinput ) {
#if LEXIO
auto nq = std::count_if(data, eodata, isquote);
dbgmsg("%s:%3d: input ------ '%.*s'", __func__, __LINE__, int(eoinput - input), input);
- dbgmsg("%s:%3d: precondition '%.*s' (%zu: %s)", __func__, __LINE__,
- int(size()), data, nq, in_string()? "in string" : "not in string");
+ dbgmsg("%s:%3d: precondition '%.*s' (" HOST_SIZE_T_PRINT_UNSIGNED ": %s)",
+ __func__, __LINE__,
+ int(size()), data, (fmt_size_t)nq, in_string()? "in string" : "not in string");
#endif
if( !in_string() ) { // Remove trailing space unless it's part of a literal.
while(data < eodata && ISSPACE(eodata[-1])) eodata--;
@@ -1374,7 +1413,7 @@ preprocess_filter_add( const char input[] ) {
auto filename = find_filter(filter.c_str());
if( !filename ) {
- yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter);
+ yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter.c_str());
return false;
}
preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) );
@@ -1420,7 +1459,7 @@ cdftext::lex_open( const char filename[] ) {
int output = open_output();
- // Process any files supplied by the -include comamnd-line option.
+ // Process any files supplied by the -include command-line option.
for( auto name : included_files ) {
int input;
if( -1 == (input = open(name, O_RDONLY)) ) {
@@ -1431,7 +1470,10 @@ cdftext::lex_open( const char filename[] ) {
filespan_t mfile( free_form_reference_format( input ) );
process_file( mfile, output );
+
+ cobol_filename_restore(); // process_file restores only for COPY
}
+ included_files.clear();
cobol_filename(filename, inode_of(input));
filespan_t mfile( free_form_reference_format( input ) );
@@ -1453,7 +1495,7 @@ cdftext::lex_open( const char filename[] ) {
argv[0] = filter;
auto last_argv = std::transform( options.begin(), options.end(), argv.begin() + 1,
- []( std::string& opt ) {
+ []( const std::string& opt ) {
return xstrdup(opt.c_str());
} );
*last_argv = NULL;
@@ -1482,11 +1524,11 @@ cdftext::lex_open( const char filename[] ) {
int status;
auto kid = wait(&status);
gcc_assert(pid == kid);
- if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid);
+ if( kid == -1 ) cbl_err( "failed waiting for pid %ld", static_cast<long>(pid));
if( WIFSIGNALED(status) ) {
- cbl_errx( "%s pid %d terminated by %s",
- filter, kid, strsignal(WTERMSIG(status)) );
+ cbl_errx( "%s pid %ld terminated by %s",
+ filter, static_cast<long>(kid), strsignal(WTERMSIG(status)) );
}
if( WIFEXITED(status) ) {
if( (status = WEXITSTATUS(status)) != 0 ) {
@@ -1504,7 +1546,7 @@ int
cdftext::open_input( const char filename[] ) {
int fd = open(filename, O_RDONLY);
if( fd == -1 ) {
- dbgmsg( "could not open '%s': %m", filename );
+ dbgmsg( "could not open '%s': %s", filename, xstrerror(errno) );
}
verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR");
@@ -1518,9 +1560,9 @@ cdftext::open_input( const char filename[] ) {
int
cdftext::open_output() {
char *name = getenv("GCOBOL_TEMPDIR");
- int fd;
if( name && 0 != strcmp(name, "/") ) {
+ int fd;
char * stem = xasprintf("%sXXXXXX", name);
if( -1 == (fd = mkstemp(stem)) ) {
cbl_err( "could not open temporary file '%s' (%s)",
@@ -1561,8 +1603,8 @@ cdftext::map_file( int fd ) {
cbl_err( "%s: could not prepare map file from FIFO %d",
__func__, input);
}
- if( false ) dbgmsg("%s: copied %ld bytes from FIFO",
- __func__, nout);
+ if( false ) dbgmsg("%s: copied " HOST_SIZE_T_PRINT_DEC " bytes from FIFO",
+ __func__, (fmt_size_t)nout);
}
}
} while( S_ISFIFO(sb.st_mode) );
@@ -1585,6 +1627,54 @@ cdftext::map_file( int fd ) {
bool lexio_dialect_mf();
+/*
+ * A valid sequence area is 6 digits or blanks at the begining of the line that
+ * contains PROGRAM-ID. Return NULL if no valid sequence area, else return
+ * pointer to BOL.
+ */
+static const char *
+valid_sequence_area( const char *data, const char *eodata ) {
+
+ for( const char *p = data;
+ (p = std::find_if(p, eodata, is_p)) != eodata;
+ p++ )
+ {
+ auto eol = std::find(p, eodata, '\n');
+ if( p == data || ISSPACE(p[-1]) ) {
+ if( is_program_id(p, eol) ) { // found program-id token
+ const char *bol = p;
+ for( ; data <= bol-1 && bol[-1] != '\n'; --bol )
+ ;
+ if( 6 < p - bol ) {
+ if( std::all_of(bol, bol+6, ::isdigit) ) {
+ return bol;
+ }
+ if( std::all_of(bol, bol+6, ::isblank) ) {
+ return bol;
+ }
+ break;
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
+/*
+ * Reference Format -- valid COBOL between columns 8 and 72 -- has data after
+ * column 72 on the PROGRAM-ID line. Extended Reference Format (that allows
+ * longer lines) has no reason to follow the PROGRAM-ID with more stuff.
+ */
+static bool
+infer_reference_format( const char *bol, const char *eodata ) {
+ assert(bol);
+ auto eol = std::find(bol, eodata, '\n');
+ if( 72 < eol - bol ) {
+ return ! std::all_of(bol + 72, eol, ::isspace);
+ }
+ return false;
+}
+
filespan_t
cdftext::free_form_reference_format( int input ) {
filespan_t source_buffer = map_file(input);
@@ -1599,29 +1689,23 @@ cdftext::free_form_reference_format( int input ) {
size_t lineno;
bytespan_t line;
// construct with length zero
- current_line_t( char data[] ) : lineno(0), line(data, data) {}
+ explicit current_line_t( char data[] ) : lineno(0), line(data, data) {}
} current( mfile.data );
/*
- * If the format is not explicitly set on the command line, test the
- * first 6 bytes of the first file to determine the format
- * heuristically. If the first 6 characters are only digits or
- * blanks, then the file is in fixed format.
+ * Infer source code format.
*/
-
if( indicator.inference_pending() ) {
- const char *p = mfile.data;
- while( p < mfile.eodata ) {
- const char * pend =
- std::find(p, const_cast<const char *>(mfile.eodata), '\n');
- if( 6 < pend - p ) break;
- p = pend;
- if( p < mfile.eodata) p++;
+ const char *bol = valid_sequence_area(mfile.data, mfile.eodata);
+ if( bol ) {
+ indicator.column = 7;
+ if( infer_reference_format(bol, mfile.eodata) ) {
+ indicator.right_margin = 73;
+ }
}
- if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7;
dbgmsg("%s:%d: %s format detected", __func__, __LINE__,
- indicator.column == 7? "FIXED" : "FREE");
+ indicator.description());
}
while( mfile.next_line() ) {
@@ -1745,15 +1829,15 @@ cdftext::free_form_reference_format( int input ) {
void
cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
static size_t nfiles = 0;
- std::list<replace_t> replacements;
- __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out);
+ __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "a"), std::ios::out);
std::ostream out(&outbuf);
std::ostream_iterator<char> ofs(out);
// indicate current file
static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f";
+ if( !included_files.empty() ) { ++nfiles; }; // force push/pop of included filename
if( !second_pass && nfiles++ ) {
static const char delimiter[] = "\f";
const char *filename = cobol_filename();
@@ -1772,8 +1856,8 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs,
[]( char ch ) { return ch == '\n'; } );
struct { int in, out; filespan_t mfile; } copy;
- dbgmsg("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__,
- mfile.lineno(),
+ dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ", opening %s on fd %d",
+ __func__, __LINE__,mfile.lineno(),
copybook.source(), copybook.current()->fd);
copy.in = copybook.current()->fd;
copy.mfile = free_form_reference_format( copy.in );
@@ -1809,31 +1893,12 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
continue; // No active REPLACE directive.
}
- std::list<span_t> segments = segment_line(mfile); // no replace yields
- // // 1 segment
+ std::list<span_t> segments = segment_line(mfile);
for( const auto& segment : segments ) {
std::copy(segment.p, segment.pend, ofs);
}
- if( segments.size() == 2 ) {
- struct {
- size_t before, after;
- int delta() const { return before - after; } } nlines;
- nlines.before = std::count(segments.front().p,
- segments.front().pend, '\n');
- nlines.after = std::count(segments.back().p, segments.back().pend, '\n');
- if( nlines.delta() < 0 ) {
- yywarn("line %zu: REPLACED %zu lines with %zu lines, "
- "line count off by %d", mfile.lineno(),
- nlines.before, nlines.after, nlines.delta());
- }
- int nnl = nlines.delta();
- while( nnl-- > 0 ) {
- static const char nl[] = "\n";
- std::copy(nl, nl + 1, ofs);
- }
- }
out.flush();
}
// end of file
@@ -1841,6 +1906,7 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
std::copy(file_pop, file_pop + strlen(file_pop), ofs);
out.flush();
}
+ if( !included_files.empty() ) { --nfiles; };
}
std::list<span_t>
@@ -1856,12 +1922,30 @@ cdftext::segment_line( filespan_t& mfile ) {
return output;
}
+ /*
+ * If the replacement changes the number of lines in the replaced text, we
+ * need to reset the line number, because the next statement is on a
+ * different line in the manipulated text than in the original. Before each
+ * replacement, set the original line number. After each replacement, set
+ * the line number after the elided text on the next line.
+ */
for( const replace_t& segment : pending ) {
gcc_assert(mfile.cur <= segment.before.p);
gcc_assert(segment.before.pend <= mfile.eodata);
+ struct { unsigned long ante, post; } lineno = {
+ gb4(mfile.lineno()), gb4(mfile.lineno() + segment.after.nlines())
+ };
+ char *directive = lineno.ante == lineno.post?
+ nullptr : xasprintf("\n#line %lu \"%s\"\n",
+ lineno.ante, cobol_filename());
+
+ if( directive )
+ output.push_back( span_t(strlen(directive), directive) );
output.push_back( span_t(mfile.cur, segment.before.p) );
output.push_back( span_t(segment.after.p, segment.after.pend ) );
+ if( directive )
+ output.push_back( span_t(strlen(directive), directive) );
mfile.cur = const_cast<char*>(segment.before.pend);
}
@@ -1877,5 +1961,3 @@ cdftext::segment_line( filespan_t& mfile ) {
return output;
}
-
-//////// End of the cdf_text.h file