aboutsummaryrefslogtreecommitdiff
path: root/gcc/cobol/lexio.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/cobol/lexio.cc')
-rw-r--r--gcc/cobol/lexio.cc257
1 files changed, 151 insertions, 106 deletions
diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc
index a992166..2d9fb72 100644
--- a/gcc/cobol/lexio.cc
+++ b/gcc/cobol/lexio.cc
@@ -46,8 +46,22 @@ static struct {
first_file = false;
return tf;
}
+ inline bool is_fixed() const { return column == 7; } // fixed format: indicator column is 7
+ inline bool is_reffmt() const { return is_fixed() && right_margin == 73; } // fixed format with right margin 73
+ inline bool is_free() const { return ! is_fixed(); } // anything that is not fixed
+
+ const char * description() const { // name of the current source format, for messages
+ if( is_reffmt() ) return "REFERENCE"; // tested first: is_reffmt() implies is_fixed()
+ if( is_fixed() ) return "FIXED";
+ if( is_free() ) return "FREE"; // is_free() is !is_fixed(), so one branch always returns
+ gcc_unreachable();
+ }
} indicator = { true, false, 0, 0 };
+// public source format test functions
+bool is_fixed_format() { return indicator.is_fixed(); } // forwards to indicator.is_fixed()
+bool is_reference_format() { return indicator.is_reffmt(); } // forwards to indicator.is_reffmt()
+
static bool debug_mode = false;
/*
@@ -86,10 +100,6 @@ cobol_set_indicator_column( int column )
indicator.column = column;
}
-bool is_fixed_format() { return indicator.column == 7; }
-bool is_reference_format() {
- return indicator.column == 7 && indicator.right_margin == 73;
-}
bool include_debug() { return indicator.column == 7 && debug_mode; }
bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); }
@@ -113,7 +123,7 @@ continues_at( char *bol, char *eol ) {
// Return pointer to indicator column. Test ch if provided.
// NULL means no indicator column or tested value not present.
static inline char *
-indicated( char *bol, char *eol, char ch = '\0' ) {
+indicated( char *bol, const char *eol, char ch = '\0' ) {
if( indicator.column == 0 && *bol != '*' ) {
return NULL; // no indicator column in free format, except for comments
}
@@ -130,10 +140,10 @@ indicated( char *bol, char *eol, char ch = '\0' ) {
static char *
remove_inline_comment( char *bol, char *eol ) {
- static char ends = '\0';
char *nl = std::find(bol, eol, '\n');
if( bol < nl ) {
+ static char ends = '\0';
std::swap(*nl, ends);
char *comment = strstr(bol, "*>");
if( comment ) {
@@ -198,10 +208,10 @@ maybe_add_space(const span_t& pattern, replace_t& recognized) {
}
if( befter[0] == blank || befter[1] == blank ) {
- char *s = xasprintf( "%s%.*s%s",
- befter[0],
- recognized.after.size(), recognized.after.p,
- befter[1] );
+ const char *s = xasprintf( "%s%.*s%s",
+ befter[0],
+ recognized.after.size(), recognized.after.p,
+ befter[1] );
recognized.after = span_t(s, s + strlen(s));
}
}
@@ -256,7 +266,9 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
span_t found(mfile.eodata, mfile.eodata);
- if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) {
+ if( regex_search( mfile.ccur(),
+ const_cast<const char *>(mfile.eodata),
+ cm, re) ) {
gcc_assert(cm[1].matched);
found = span_t( cm[1].first, cm[1].second );
if( yy_flex_debug ) {
@@ -291,7 +303,8 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
bol = next.found.pend;
if( yy_flex_debug ) {
- size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n');
+ size_t n = std::count(const_cast<const char *>(mfile.data),
+ recognized.before.p, '\n');
dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
" @ " HOST_SIZE_T_PRINT_UNSIGNED ": '%s'\n/%.*s/%.*s/",
__func__, __LINE__,
@@ -307,10 +320,11 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem
next.found = span_t(mfile.eodata, mfile.eodata);
regex re(next.directive.before.p, extended_icase);
- if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) {
+ if( regex_search(bol, const_cast<const char *>(mfile.eodata), cm, re) ) {
gcc_assert(cm[1].matched);
next.found = span_t( cm[1].first, cm[1].second );
- size_t n = std::count((const char *)mfile.data, next.found.p, '\n');
+ size_t n = std::count(const_cast<const char *>(mfile.data),
+ next.found.p, '\n');
if( false )
dbgmsg("%s:%d next '%.*s' will be on line " HOST_SIZE_T_PRINT_UNSIGNED
" (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
@@ -335,7 +349,7 @@ check_source_format_directive( filespan_t& mfile ) {
// show contents of marked subexpressions within each match
cmatch cm;
- if( regex_search(p, (const char *)mfile.eol, cm, re) ) {
+ if( regex_search(p, const_cast<const char *>(mfile.eol), cm, re) ) {
gcc_assert(cm.size() > 1);
switch( cm[3].length() ) {
case 4:
@@ -348,13 +362,14 @@ check_source_format_directive( filespan_t& mfile ) {
gcc_assert(cm[3].length() == 4 || cm[3].length() == 5);
break;
}
- mfile.cur = const_cast<char*>(cm[0].second);
+
dbgmsg( "%s:%d: %s format set, on line " HOST_SIZE_T_PRINT_UNSIGNED,
__func__, __LINE__,
indicator.column == 7? "FIXED" : "FREE",
(fmt_size_t)mfile.lineno() );
- erase_line(const_cast<char*>(cm[0].first),
- const_cast<char*>(cm[0].second));
+ char *bol = indicator.is_fixed()? mfile.cur : const_cast<char*>(cm[0].first);
+ erase_line(bol, const_cast<char*>(cm[0].second));
+ mfile.cur = const_cast<char*>(cm[0].second);
}
}
@@ -393,33 +408,22 @@ struct buffer_t : public bytespan_t {
}
};
-static bool
-valid_sequence_area( const char *p, const char *eodata ) {
- const char *pend = p + 6;
- if ( eodata < pend ) return false;
+static inline bool is_p( char ch ) { return TOUPPER(ch) == 'P'; }
- for( ; p < pend; p++ ) {
- if( ! (ISDIGIT(*p) || *p == SPACE) ) {
- return false;
+static bool
+is_program_id( const char *p, const char *eol ) {
+ static const std::string program_id("PROGRAM-ID");
+ auto eop = p + program_id.size();
+ if( eop < eol ) {
+ // PROGRAM-ID must be followed by a dot, perhaps with intervening whitespace.
+ for( const char *dot=eop; dot < eol && *dot != '.'; dot++ ) {
+ if( !ISSPACE(*dot) ) return false;
}
+ std::string line (p, eop);
+ std::transform(line.begin(), line.end(), line.begin(), ::toupper);
+ return line == program_id;
}
- return true; // characters either digits or blanks
-}
-
-// Inspect the 2nd line for telltale signs of a NIST file.
-// If true, caller sets right margin to 73, indicating Reference Format
-static bool
-likely_nist_file( const char *p, const char *eodata ) {
- if( (p = std::find(p, eodata, '\n')) == eodata ) return false;
- if ( eodata < ++p + 80 ) return false;
- p += 72;
-
- return
- ISALPHA(p[0]) && ISALPHA(p[1]) &&
- ISDIGIT(p[2]) && ISDIGIT(p[3]) && ISDIGIT(p[4]) &&
- p[5] == '4' &&
- p[6] == '.' &&
- p[7] == '2';
+ return false;
}
const char * esc( size_t len, const char input[] );
@@ -459,9 +463,9 @@ struct replacing_term_t {
bool matched, done;
span_t leading_trailing, term, stmt;
- replacing_term_t(const char input[]) : matched(false), done(false) {
- stmt = span_t(input, input);
- }
+ explicit replacing_term_t(const char input[])
+ : matched(false), done(false), stmt(span_t(input, input))
+ {}
};
extern YYLTYPE yylloc;
@@ -531,7 +535,7 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) {
static replacing_term_t
parse_replacing_term( const char *stmt, const char *estmt ) {
- gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt);
+ gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt <= estmt);
replacing_term_t output(stmt);
static const char pattern[] =
@@ -741,7 +745,7 @@ parse_replacing_pair( const char *stmt, const char *estmt ) {
}
}
if( pair.stmt.p ) {
- yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p);
+ yywarn("CDF syntax error '%.*s'", (int)pair.stmt.size(), pair.stmt.p);
}
else {
// This eliminated a compiler warning about "format-overflow"
@@ -809,7 +813,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
}
span_t& before(parsed.replace.before);
- span_t& after(parsed.replace.after);
+ const span_t& after(parsed.replace.after);
const char *befter[2] = { nonword_ch, nonword_ch };
gcc_assert(before.p < before.pend);
@@ -877,7 +881,7 @@ struct copy_descr_t {
};
static YYLTYPE
-location_in( const filespan_t& mfile, const csub_match cm ) {
+location_in( const filespan_t& mfile, const csub_match& cm ) {
YYLTYPE loc {
int(mfile.lineno() + 1), int(mfile.colno() + 1),
int(mfile.lineno() + 1), int(mfile.colno() + 1)
@@ -928,7 +932,7 @@ parse_copy_directive( filespan_t& mfile ) {
copy_stmt.p = mfile.eodata;
if( regex_search(mfile.ccur(),
- (const char *)mfile.eodata, cm, re) ) {
+ const_cast<const char *>(mfile.eodata), cm, re) ) {
copy_stmt = span_t( cm[0].first, cm[0].second );
if( yy_flex_debug ) {
size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p);
@@ -981,7 +985,7 @@ parse_copy_directive( filespan_t& mfile ) {
std::pair<std::list<replace_t>, char*>
result = parse_replace_pairs( cm[0].second, mfile.eodata, true );
- std::list<replace_t>& replacements(result.first);
+ const std::list<replace_t>& replacements(result.first);
outcome.parsed = (outcome.nreplace = replacements.size()) > 0;
if( outcome.parsed ) {
replace_directives.push(replacements);
@@ -1008,7 +1012,7 @@ parse_copy_directive( filespan_t& mfile ) {
}
static char *
-parse_replace_last_off( filespan_t& mfile ) {
+parse_replace_last_off( const filespan_t& mfile ) {
static const char pattern[] =
"REPLACE" "[[:space:]]+"
"(LAST[[:space:]]+)?OFF[[:space:]]*[.]"
@@ -1018,7 +1022,7 @@ parse_replace_last_off( filespan_t& mfile ) {
// REPLACE [LAST] OFF?
bool found = regex_search(mfile.ccur(),
- (const char *)mfile.eodata, cm, re);
+ const_cast<const char *>(mfile.eodata), cm, re);
gcc_assert(found); // caller ensures
gcc_assert(cm.size() == 2);
@@ -1073,7 +1077,7 @@ parse_replace_text( filespan_t& mfile ) {
(fmt_size_t)current_lineno, len, mfile.cur);
}
- if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) {
+ if( ! regex_search(mfile.ccur(), mfile.eodata, cm, re) ) {
dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
": not a REPLACE statement:\n'%.*s'",
__func__, __LINE__, (fmt_size_t)current_lineno,
@@ -1109,7 +1113,7 @@ parse_replace_text( filespan_t& mfile ) {
std::pair<std::list<replace_t>, char*>
result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false);
- std::list<replace_t>& replacements(result.first);
+ const std::list<replace_t>& replacements(result.first);
replace_directives.push( replacements );
if( yy_flex_debug ) {
@@ -1147,7 +1151,7 @@ parse_replace_directive( filespan_t& mfile ) {
next_directive = mfile.eodata;
if( regex_search(mfile.ccur(),
- (const char *)mfile.eodata, cm, re) ) {
+ const_cast<const char *>(mfile.eodata), cm, re) ) {
gcc_assert(cm[1].matched);
next_directive = cm[0].first;
@@ -1409,7 +1413,7 @@ preprocess_filter_add( const char input[] ) {
auto filename = find_filter(filter.c_str());
if( !filename ) {
- yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter);
+ yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter.c_str());
return false;
}
preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) );
@@ -1455,7 +1459,7 @@ cdftext::lex_open( const char filename[] ) {
int output = open_output();
- // Process any files supplied by the -include comamnd-line option.
+ // Process any files supplied by the -include command-line option.
for( auto name : included_files ) {
int input;
if( -1 == (input = open(name, O_RDONLY)) ) {
@@ -1466,7 +1470,10 @@ cdftext::lex_open( const char filename[] ) {
filespan_t mfile( free_form_reference_format( input ) );
process_file( mfile, output );
+
+ cobol_filename_restore(); // process_file restores only for COPY
}
+ included_files.clear();
cobol_filename(filename, inode_of(input));
filespan_t mfile( free_form_reference_format( input ) );
@@ -1488,7 +1495,7 @@ cdftext::lex_open( const char filename[] ) {
argv[0] = filter;
auto last_argv = std::transform( options.begin(), options.end(), argv.begin() + 1,
- []( std::string& opt ) {
+ []( const std::string& opt ) {
return xstrdup(opt.c_str());
} );
*last_argv = NULL;
@@ -1517,11 +1524,11 @@ cdftext::lex_open( const char filename[] ) {
int status;
auto kid = wait(&status);
gcc_assert(pid == kid);
- if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid);
+ if( kid == -1 ) cbl_err( "failed waiting for pid %ld", static_cast<long>(pid));
if( WIFSIGNALED(status) ) {
- cbl_errx( "%s pid %d terminated by %s",
- filter, kid, strsignal(WTERMSIG(status)) );
+ cbl_errx( "%s pid %ld terminated by %s",
+ filter, static_cast<long>(kid), strsignal(WTERMSIG(status)) );
}
if( WIFEXITED(status) ) {
if( (status = WEXITSTATUS(status)) != 0 ) {
@@ -1539,7 +1546,7 @@ int
cdftext::open_input( const char filename[] ) {
int fd = open(filename, O_RDONLY);
if( fd == -1 ) {
- dbgmsg( "could not open '%s': %m", filename );
+ dbgmsg( "could not open '%s': %s", filename, xstrerror(errno) );
}
verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR");
@@ -1553,9 +1560,9 @@ cdftext::open_input( const char filename[] ) {
int
cdftext::open_output() {
char *name = getenv("GCOBOL_TEMPDIR");
- int fd;
if( name && 0 != strcmp(name, "/") ) {
+ int fd;
char * stem = xasprintf("%sXXXXXX", name);
if( -1 == (fd = mkstemp(stem)) ) {
cbl_err( "could not open temporary file '%s' (%s)",
@@ -1620,6 +1627,54 @@ cdftext::map_file( int fd ) {
bool lexio_dialect_mf();
+/*
+ * A valid sequence area is 6 digits or blanks at the beginning of the line that
+ * contains PROGRAM-ID. Return NULL if no valid sequence area, else return
+ * pointer to BOL.
+ */
+static const char *
+valid_sequence_area( const char *data, const char *eodata ) {
+
+ for( const char *p = data; // scan the buffer [data, eodata)
+ (p = std::find_if(p, eodata, is_p)) != eodata; // jump to the next character accepted by is_p
+ p++ )
+ {
+ auto eol = std::find(p, eodata, '\n'); // end of the line holding the candidate
+ if( p == data || ISSPACE(p[-1]) ) { // candidate must start the buffer or follow whitespace
+ if( is_program_id(p, eol) ) { // found program-id token
+ const char *bol = p;
+ for( ; data <= bol-1 && bol[-1] != '\n'; --bol ) // back up to the first character of this line
+ ;
+ if( 6 < p - bol ) { // more than 6 characters precede the token on its line
+ if( std::all_of(bol, bol+6, ::isdigit) ) { // first six characters are all digits
+ return bol;
+ }
+ if( std::all_of(bol, bol+6, ::isblank) ) { // first six characters are all blanks
+ return bol;
+ } // NOTE(review): a mix of digits and blanks is rejected here -- confirm intent
+ break; // token found but its first six columns fail both tests: stop searching
+ }
+ }
+ }
+ }
+ return nullptr; // no line passed the tests above
+}
+
+/*
+ * Reference Format -- valid COBOL between columns 8 and 72 -- has data after
+ * column 72 on the PROGRAM-ID line. Extended Reference Format (that allows
+ * longer lines) has no reason to follow the PROGRAM-ID with more stuff.
+ */
+static bool
+infer_reference_format( const char *bol, const char *eodata ) {
+ assert(bol); // bol must be non-null
+ auto eol = std::find(bol, eodata, '\n'); // end of the line starting at bol, or eodata if no newline
+ if( 72 < eol - bol ) { // line is longer than 72 characters
+ return ! std::all_of(bol + 72, eol, ::isspace); // true if anything but whitespace lies beyond offset 72
+ }
+ return false; // line has 72 characters or fewer: nothing to examine
+}
+
filespan_t
cdftext::free_form_reference_format( int input ) {
filespan_t source_buffer = map_file(input);
@@ -1634,31 +1689,23 @@ cdftext::free_form_reference_format( int input ) {
size_t lineno;
bytespan_t line;
// construct with length zero
- current_line_t( char data[] ) : lineno(0), line(data, data) {}
+ explicit current_line_t( char data[] ) : lineno(0), line(data, data) {}
} current( mfile.data );
/*
- * If the format is not explicitly set on the command line, test the
- * first 6 bytes of the first file to determine the format
- * heuristically. If the first 6 characters are only digits or
- * blanks, then the file is in fixed format.
+ * Infer source code format.
*/
-
if( indicator.inference_pending() ) {
- const char *p = mfile.data;
- while( p < mfile.eodata ) {
- const char * pend =
- std::find(p, const_cast<const char *>(mfile.eodata), '\n');
- if( 6 < pend - p ) break;
- p = pend;
- if( p < mfile.eodata) p++;
+ const char *bol = valid_sequence_area(mfile.data, mfile.eodata);
+ if( bol ) {
+ indicator.column = 7;
+ if( infer_reference_format(bol, mfile.eodata) ) {
+ indicator.right_margin = 73;
+ }
}
- if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7;
- if( likely_nist_file(p, mfile.eodata) ) indicator.right_margin = 73;
- dbgmsg("%s:%d: %s%s format detected", __func__, __LINE__,
- indicator.column == 7? "FIXED" : "FREE",
- indicator.right_margin == 73? "" : "-extended");
+ dbgmsg("%s:%d: %s format detected", __func__, __LINE__,
+ indicator.description());
}
while( mfile.next_line() ) {
@@ -1782,15 +1829,15 @@ cdftext::free_form_reference_format( int input ) {
void
cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
static size_t nfiles = 0;
- std::list<replace_t> replacements;
- __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out);
+ __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "a"), std::ios::out);
std::ostream out(&outbuf);
std::ostream_iterator<char> ofs(out);
// indicate current file
static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f";
+ if( !included_files.empty() ) { ++nfiles; }; // force push/pop of included filename
if( !second_pass && nfiles++ ) {
static const char delimiter[] = "\f";
const char *filename = cobol_filename();
@@ -1846,31 +1893,12 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
continue; // No active REPLACE directive.
}
- std::list<span_t> segments = segment_line(mfile); // no replace yields
- // // 1 segment
+ std::list<span_t> segments = segment_line(mfile);
for( const auto& segment : segments ) {
std::copy(segment.p, segment.pend, ofs);
}
- if( segments.size() == 2 ) {
- struct {
- size_t before, after;
- int delta() const { return before - after; } } nlines;
- nlines.before = std::count(segments.front().p,
- segments.front().pend, '\n');
- nlines.after = std::count(segments.back().p, segments.back().pend, '\n');
- if( nlines.delta() < 0 ) {
- yywarn("line %zu: REPLACED %zu lines with %zu lines, "
- "line count off by %d", mfile.lineno(),
- nlines.before, nlines.after, nlines.delta());
- }
- int nnl = nlines.delta();
- while( nnl-- > 0 ) {
- static const char nl[] = "\n";
- std::copy(nl, nl + 1, ofs);
- }
- }
out.flush();
}
// end of file
@@ -1878,6 +1906,7 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
std::copy(file_pop, file_pop + strlen(file_pop), ofs);
out.flush();
}
+ if( !included_files.empty() ) { --nfiles; };
}
std::list<span_t>
@@ -1893,12 +1922,30 @@ cdftext::segment_line( filespan_t& mfile ) {
return output;
}
+ /*
+ * If the replacement changes the number of lines in the replaced text, we
+ * need to reset the line number, because the next statement is on a
+ * different line in the manipulated text than in the original. Before each
+ * replacement, set the original line number. After each replacement, set
+ * the line number after the elided text on the next line.
+ */
for( const replace_t& segment : pending ) {
gcc_assert(mfile.cur <= segment.before.p);
gcc_assert(segment.before.pend <= mfile.eodata);
+ struct { unsigned long ante, post; } lineno = {
+ gb4(mfile.lineno()), gb4(mfile.lineno() + segment.after.nlines())
+ };
+ char *directive = lineno.ante == lineno.post?
+ nullptr : xasprintf("\n#line %lu \"%s\"\n",
+ lineno.ante, cobol_filename());
+
+ if( directive )
+ output.push_back( span_t(strlen(directive), directive) );
output.push_back( span_t(mfile.cur, segment.before.p) );
output.push_back( span_t(segment.after.p, segment.after.pend ) );
+ if( directive )
+ output.push_back( span_t(strlen(directive), directive) );
mfile.cur = const_cast<char*>(segment.before.pend);
}
@@ -1914,5 +1961,3 @@ cdftext::segment_line( filespan_t& mfile ) {
return output;
}
-
-//////// End of the cdf_text.h file