diff options
Diffstat (limited to 'gcc/cobol/lexio.cc')
-rw-r--r-- | gcc/cobol/lexio.cc | 257 |
1 files changed, 151 insertions, 106 deletions
diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc index a992166..2d9fb72 100644 --- a/gcc/cobol/lexio.cc +++ b/gcc/cobol/lexio.cc @@ -46,8 +46,22 @@ static struct { first_file = false; return tf; } + inline bool is_fixed() const { return column == 7; } + inline bool is_reffmt() const { return is_fixed() && right_margin == 73; } + inline bool is_free() const { return ! is_fixed(); } + + const char * description() const { + if( is_reffmt() ) return "REFERENCE"; + if( is_fixed() ) return "FIXED"; + if( is_free() ) return "FREE"; + gcc_unreachable(); + } } indicator = { true, false, 0, 0 }; +// public source format test functions +bool is_fixed_format() { return indicator.is_fixed(); } +bool is_reference_format() { return indicator.is_reffmt(); } + static bool debug_mode = false; /* @@ -86,10 +100,6 @@ cobol_set_indicator_column( int column ) indicator.column = column; } -bool is_fixed_format() { return indicator.column == 7; } -bool is_reference_format() { - return indicator.column == 7 && indicator.right_margin == 73; -} bool include_debug() { return indicator.column == 7 && debug_mode; } bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); } @@ -113,7 +123,7 @@ continues_at( char *bol, char *eol ) { // Return pointer to indicator column. Test ch if provided. // NULL means no indicator column or tested value not present. 
static inline char * -indicated( char *bol, char *eol, char ch = '\0' ) { +indicated( char *bol, const char *eol, char ch = '\0' ) { if( indicator.column == 0 && *bol != '*' ) { return NULL; // no indicator column in free format, except for comments } @@ -130,10 +140,10 @@ indicated( char *bol, char *eol, char ch = '\0' ) { static char * remove_inline_comment( char *bol, char *eol ) { - static char ends = '\0'; char *nl = std::find(bol, eol, '\n'); if( bol < nl ) { + static char ends = '\0'; std::swap(*nl, ends); char *comment = strstr(bol, "*>"); if( comment ) { @@ -198,10 +208,10 @@ maybe_add_space(const span_t& pattern, replace_t& recognized) { } if( befter[0] == blank || befter[1] == blank ) { - char *s = xasprintf( "%s%.*s%s", - befter[0], - recognized.after.size(), recognized.after.p, - befter[1] ); + const char *s = xasprintf( "%s%.*s%s", + befter[0], + recognized.after.size(), recognized.after.p, + befter[1] ); recognized.after = span_t(s, s + strlen(s)); } } @@ -256,7 +266,9 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem span_t found(mfile.eodata, mfile.eodata); - if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + if( regex_search( mfile.ccur(), + const_cast<const char *>(mfile.eodata), + cm, re) ) { gcc_assert(cm[1].matched); found = span_t( cm[1].first, cm[1].second ); if( yy_flex_debug ) { @@ -291,7 +303,8 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem bol = next.found.pend; if( yy_flex_debug ) { - size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n'); + size_t n = std::count(const_cast<const char *>(mfile.data), + recognized.before.p, '\n'); dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED " @ " HOST_SIZE_T_PRINT_UNSIGNED ": '%s'\n/%.*s/%.*s/", __func__, __LINE__, @@ -307,10 +320,11 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem next.found = span_t(mfile.eodata, mfile.eodata); regex 
re(next.directive.before.p, extended_icase); - if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) { + if( regex_search(bol, const_cast<const char *>(mfile.eodata), cm, re) ) { gcc_assert(cm[1].matched); next.found = span_t( cm[1].first, cm[1].second ); - size_t n = std::count((const char *)mfile.data, next.found.p, '\n'); + size_t n = std::count(const_cast<const char *>(mfile.data), + next.found.p, '\n'); if( false ) dbgmsg("%s:%d next '%.*s' will be on line " HOST_SIZE_T_PRINT_UNSIGNED " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__, @@ -335,7 +349,7 @@ check_source_format_directive( filespan_t& mfile ) { // show contents of marked subexpressions within each match cmatch cm; - if( regex_search(p, (const char *)mfile.eol, cm, re) ) { + if( regex_search(p, const_cast<const char *>(mfile.eol), cm, re) ) { gcc_assert(cm.size() > 1); switch( cm[3].length() ) { case 4: @@ -348,13 +362,14 @@ check_source_format_directive( filespan_t& mfile ) { gcc_assert(cm[3].length() == 4 || cm[3].length() == 5); break; } - mfile.cur = const_cast<char*>(cm[0].second); + dbgmsg( "%s:%d: %s format set, on line " HOST_SIZE_T_PRINT_UNSIGNED, __func__, __LINE__, indicator.column == 7? "FIXED" : "FREE", (fmt_size_t)mfile.lineno() ); - erase_line(const_cast<char*>(cm[0].first), - const_cast<char*>(cm[0].second)); + char *bol = indicator.is_fixed()? mfile.cur : const_cast<char*>(cm[0].first); + erase_line(bol, const_cast<char*>(cm[0].second)); + mfile.cur = const_cast<char*>(cm[0].second); } } @@ -393,33 +408,22 @@ struct buffer_t : public bytespan_t { } }; -static bool -valid_sequence_area( const char *p, const char *eodata ) { - const char *pend = p + 6; - if ( eodata < pend ) return false; +static inline bool is_p( char ch ) { return TOUPPER(ch) == 'P'; } - for( ; p < pend; p++ ) { - if( ! 
(ISDIGIT(*p) || *p == SPACE) ) { - return false; +static bool +is_program_id( const char *p, const char *eol ) { + static const std::string program_id("PROGRAM-ID"); + auto eop = p + program_id.size(); + if( eop < eol ) { + // PROGRAM-ID must be followed by a dot, perhaps with intervening whitespace. + for( const char *dot=eop; dot < eol && *dot != '.'; dot++ ) { + if( !ISSPACE(*dot) ) return false; } + std::string line (p, eop); + std::transform(line.begin(), line.end(), line.begin(), ::toupper); + return line == program_id; } - return true; // characters either digits or blanks -} - -// Inspect the 2nd line for telltale signs of a NIST file. -// If true, caller sets right margin to 73, indicating Reference Format -static bool -likely_nist_file( const char *p, const char *eodata ) { - if( (p = std::find(p, eodata, '\n')) == eodata ) return false; - if ( eodata < ++p + 80 ) return false; - p += 72; - - return - ISALPHA(p[0]) && ISALPHA(p[1]) && - ISDIGIT(p[2]) && ISDIGIT(p[3]) && ISDIGIT(p[4]) && - p[5] == '4' && - p[6] == '.' 
&& - p[7] == '2'; + return false; } const char * esc( size_t len, const char input[] ); @@ -459,9 +463,9 @@ struct replacing_term_t { bool matched, done; span_t leading_trailing, term, stmt; - replacing_term_t(const char input[]) : matched(false), done(false) { - stmt = span_t(input, input); - } + explicit replacing_term_t(const char input[]) + : matched(false), done(false), stmt(span_t(input, input)) + {} }; extern YYLTYPE yylloc; @@ -531,7 +535,7 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) { static replacing_term_t parse_replacing_term( const char *stmt, const char *estmt ) { - gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt); + gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt <= estmt); replacing_term_t output(stmt); static const char pattern[] = @@ -741,7 +745,7 @@ parse_replacing_pair( const char *stmt, const char *estmt ) { } } if( pair.stmt.p ) { - yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p); + yywarn("CDF syntax error '%.*s'", (int)pair.stmt.size(), pair.stmt.p); } else { // This eliminated a compiler warning about "format-overflow" @@ -809,7 +813,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { } span_t& before(parsed.replace.before); - span_t& after(parsed.replace.after); + const span_t& after(parsed.replace.after); const char *befter[2] = { nonword_ch, nonword_ch }; gcc_assert(before.p < before.pend); @@ -877,7 +881,7 @@ struct copy_descr_t { }; static YYLTYPE -location_in( const filespan_t& mfile, const csub_match cm ) { +location_in( const filespan_t& mfile, const csub_match& cm ) { YYLTYPE loc { int(mfile.lineno() + 1), int(mfile.colno() + 1), int(mfile.lineno() + 1), int(mfile.colno() + 1) @@ -928,7 +932,7 @@ parse_copy_directive( filespan_t& mfile ) { copy_stmt.p = mfile.eodata; if( regex_search(mfile.ccur(), - (const char *)mfile.eodata, cm, re) ) { + const_cast<const char *>(mfile.eodata), cm, re) ) { copy_stmt = span_t( cm[0].first, cm[0].second 
); if( yy_flex_debug ) { size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p); @@ -981,7 +985,7 @@ parse_copy_directive( filespan_t& mfile ) { std::pair<std::list<replace_t>, char*> result = parse_replace_pairs( cm[0].second, mfile.eodata, true ); - std::list<replace_t>& replacements(result.first); + const std::list<replace_t>& replacements(result.first); outcome.parsed = (outcome.nreplace = replacements.size()) > 0; if( outcome.parsed ) { replace_directives.push(replacements); @@ -1008,7 +1012,7 @@ parse_copy_directive( filespan_t& mfile ) { } static char * -parse_replace_last_off( filespan_t& mfile ) { +parse_replace_last_off( const filespan_t& mfile ) { static const char pattern[] = "REPLACE" "[[:space:]]+" "(LAST[[:space:]]+)?OFF[[:space:]]*[.]" @@ -1018,7 +1022,7 @@ parse_replace_last_off( filespan_t& mfile ) { // REPLACE [LAST] OFF? bool found = regex_search(mfile.ccur(), - (const char *)mfile.eodata, cm, re); + const_cast<const char *>(mfile.eodata), cm, re); gcc_assert(found); // caller ensures gcc_assert(cm.size() == 2); @@ -1073,7 +1077,7 @@ parse_replace_text( filespan_t& mfile ) { (fmt_size_t)current_lineno, len, mfile.cur); } - if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + if( ! 
regex_search(mfile.ccur(), mfile.eodata, cm, re) ) { dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": not a REPLACE statement:\n'%.*s'", __func__, __LINE__, (fmt_size_t)current_lineno, @@ -1109,7 +1113,7 @@ parse_replace_text( filespan_t& mfile ) { std::pair<std::list<replace_t>, char*> result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false); - std::list<replace_t>& replacements(result.first); + const std::list<replace_t>& replacements(result.first); replace_directives.push( replacements ); if( yy_flex_debug ) { @@ -1147,7 +1151,7 @@ parse_replace_directive( filespan_t& mfile ) { next_directive = mfile.eodata; if( regex_search(mfile.ccur(), - (const char *)mfile.eodata, cm, re) ) { + const_cast<const char *>(mfile.eodata), cm, re) ) { gcc_assert(cm[1].matched); next_directive = cm[0].first; @@ -1409,7 +1413,7 @@ preprocess_filter_add( const char input[] ) { auto filename = find_filter(filter.c_str()); if( !filename ) { - yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter); + yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter.c_str()); return false; } preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) ); @@ -1455,7 +1459,7 @@ cdftext::lex_open( const char filename[] ) { int output = open_output(); - // Process any files supplied by the -include comamnd-line option. + // Process any files supplied by the -include command-line option. 
for( auto name : included_files ) { int input; if( -1 == (input = open(name, O_RDONLY)) ) { @@ -1466,7 +1470,10 @@ cdftext::lex_open( const char filename[] ) { filespan_t mfile( free_form_reference_format( input ) ); process_file( mfile, output ); + + cobol_filename_restore(); // process_file restores only for COPY } + included_files.clear(); cobol_filename(filename, inode_of(input)); filespan_t mfile( free_form_reference_format( input ) ); @@ -1488,7 +1495,7 @@ cdftext::lex_open( const char filename[] ) { argv[0] = filter; auto last_argv = std::transform( options.begin(), options.end(), argv.begin() + 1, - []( std::string& opt ) { + []( const std::string& opt ) { return xstrdup(opt.c_str()); } ); *last_argv = NULL; @@ -1517,11 +1524,11 @@ cdftext::lex_open( const char filename[] ) { int status; auto kid = wait(&status); gcc_assert(pid == kid); - if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid); + if( kid == -1 ) cbl_err( "failed waiting for pid %ld", static_cast<long>(pid)); if( WIFSIGNALED(status) ) { - cbl_errx( "%s pid %d terminated by %s", - filter, kid, strsignal(WTERMSIG(status)) ); + cbl_errx( "%s pid %ld terminated by %s", + filter, static_cast<long>(kid), strsignal(WTERMSIG(status)) ); } if( WIFEXITED(status) ) { if( (status = WEXITSTATUS(status)) != 0 ) { @@ -1539,7 +1546,7 @@ int cdftext::open_input( const char filename[] ) { int fd = open(filename, O_RDONLY); if( fd == -1 ) { - dbgmsg( "could not open '%s': %m", filename ); + dbgmsg( "could not open '%s': %s", filename, xstrerror(errno) ); } verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR"); @@ -1553,9 +1560,9 @@ cdftext::open_input( const char filename[] ) { int cdftext::open_output() { char *name = getenv("GCOBOL_TEMPDIR"); - int fd; if( name && 0 != strcmp(name, "/") ) { + int fd; char * stem = xasprintf("%sXXXXXX", name); if( -1 == (fd = mkstemp(stem)) ) { cbl_err( "could not open temporary file '%s' (%s)", @@ -1620,6 +1627,54 @@ cdftext::map_file( int fd ) { bool lexio_dialect_mf(); 
+/* + * A valid sequence area is 6 digits or blanks at the beginning of the line that + * contains PROGRAM-ID. Return NULL if no valid sequence area, else return + * pointer to BOL. + */ +static const char * +valid_sequence_area( const char *data, const char *eodata ) { + + for( const char *p = data; + (p = std::find_if(p, eodata, is_p)) != eodata; + p++ ) + { + auto eol = std::find(p, eodata, '\n'); + if( p == data || ISSPACE(p[-1]) ) { + if( is_program_id(p, eol) ) { // found program-id token + const char *bol = p; + for( ; data <= bol-1 && bol[-1] != '\n'; --bol ) + ; + if( 6 < p - bol ) { + if( std::all_of(bol, bol+6, ::isdigit) ) { + return bol; + } + if( std::all_of(bol, bol+6, ::isblank) ) { + return bol; + } + break; + } + } + } + } + return nullptr; +} + +/* + * Reference Format -- valid COBOL between columns 8 and 72 -- has data after + * column 72 on the PROGRAM-ID line. Extended Reference Format (that allows + * longer lines) has no reason to follow the PROGRAM-ID with more stuff. + */ +static bool +infer_reference_format( const char *bol, const char *eodata ) { + assert(bol); + auto eol = std::find(bol, eodata, '\n'); + if( 72 < eol - bol ) { + return ! std::all_of(bol + 72, eol, ::isspace); + } + return false; +} + filespan_t cdftext::free_form_reference_format( int input ) { filespan_t source_buffer = map_file(input); @@ -1634,31 +1689,23 @@ cdftext::free_form_reference_format( int input ) { size_t lineno; bytespan_t line; // construct with length zero - current_line_t( char data[] ) : lineno(0), line(data, data) {} + explicit current_line_t( char data[] ) : lineno(0), line(data, data) {} } current( mfile.data ); /* - * If the format is not explicitly set on the command line, test the - * first 6 bytes of the first file to determine the format - * heuristically. If the first 6 characters are only digits or - * blanks, then the file is in fixed format. + * Infer source code format. 
*/ - if( indicator.inference_pending() ) { - const char *p = mfile.data; - while( p < mfile.eodata ) { - const char * pend = - std::find(p, const_cast<const char *>(mfile.eodata), '\n'); - if( 6 < pend - p ) break; - p = pend; - if( p < mfile.eodata) p++; + const char *bol = valid_sequence_area(mfile.data, mfile.eodata); + if( bol ) { + indicator.column = 7; + if( infer_reference_format(bol, mfile.eodata) ) { + indicator.right_margin = 73; + } } - if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7; - if( likely_nist_file(p, mfile.eodata) ) indicator.right_margin = 73; - dbgmsg("%s:%d: %s%s format detected", __func__, __LINE__, - indicator.column == 7? "FIXED" : "FREE", - indicator.right_margin == 73? "" : "-extended"); + dbgmsg("%s:%d: %s format detected", __func__, __LINE__, + indicator.description()); } while( mfile.next_line() ) { @@ -1782,15 +1829,15 @@ cdftext::free_form_reference_format( int input ) { void cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { static size_t nfiles = 0; - std::list<replace_t> replacements; - __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out); + __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "a"), std::ios::out); std::ostream out(&outbuf); std::ostream_iterator<char> ofs(out); // indicate current file static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f"; + if( !included_files.empty() ) { ++nfiles; }; // force push/pop of included filename if( !second_pass && nfiles++ ) { static const char delimiter[] = "\f"; const char *filename = cobol_filename(); @@ -1846,31 +1893,12 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { continue; // No active REPLACE directive. 
} - std::list<span_t> segments = segment_line(mfile); // no replace yields - // // 1 segment + std::list<span_t> segments = segment_line(mfile); for( const auto& segment : segments ) { std::copy(segment.p, segment.pend, ofs); } - if( segments.size() == 2 ) { - struct { - size_t before, after; - int delta() const { return before - after; } } nlines; - nlines.before = std::count(segments.front().p, - segments.front().pend, '\n'); - nlines.after = std::count(segments.back().p, segments.back().pend, '\n'); - if( nlines.delta() < 0 ) { - yywarn("line %zu: REPLACED %zu lines with %zu lines, " - "line count off by %d", mfile.lineno(), - nlines.before, nlines.after, nlines.delta()); - } - int nnl = nlines.delta(); - while( nnl-- > 0 ) { - static const char nl[] = "\n"; - std::copy(nl, nl + 1, ofs); - } - } out.flush(); } // end of file @@ -1878,6 +1906,7 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { std::copy(file_pop, file_pop + strlen(file_pop), ofs); out.flush(); } + if( !included_files.empty() ) { --nfiles; }; } std::list<span_t> @@ -1893,12 +1922,30 @@ cdftext::segment_line( filespan_t& mfile ) { return output; } + /* + * If the replacement changes the number of lines in the replaced text, we + * need to reset the line number, because the next statement is on a + * different line in the manipulated text than in the original. Before each + * replacement, set the original line number. After each replacement, set + * the line number after the elided text on the next line. + */ for( const replace_t& segment : pending ) { gcc_assert(mfile.cur <= segment.before.p); gcc_assert(segment.before.pend <= mfile.eodata); + struct { unsigned long ante, post; } lineno = { + gb4(mfile.lineno()), gb4(mfile.lineno() + segment.after.nlines()) + }; + char *directive = lineno.ante == lineno.post? 
+ nullptr : xasprintf("\n#line %lu \"%s\"\n", + lineno.ante, cobol_filename()); + + if( directive ) + output.push_back( span_t(strlen(directive), directive) ); output.push_back( span_t(mfile.cur, segment.before.p) ); output.push_back( span_t(segment.after.p, segment.after.pend ) ); + if( directive ) + output.push_back( span_t(strlen(directive), directive) ); mfile.cur = const_cast<char*>(segment.before.pend); } @@ -1914,5 +1961,3 @@ cdftext::segment_line( filespan_t& mfile ) { return output; } - -//////// End of the cdf_text.h file |