diff options
Diffstat (limited to 'gcc/cobol/lexio.cc')
-rw-r--r-- | gcc/cobol/lexio.cc | 322 |
1 files changed, 202 insertions, 120 deletions
diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc index 82bacf2..2d9fb72 100644 --- a/gcc/cobol/lexio.cc +++ b/gcc/cobol/lexio.cc @@ -28,6 +28,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "config.h" #include <ext/stdio_filebuf.h> #include "cobol-system.h" #include "cbldiag.h" @@ -45,8 +46,22 @@ static struct { first_file = false; return tf; } + inline bool is_fixed() const { return column == 7; } + inline bool is_reffmt() const { return is_fixed() && right_margin == 73; } + inline bool is_free() const { return ! is_fixed(); } + + const char * description() const { + if( is_reffmt() ) return "REFERENCE"; + if( is_fixed() ) return "FIXED"; + if( is_free() ) return "FREE"; + gcc_unreachable(); + } } indicator = { true, false, 0, 0 }; +// public source format test functions +bool is_fixed_format() { return indicator.is_fixed(); } +bool is_reference_format() { return indicator.is_reffmt(); } + static bool debug_mode = false; /* @@ -85,10 +100,6 @@ cobol_set_indicator_column( int column ) indicator.column = column; } -bool is_fixed_format() { return indicator.column == 7; } -bool is_reference_format() { - return indicator.column == 7 && indicator.right_margin == 73; -} bool include_debug() { return indicator.column == 7 && debug_mode; } bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); } @@ -112,7 +123,7 @@ continues_at( char *bol, char *eol ) { // Return pointer to indicator column. Test ch if provided. // NULL means no indicator column or tested value not present. static inline char * -indicated( char *bol, char *eol, char ch = '\0' ) { +indicated( char *bol, const char *eol, char ch = '\0' ) { if( indicator.column == 0 && *bol != '*' ) { return NULL; // no indicator column in free format, except for comments } @@ -129,10 +140,10 @@ indicated( char *bol, char *eol, char ch = '\0' ) { static char * remove_inline_comment( char *bol, char *eol ) { - static char ends = '\0'; char *nl = std::find(bol, eol, '\n'); if( bol < nl ) { + static char ends = '\0'; std::swap(*nl, ends); char *comment = strstr(bol, "*>"); if( comment ) { @@ -197,10 +208,10 @@ maybe_add_space(const span_t& pattern, replace_t& recognized) { } if( befter[0] == blank || befter[1] == blank ) { - char *s = xasprintf( "%s%.*s%s", - befter[0], - recognized.after.size(), recognized.after.p, - befter[1] ); + const char *s = xasprintf( "%s%.*s%s", + befter[0], + recognized.after.size(), recognized.after.p, + befter[1] ); recognized.after = span_t(s, s + strlen(s)); } } @@ -255,14 +266,17 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem span_t found(mfile.eodata, mfile.eodata); - if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + if( regex_search( mfile.ccur(), + const_cast<const char *>(mfile.eodata), + cm, re) ) { gcc_assert(cm[1].matched); found = span_t( cm[1].first, cm[1].second ); if( yy_flex_debug ) { size_t n = count_newlines(mfile.data, found.p); - dbgmsg("%s:%d first '%.*s' is on line %zu (offset %zu)", __func__, __LINE__, + dbgmsg("%s:%d first '%.*s' is on line " HOST_SIZE_T_PRINT_UNSIGNED + " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__, directive.before.size(), directive.before.p, - ++n, found.p - mfile.data); + (fmt_size_t)++n, (fmt_size_t)(found.p - mfile.data)); } } else { dbgmsg("%s:%d not found: '%s' in \n'%.*s'", __func__, __LINE__, @@ -289,9 +303,12 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem bol = next.found.pend; if( yy_flex_debug ) { - size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n'); - dbgmsg( "%s:%d: line %zu @ %zu: '%s'\n/%.*s/%.*s/", __func__, __LINE__, - ++n, next.found.p - mfile.data, + size_t n = std::count(const_cast<const char *>(mfile.data), + recognized.before.p, '\n'); + dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED + " @ " HOST_SIZE_T_PRINT_UNSIGNED ": '%s'\n/%.*s/%.*s/", + __func__, __LINE__, + (fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data), next.directive.before.p, int(recognized.before.size()), recognized.before.p, int(recognized.after.size()), recognized.after.p ); @@ -303,14 +320,16 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem next.found = span_t(mfile.eodata, mfile.eodata); regex re(next.directive.before.p, extended_icase); - if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) { + if( regex_search(bol, const_cast<const char *>(mfile.eodata), cm, re) ) { gcc_assert(cm[1].matched); next.found = span_t( cm[1].first, cm[1].second ); - size_t n = std::count((const char *)mfile.data, next.found.p, '\n'); + size_t n = std::count(const_cast<const char *>(mfile.data), + next.found.p, '\n'); if( false ) - dbgmsg("%s:%d next '%.*s' will be on line %zu (offset %zu)", __func__, __LINE__, + dbgmsg("%s:%d next '%.*s' will be on line " HOST_SIZE_T_PRINT_UNSIGNED + " (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__, next.directive.before.size(), next.directive.before.p, - ++n, next.found.p - mfile.data); + (fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data)); } pnext = std::min_element(futures.begin(), futures.end()); } @@ -330,7 +349,7 @@ check_source_format_directive( filespan_t& mfile ) { // show contents of marked subexpressions within each match cmatch cm; - if( regex_search(p, (const char *)mfile.eol, cm, re) ) { + if( regex_search(p, const_cast<const char *>(mfile.eol), cm, re) ) { gcc_assert(cm.size() > 1); switch( cm[3].length() ) { case 4: @@ -343,11 +362,14 @@ check_source_format_directive( filespan_t& mfile ) { gcc_assert(cm[3].length() == 4 || cm[3].length() == 5); break; } + + dbgmsg( "%s:%d: %s format set, on line " HOST_SIZE_T_PRINT_UNSIGNED, + __func__, __LINE__, + indicator.column == 7? "FIXED" : "FREE", + (fmt_size_t)mfile.lineno() ); + char *bol = indicator.is_fixed()? mfile.cur : const_cast<char*>(cm[0].first); + erase_line(bol, const_cast<char*>(cm[0].second)); mfile.cur = const_cast<char*>(cm[0].second); - dbgmsg( "%s:%d: %s format set, on line %zu", __func__, __LINE__, - indicator.column == 7? "FIXED" : "FREE", mfile.lineno() ); - erase_line(const_cast<char*>(cm[0].first), - const_cast<char*>(cm[0].second)); } } @@ -380,21 +402,28 @@ struct buffer_t : public bytespan_t { dbgmsg("flex input buffer: '%.*s'\n[xelf]", int(pos - data), data); } void dump() const { +#ifdef GETENV_OK if( getenv("lexer_input") ) show(); +#endif } }; -static bool -valid_sequence_area( const char *p, const char *eodata ) { - const char *pend = p + 6; - if ( eodata < pend ) return false; +static inline bool is_p( char ch ) { return TOUPPER(ch) == 'P'; } - for( ; p < pend; p++ ) { - if( ! (ISDIGIT(*p) || *p == SPACE) ) { - return false; +static bool +is_program_id( const char *p, const char *eol ) { + static const std::string program_id("PROGRAM-ID"); + auto eop = p + program_id.size(); + if( eop < eol ) { + // PROGRAM-ID must be followed by a dot, perhaps with intervening whitespace. + for( const char *dot=eop; dot < eol && *dot != '.'; dot++ ) { + if( !ISSPACE(*dot) ) return false; } + std::string line (p, eop); + std::transform(line.begin(), line.end(), line.begin(), ::toupper); + return line == program_id; } - return true; // characters either digits or blanks + return false; } const char * esc( size_t len, const char input[] ); @@ -434,9 +463,9 @@ struct replacing_term_t { bool matched, done; span_t leading_trailing, term, stmt; - replacing_term_t(const char input[]) : matched(false), done(false) { - stmt = span_t(input, input); - } + explicit replacing_term_t(const char input[]) + : matched(false), done(false), stmt(span_t(input, input)) + {} }; extern YYLTYPE yylloc; @@ -457,11 +486,11 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) { class dump_loc_on_exit { public: dump_loc_on_exit() { - if( getenv( "update_yylloc" ) ) + if( gcobol_getenv( "update_yylloc" ) ) location_dump( "update_yylloc", __LINE__, "begin", yylloc); } ~dump_loc_on_exit() { - if( getenv( "update_yylloc" ) ) + if( gcobol_getenv( "update_yylloc" ) ) location_dump( "update_yylloc", __LINE__, "end ", yylloc); } } dloe; @@ -506,7 +535,7 @@ update_yylloc( const csub_match& stmt, const csub_match& term ) { static replacing_term_t parse_replacing_term( const char *stmt, const char *estmt ) { - gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt); + gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt <= estmt); replacing_term_t output(stmt); static const char pattern[] = @@ -716,13 +745,13 @@ parse_replacing_pair( const char *stmt, const char *estmt ) { } } if( pair.stmt.p ) { - yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p); + yywarn("CDF syntax error '%.*s'", (int)pair.stmt.size(), pair.stmt.p); } else { // This eliminated a compiler warning about "format-overflow" yywarn("CDF syntax error"); } - pair.stmt = span_t(0UL, stmt); + pair.stmt = span_t(size_t(0), stmt); pair.replace = replace_t(); } return pair; @@ -762,9 +791,9 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { // Report findings. if( false && yy_flex_debug ) { for( size_t i=0; i < cm.size(); i++ ) { - dbgmsg("%s: %s %zu: '%.*s'", __func__, + dbgmsg("%s: %s " HOST_SIZE_T_PRINT_UNSIGNED ": '%.*s'", __func__, cm[i].matched? "Pair" : "pair", - i, + (fmt_size_t)i, cm[i].matched? int(cm[i].length()) : 0, cm[i].matched? cm[i].first : ""); } @@ -784,7 +813,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { } span_t& before(parsed.replace.before); - span_t& after(parsed.replace.after); + const span_t& after(parsed.replace.after); const char *befter[2] = { nonword_ch, nonword_ch }; gcc_assert(before.p < before.pend); @@ -823,9 +852,10 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { } if( yy_flex_debug ) { - dbgmsg( "%s:%d: %s: %zu pairs parsed from '%.*s'", __func__, __LINE__, - parsed.done()? "done" : "not done", - pairs.size(), parsed.stmt.size(), parsed.stmt.p ); + dbgmsg( "%s:%d: %s: " HOST_SIZE_T_PRINT_UNSIGNED " pairs parsed from '%.*s'", + __func__, __LINE__, + parsed.done() ? "done" : "not done", + (fmt_size_t)pairs.size(), parsed.stmt.size(), parsed.stmt.p ); int i = 0; for( const auto& replace : pairs ) { dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__, @@ -851,7 +881,7 @@ struct copy_descr_t { }; static YYLTYPE -location_in( const filespan_t& mfile, const csub_match cm ) { +location_in( const filespan_t& mfile, const csub_match& cm ) { YYLTYPE loc { int(mfile.lineno() + 1), int(mfile.colno() + 1), int(mfile.lineno() + 1), int(mfile.colno() + 1) @@ -902,14 +932,15 @@ parse_copy_directive( filespan_t& mfile ) { copy_stmt.p = mfile.eodata; if( regex_search(mfile.ccur(), - (const char *)mfile.eodata, cm, re) ) { + const_cast<const char *>(mfile.eodata), cm, re) ) { copy_stmt = span_t( cm[0].first, cm[0].second ); if( yy_flex_debug ) { size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p); size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend); - dbgmsg("%s:%d: line %zu: COPY directive is %zu lines '%.*s'", + dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED + ": COPY directive is " HOST_SIZE_T_PRINT_UNSIGNED " lines '%.*s'", __func__, __LINE__, - nnl, nst, copy_stmt.size(), copy_stmt.p); + (fmt_size_t)nnl, (fmt_size_t)nst, copy_stmt.size(), copy_stmt.p); } } } @@ -922,7 +953,8 @@ parse_copy_directive( filespan_t& mfile ) { outcome.partial_line = span_t(mfile.cur, copy_stmt.p); if( yy_flex_debug ) { - dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '(')); + dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions", + (fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '(')); int i = 0; for( const auto& m : cm ) { if( m.matched ) @@ -953,7 +985,7 @@ parse_copy_directive( filespan_t& mfile ) { std::pair<std::list<replace_t>, char*> result = parse_replace_pairs( cm[0].second, mfile.eodata, true ); - std::list<replace_t>& replacements(result.first); + const std::list<replace_t>& replacements(result.first); outcome.parsed = (outcome.nreplace = replacements.size()) > 0; if( outcome.parsed ) { replace_directives.push(replacements); @@ -980,7 +1012,7 @@ parse_copy_directive( filespan_t& mfile ) { } static char * -parse_replace_last_off( filespan_t& mfile ) { +parse_replace_last_off( const filespan_t& mfile ) { static const char pattern[] = "REPLACE" "[[:space:]]+" "(LAST[[:space:]]+)?OFF[[:space:]]*[.]" @@ -990,7 +1022,7 @@ parse_replace_last_off( filespan_t& mfile ) { // REPLACE [LAST] OFF? bool found = regex_search(mfile.ccur(), - (const char *)mfile.eodata, cm, re); + const_cast<const char *>(mfile.eodata), cm, re); gcc_assert(found); // caller ensures gcc_assert(cm.size() == 2); @@ -1006,8 +1038,9 @@ parse_replace_last_off( filespan_t& mfile ) { } } - dbgmsg( "%s:%d: line %zu: parsed '%.*s', ", __func__, __LINE__, - mfile.lineno(), int(cm[0].length()), cm[0].first ); + dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsed '%.*s', ", + __func__, __LINE__, + (fmt_size_t)mfile.lineno(), int(cm[0].length()), cm[0].first ); // Remove statement from input erase_line(const_cast<char*>(cm[0].first), @@ -1039,20 +1072,23 @@ parse_replace_text( filespan_t& mfile ) { gcc_assert(mfile.line_length() > 2); if( pend[-1] == '\n' ) pend -= 2; auto len = int(pend - mfile.cur); - dbgmsg("%s:%d: line %zu: parsing '%.*s", __func__, __LINE__, - current_lineno, len, mfile.cur); + dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsing '%.*s", + __func__, __LINE__, + (fmt_size_t)current_lineno, len, mfile.cur); } - if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { - dbgmsg( "%s:%d: line %zu: not a REPLACE statement:\n'%.*s'", - __func__, __LINE__, current_lineno, + if( ! regex_search(mfile.ccur(), mfile.eodata, cm, re) ) { + dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED + ": not a REPLACE statement:\n'%.*s'", + __func__, __LINE__, (fmt_size_t)current_lineno, int(mfile.line_length()), mfile.cur ); return span_t(); } // Report findings. if( yy_flex_debug ) { - dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '(')); + dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions", + (fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '(')); int i = 0; for( const auto& m : cm ) { if( m.matched ) @@ -1077,12 +1113,14 @@ parse_replace_text( filespan_t& mfile ) { std::pair<std::list<replace_t>, char*> result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false); - std::list<replace_t>& replacements(result.first); + const std::list<replace_t>& replacements(result.first); replace_directives.push( replacements ); if( yy_flex_debug ) { - dbgmsg( "%s:%d: line %zu: %zu pairs parsed from '%.*s'", __func__, __LINE__, - current_lineno, replacements.size(), int(replace_stmt.size()), replace_stmt.p ); + dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": " HOST_SIZE_T_PRINT_UNSIGNED + " pairs parsed from '%.*s'", __func__, __LINE__, + (fmt_size_t)current_lineno, (fmt_size_t)replacements.size(), + int(replace_stmt.size()), replace_stmt.p ); for( const auto& replace : replacements ) { int i = 0; dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__, @@ -1113,7 +1151,7 @@ parse_replace_directive( filespan_t& mfile ) { next_directive = mfile.eodata; if( regex_search(mfile.ccur(), - (const char *)mfile.eodata, cm, re) ) { + const_cast<const char *>(mfile.eodata), cm, re) ) { gcc_assert(cm[1].matched); next_directive = cm[0].first; @@ -1162,8 +1200,9 @@ bytespan_t::append( const char *input, const char *eoinput ) { #if LEXIO auto nq = std::count_if(data, eodata, isquote); dbgmsg("%s:%3d: input ------ '%.*s'", __func__, __LINE__, int(eoinput - input), input); - dbgmsg("%s:%3d: precondition '%.*s' (%zu: %s)", __func__, __LINE__, - int(size()), data, nq, in_string()? "in string" : "not in string"); + dbgmsg("%s:%3d: precondition '%.*s' (" HOST_SIZE_T_PRINT_UNSIGNED ": %s)", + __func__, __LINE__, + int(size()), data, (fmt_size_t)nq, in_string()? "in string" : "not in string"); #endif if( !in_string() ) { // Remove trailing space unless it's part of a literal. while(data < eodata && ISSPACE(eodata[-1])) eodata--; @@ -1374,7 +1413,7 @@ preprocess_filter_add( const char input[] ) { auto filename = find_filter(filter.c_str()); if( !filename ) { - yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter); + yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter.c_str()); return false; } preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) ); @@ -1420,7 +1459,7 @@ cdftext::lex_open( const char filename[] ) { int output = open_output(); - // Process any files supplied by the -include comamnd-line option. + // Process any files supplied by the -include command-line option. for( auto name : included_files ) { int input; if( -1 == (input = open(name, O_RDONLY)) ) { @@ -1431,7 +1470,10 @@ cdftext::lex_open( const char filename[] ) { filespan_t mfile( free_form_reference_format( input ) ); process_file( mfile, output ); + + cobol_filename_restore(); // process_file restores only for COPY } + included_files.clear(); cobol_filename(filename, inode_of(input)); filespan_t mfile( free_form_reference_format( input ) ); @@ -1453,7 +1495,7 @@ cdftext::lex_open( const char filename[] ) { argv[0] = filter; auto last_argv = std::transform( options.begin(), options.end(), argv.begin() + 1, - []( std::string& opt ) { + []( const std::string& opt ) { return xstrdup(opt.c_str()); } ); *last_argv = NULL; @@ -1482,11 +1524,11 @@ cdftext::lex_open( const char filename[] ) { int status; auto kid = wait(&status); gcc_assert(pid == kid); - if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid); + if( kid == -1 ) cbl_err( "failed waiting for pid %ld", static_cast<long>(pid)); if( WIFSIGNALED(status) ) { - cbl_errx( "%s pid %d terminated by %s", - filter, kid, strsignal(WTERMSIG(status)) ); + cbl_errx( "%s pid %ld terminated by %s", + filter, static_cast<long>(kid), strsignal(WTERMSIG(status)) ); } if( WIFEXITED(status) ) { if( (status = WEXITSTATUS(status)) != 0 ) { @@ -1504,7 +1546,7 @@ int cdftext::open_input( const char filename[] ) { int fd = open(filename, O_RDONLY); if( fd == -1 ) { - dbgmsg( "could not open '%s': %m", filename ); + dbgmsg( "could not open '%s': %s", filename, xstrerror(errno) ); } verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR"); @@ -1518,9 +1560,9 @@ cdftext::open_input( const char filename[] ) { int cdftext::open_output() { char *name = getenv("GCOBOL_TEMPDIR"); - int fd; if( name && 0 != strcmp(name, "/") ) { + int fd; char * stem = xasprintf("%sXXXXXX", name); if( -1 == (fd = mkstemp(stem)) ) { cbl_err( "could not open temporary file '%s' (%s)", @@ -1561,8 +1603,8 @@ cdftext::map_file( int fd ) { cbl_err( "%s: could not prepare map file from FIFO %d", __func__, input); } - if( false ) dbgmsg("%s: copied %ld bytes from FIFO", - __func__, nout); + if( false ) dbgmsg("%s: copied " HOST_SIZE_T_PRINT_DEC " bytes from FIFO", + __func__, (fmt_size_t)nout); } } } while( S_ISFIFO(sb.st_mode) ); @@ -1585,6 +1627,54 @@ cdftext::map_file( int fd ) { bool lexio_dialect_mf(); +/* + * A valid sequence area is 6 digits or blanks at the begining of the line that + * contains PROGRAM-ID. Return NULL if no valid sequence area, else return + * pointer to BOL. + */ +static const char * +valid_sequence_area( const char *data, const char *eodata ) { + + for( const char *p = data; + (p = std::find_if(p, eodata, is_p)) != eodata; + p++ ) + { + auto eol = std::find(p, eodata, '\n'); + if( p == data || ISSPACE(p[-1]) ) { + if( is_program_id(p, eol) ) { // found program-id token + const char *bol = p; + for( ; data <= bol-1 && bol[-1] != '\n'; --bol ) + ; + if( 6 < p - bol ) { + if( std::all_of(bol, bol+6, ::isdigit) ) { + return bol; + } + if( std::all_of(bol, bol+6, ::isblank) ) { + return bol; + } + break; + } + } + } + } + return nullptr; +} + +/* + * Reference Format -- valid COBOL between columns 8 and 72 -- has data after + * column 72 on the PROGRAM-ID line. Extended Reference Format (that allows + * longer lines) has no reason to follow the PROGRAM-ID with more stuff. + */ +static bool +infer_reference_format( const char *bol, const char *eodata ) { + assert(bol); + auto eol = std::find(bol, eodata, '\n'); + if( 72 < eol - bol ) { + return ! std::all_of(bol + 72, eol, ::isspace); + } + return false; +} + filespan_t cdftext::free_form_reference_format( int input ) { filespan_t source_buffer = map_file(input); @@ -1599,29 +1689,23 @@ cdftext::free_form_reference_format( int input ) { size_t lineno; bytespan_t line; // construct with length zero - current_line_t( char data[] ) : lineno(0), line(data, data) {} + explicit current_line_t( char data[] ) : lineno(0), line(data, data) {} } current( mfile.data ); /* - * If the format is not explicitly set on the command line, test the - * first 6 bytes of the first file to determine the format - * heuristically. If the first 6 characters are only digits or - * blanks, then the file is in fixed format. + * Infer source code format. */ - if( indicator.inference_pending() ) { - const char *p = mfile.data; - while( p < mfile.eodata ) { - const char * pend = - std::find(p, const_cast<const char *>(mfile.eodata), '\n'); - if( 6 < pend - p ) break; - p = pend; - if( p < mfile.eodata) p++; + const char *bol = valid_sequence_area(mfile.data, mfile.eodata); + if( bol ) { + indicator.column = 7; + if( infer_reference_format(bol, mfile.eodata) ) { + indicator.right_margin = 73; + } } - if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7; dbgmsg("%s:%d: %s format detected", __func__, __LINE__, - indicator.column == 7? "FIXED" : "FREE"); + indicator.description()); } while( mfile.next_line() ) { @@ -1745,15 +1829,15 @@ cdftext::free_form_reference_format( int input ) { void cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { static size_t nfiles = 0; - std::list<replace_t> replacements; - __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out); + __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "a"), std::ios::out); std::ostream out(&outbuf); std::ostream_iterator<char> ofs(out); // indicate current file static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f"; + if( !included_files.empty() ) { ++nfiles; }; // force push/pop of included filename if( !second_pass && nfiles++ ) { static const char delimiter[] = "\f"; const char *filename = cobol_filename(); @@ -1772,8 +1856,8 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs, []( char ch ) { return ch == '\n'; } ); struct { int in, out; filespan_t mfile; } copy; - dbgmsg("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__, - mfile.lineno(), + dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ", opening %s on fd %d", + __func__, __LINE__,mfile.lineno(), copybook.source(), copybook.current()->fd); copy.in = copybook.current()->fd; copy.mfile = free_form_reference_format( copy.in ); @@ -1809,31 +1893,12 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { continue; // No active REPLACE directive. } - std::list<span_t> segments = segment_line(mfile); // no replace yields - // // 1 segment + std::list<span_t> segments = segment_line(mfile); for( const auto& segment : segments ) { std::copy(segment.p, segment.pend, ofs); } - if( segments.size() == 2 ) { - struct { - size_t before, after; - int delta() const { return before - after; } } nlines; - nlines.before = std::count(segments.front().p, - segments.front().pend, '\n'); - nlines.after = std::count(segments.back().p, segments.back().pend, '\n'); - if( nlines.delta() < 0 ) { - yywarn("line %zu: REPLACED %zu lines with %zu lines, " - "line count off by %d", mfile.lineno(), - nlines.before, nlines.after, nlines.delta()); - } - int nnl = nlines.delta(); - while( nnl-- > 0 ) { - static const char nl[] = "\n"; - std::copy(nl, nl + 1, ofs); - } - } out.flush(); } // end of file @@ -1841,6 +1906,7 @@ cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { std::copy(file_pop, file_pop + strlen(file_pop), ofs); out.flush(); } + if( !included_files.empty() ) { --nfiles; }; } std::list<span_t> @@ -1856,12 +1922,30 @@ cdftext::segment_line( filespan_t& mfile ) { return output; } + /* + * If the replacement changes the number of lines in the replaced text, we + * need to reset the line number, because the next statement is on a + * different line in the manipulated text than in the original. Before each + * replacement, set the original line number. After each replacement, set + * the line number after the elided text on the next line. + */ for( const replace_t& segment : pending ) { gcc_assert(mfile.cur <= segment.before.p); gcc_assert(segment.before.pend <= mfile.eodata); + struct { unsigned long ante, post; } lineno = { + gb4(mfile.lineno()), gb4(mfile.lineno() + segment.after.nlines()) + }; + char *directive = lineno.ante == lineno.post? + nullptr : xasprintf("\n#line %lu \"%s\"\n", + lineno.ante, cobol_filename()); + + if( directive ) + output.push_back( span_t(strlen(directive), directive) ); output.push_back( span_t(mfile.cur, segment.before.p) ); output.push_back( span_t(segment.after.p, segment.after.pend ) ); + if( directive ) + output.push_back( span_t(strlen(directive), directive) ); mfile.cur = const_cast<char*>(segment.before.pend); } @@ -1877,5 +1961,3 @@ cdftext::segment_line( filespan_t& mfile ) { return output; } - -//////// End of the cdf_text.h file |