/* * Copyright (c) 2021-2025 Symas Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of the Symas Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static const char * start_condition_str( int sc ) { const char *state = "???"; switch(sc) { case INITIAL: state = "INITIAL"; break; case author_state: state = "author_state"; break; case basis: state = "basis"; break; case bool_state: state = "bool_state"; break; case cdf_state: state = "cdf_state"; break; case classify: state = "classify"; break; case copy_state: state = "copy_state"; break; case comment_entries: state = "comment_entries"; break; case date_state: state = "date_state"; break; case datetime_fmt: state = "datetime_fmt"; break; case dot_state: state = "dot_state"; break; case exception: state = "exception"; break; case field_level: state = "field_level"; break; case field_state: state = "field_state"; break; case function: state = "function"; break; case hex_state: state = "hex_state"; break; case ident_state: state = "ident_state"; break; case integer_count: state = "integer_count"; break; case name_state: state = "name_state"; break; case numeric_state: state = "numeric_state"; break; case numstr_state: state = "numstr_state"; break; case partial_name: state = "partial_name"; break; case picture: state = "picture"; break; case picture_count: state = "picture_count"; break; case procedure_div: state = "procedure_div"; break; case program_id_state: state = "program_id_state"; break; case quoted1: state = "quoted1"; break; case quoted2: state = "quoted2"; break; case quoteq: state = "quoteq"; break; case raising: state = "raising"; break; case subscripts: state = "subscripts"; break; case sort_state: state = "sort_state"; break; } return state; } static const char * start_condition_is() { return start_condition_str( YY_START ); } /* * Match datetime constants. * * A 78 or CONSTANT could have a special literal for formatted * date/time functions. */ static int datetime_format_of( const char input[] ) { static const char date_fmt_b[] = "YYYYMMDD|YYYYDDD|YYYYWwwD"; static const char date_fmt_e[] = "YYYY-MM-DD|YYYY-DDD|YYYY-Www-D"; static const char time_fmt_b[] = "hhmmss([.,]s+)?|hhmmss([.,]s+)?Z|hhmmss([.,]s+)?[+]hhmm|"; static const char time_fmt_e[] = "hh:mm:ss([.,]s+)?|hh:mm:ss([.,]s+)?Z|hh:mm:ss([.,]s+)?[+]hh:mm"; static char date_pattern[ 3 * sizeof(date_fmt_e) ]; static char time_pattern[ 3 * sizeof(time_fmt_e) ]; static char datetime_pattern[ 6 * sizeof(time_fmt_e) ]; static struct pattern_t { regex_t re; const char *regex; int token; } patterns[] = { { {}, datetime_pattern, DATETIME_FMT }, { {}, date_pattern, DATE_FMT }, { {}, time_pattern, TIME_FMT }, }, * eopatterns = patterns + COUNT_OF(patterns);; // compile patterns if( ! date_pattern[0] ) { sprintf(date_pattern, "%s|%s", date_fmt_b, date_fmt_e); sprintf(time_pattern, "%s|%s", time_fmt_b, time_fmt_e); sprintf(datetime_pattern, "(%sT%s)|(%sT%s)", date_fmt_b, time_fmt_b, date_fmt_e, time_fmt_e); for( auto p = patterns; p < eopatterns; p++ ) { static const int cflags = REG_EXTENDED | REG_ICASE; static char msg[80]; int erc; if( 0 != (erc = regcomp(&p->re, p->regex, cflags)) ) { regerror(erc, &p->re, msg, sizeof(msg)); yywarn("%s:%d: %s: %s", __func__, __LINE__, keyword_str(p->token), msg); } } } // applies only in the datetime_fmt start condition if( datetime_fmt == YY_START ) { yy_pop_state(); if( input == NULL ) return 0; // See if the input is a date, time, or datetime pattern string. static const int nmatch = 3; regmatch_t matches[nmatch]; auto p = std::find_if( patterns, eopatterns, [input, &matches]( auto& pattern ) { auto erc = regexec( &pattern.re, input, COUNT_OF(matches), matches, 0 ); return erc == 0; } ); return p != eopatterns? p->token : 0; } return 0; } /* * >>DEFINE, >>IF, and >>EVALUATE */ static bool is_cdf_token( int token ) { switch(token) { case CDF_DEFINE: case CDF_DISPLAY: case CDF_IF: case CDF_ELSE: case CDF_END_IF: case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE: return true; case CALL_COBOL: case CALL_VERBATIM: case COPY: case TURN: return true; } return false; } static bool is_cdf_condition_token( int token ) { switch(token) { case CDF_IF: case CDF_ELSE: case CDF_END_IF: case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE: return true; } return false; } /* * IF and EVALUATE are partially parsed in cdf.y. ELSE and WHEN, etc., are * valid only in context. */ static bool valid_conditional_context( int token ) { switch(token) { case CDF_DEFINE: case CDF_IF: case CDF_EVALUATE: return true; case CDF_ELSE: case CDF_END_IF: return scanner_token() == CDF_IF; case CDF_WHEN: case CDF_END_EVALUATE: return scanner_token() == CDF_EVALUATE; } return true; // all other CDF tokens valid regardless of context } static bool run_cdf( int token ) { if( ! valid_conditional_context(token) ) { error_msg(yylloc, "CDF syntax error at '%s'", keyword_str(token)); return false; } parsing.inject_token(token); // because it will be needed by CDF parser if( yy_flex_debug ) dbgmsg("CDF parser start with '%s'", keyword_str(token)); parsing.parser_save(ydfparse); int erc = ydfparse(); // Parse the CDF directive. parsing.parser_restore(); if( YY_START == cdf_state ) yy_pop_state(); if( yy_flex_debug ) { dbgmsg("CDF parser returned %d, scanner SC <%s>", erc, start_condition_is()); } return 0 == erc; } #include struct pending_token_t { int token; YYSTYPE value; pending_token_t( int token, YYSTYPE value ) : token(token), value(value) {} }; #define PENDING(T) pending_token_t( (T), yylval ) static std::queue pending_tokens; int next_token() { int token = lexer(); return token; } extern int ydfchar; bool in_procedure_division(void); // act on CDF tokens int prelex() { static bool in_cdf = false; int token = next_token(); if( in_cdf ) { return token; } if( ! is_cdf_token(token) ) { return token; } in_cdf = true; assert(is_cdf_token(token)); while( is_cdf_token(token) ) { if( ! run_cdf(token) ) { dbgmsg( ">>CDF parser failed" ); return NO_CONDITION; } // Return the CDF's discarded lookahead token, if extant. token = ydfchar > 0? ydfchar : next_token(); if( token == NO_CONDITION && parsing.at_eof() ) { return token = YYEOF; } // Reenter cdf parser only if next token could affect parsing state. if( ! parsing.on() && ! is_cdf_condition_token(token) ) break; } if( yy_flex_debug ) { dbgmsg("scanner SC <%s>", start_condition_is()); } if( YY_START == copy_state || YY_START == cdf_state ) { if( token == NAME ) { auto tok = keyword_tok(ydflval.string); if( tok ) token = tok; } yy_pop_state(); dbgmsg("scanner SC <%s>, token now %s", start_condition_is(), keyword_str(token)); } /* * The final, rejected CDF token might be a LEVEL number. */ if( YY_START == field_state && level_needed() ) { switch( token ) { case NUMSTR: if( yy_flex_debug ) yywarn("final token is NUMSTR"); yylval.number = level_of(yylval.numstr.string); token = LEVEL; break; case YDF_NUMBER: if( yy_flex_debug ) yywarn("final token is YDF_NUMBER"); yylval.number = ydflval.number; token = LEVEL; break; } if( token == LEVEL ) { switch(yylval.number) { case 66: token = LEVEL66; break; case 78: token = LEVEL78; break; case 88: token = LEVEL78; break; } } } dbgmsg( ">>CDF parser done, %s returning " "%s (because final_token %s, lookhead %d) on line %d", __func__, keyword_str(token), keyword_str(final_token), ydfchar, yylineno ); in_cdf = false; return token; } /* There are 2 parsers and one scanner. * yyparse calls yylex. * yylex calls prelex * prelex calls lexer, the scanner produced by flex. * lexer reads input from yyin via lexer_input. * * prelex intercepts CDF statements, each of which it parses with ydfparse. * ydfparse affects CDF variables, which may affect how yylex treats * the input stream. * * Because the lexer is called recursively: * * yyparse -> yylex -> ydfparse -> yylex * * the global state of the scanner has changed when ydfparse returns. Part of * that state is the unused lookahead token that ydfparse discarded, stored in * final_token. prelex then returns final_token as its own, which is duly * returned to yyparse. */ int yylex(void) { static bool produce_next_sentence_target = false; int token = parsing.pending_token(); if( parsing.at_eof() ) return YYEOF; if( token ) return token; /* * NEXT SENTENCE jumps to an implied CONTINUE at the next dot ('.'). * Documentation says variously that the implied CONTINUE is before or after * that dot, but the meaning is one: after the statement that precedes the * dot. * * When the lexer encounters the dot, it returns it to the parser, which may * use it as a look-ahead token to decide the grammar production. By the * time it returns to the lexer looking for its next token, the parser will * have taken whatever actions the dot decided. At that point, the lexer * injects the label that NEXT SENTENCE jumps to. */ if( produce_next_sentence_target ) { next_sentence_label(next_sentence); produce_next_sentence_target = false; } do { token = prelex(); if( yy_flex_debug ) { if( parsing.in_cdf() ) { dbgmsg( "%s:%d: %s routing %s to CDF parser", __func__, __LINE__, start_condition_is(), keyword_str(token) ); } else if( !parsing.on() ) { dbgmsg( "eating %s because conditional compilation is FALSE", keyword_str(token) ); } } } while( token && ! parsing.feed_a_parser() ); if( next_sentence && token == '.' ) { produce_next_sentence_target = true; } if( parsing.normal() ) { final_token = token; } if( token == YYEOF && parsing.in_cdf() ) { if( yy_flex_debug) dbgmsg("deflecting EOF"); parsing.at_eof(true); return NO_CONDITION; } return token; }