diff options
Diffstat (limited to 'gcc/cobol/scan_ante.h')
-rw-r--r-- | gcc/cobol/scan_ante.h | 395 |
1 files changed, 392 insertions, 3 deletions
diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h index ea304ba..31093a6 100644 --- a/gcc/cobol/scan_ante.h +++ b/gcc/cobol/scan_ante.h @@ -149,7 +149,7 @@ numstr_of( const char string[], radix_t radix = decimal_e ) { } auto nx = std::count_if(input, p, fisdigit); if( 36 < nx ) { - error_msg(yylloc, "significand of %s has more than 36 digits (%zu)", input, nx); + error_msg(yylloc, "significand of %s has more than 36 digits (%td)", input, nx); return NO_CONDITION; } @@ -356,6 +356,10 @@ static void level_found() { if( scanner_normal() ) parsing.need_level(false); } +/* + * Trim the scanned location by the amount about to re-scanned. + * Must be a macro because it expands yyless. + */ #define myless(N) \ do { \ auto n(N); \ @@ -486,7 +490,8 @@ trim_location( int nkeep) { (fmt_size_t)nline, (fmt_size_t)rescan.size()); if( nline ) { gcc_assert( yylloc.first_line + nline <= yylloc.last_line ); - yylloc.last_line =- int(nline); + yylloc.last_line -= int(nline); + gcc_assert( yylloc.first_line <= yylloc.last_line ); char *p = static_cast<char*>(memrchr(rescan.p, '\n', rescan.size())); yylloc.last_column = rescan.pend - ++p; return; @@ -604,7 +609,9 @@ static const std::map <std::string, bint_t > binary_integers { static int binary_integer_usage( const char name[]) { - cbl_name_t uname = {}; + // uname can't be cbl_name_t, because at this point name[] might have more + // than sizeof(cbl_name_t) characters. The length check comes later. + char *uname = xstrdup(name); std::transform(name, name + strlen(name), uname, ftoupper); dbgmsg("%s:%d: checking %s in %zu keyword_aliases", @@ -623,6 +630,7 @@ binary_integer_usage( const char name[]) { yylval.computational.signable = p->second.signable; dbgmsg("%s:%d: %s has type %d", __func__, __LINE__, uname, p->second.type ); + free(uname); return p->second.token; } @@ -689,6 +697,387 @@ picset( int token ) { return token; } +/** +## Script and data to produce picture_t::followers. +## Based on ISO Table 10. +#! /usr/bin/awk -f + +BEGIN { + str = "B0/ , . + +- +- CR/DB cs cs Z* Z* + + cs cs 9 AX S V P P 1 N E" + split(str, cols) +} + +$1 ~ /CR|DB|cs/ { next } + +0 && !nlines++ { + for( i=0; i < length(cols); i++ ) { + print i, cols[i], "'" $i "'" + } +} + +$field == "x" { + if( ! nout++ ) { + printf "%2d: %5s: \"", field, cols[field - 1] + } + + gsub(/^ +| +$/, "", $1) + printf "%s", $1 +} + +END { + if( ! nout++ ) { + printf "%2d: %5s: \"", field, cols[field - 1] + } + print "\"" +} + +B x x x - x - - x - x x x x x x x x - x - x - x +0 x x x - x - - x - x x x x x x x x - x - x - x +/ x x x - x - - x - x x x x x x x x - x - x - x +, x x x - x - - x - x x x x x x x - - x - x +. x x - - x - - x - x - x - x - x ++ - - - - - - - - - - - - - - - - - - - - - - - x ++ +– ++ x x x - - - - x x x x - - x x x - - x x x +CR x x x - - - - x x x x - - x x x - - x x x +DB x x x - - - - x x x x - - x x x - - x x x +cs - - - - x +cs x x x - x - - - - x x - - - - x - - x x x + +Z x x - - x - - x - x +* x x - - x - - x - x +Z x x x - x - - x - x x - - - - - - - x - x +* x x x - x - - x - x x - - - - - - - x - x ++ x x - - - - - x - - - x +– x x - - - - - x - - - x ++ x x x - - - - x - - - x x - - - - - x +– x x x - - - - x - - - x x - - - - - x +cs x x - - x - - - - - - - - x +cs x x x - x - - - - - - - - x x - - - x + +9 x x x x x - - x - x - x - x - x x x x - x - - x +A x - - - - - - - - - - - - - - x x +X x - - - - - - - - - - - - - - x x +S +V x x - - x - - x - x - x - x - x - x - x +P x x - - x - - x - x - x - x - x - x - x +P - - - - x - - x - - - - - - - - - x x - x +1 - - - - - - - - - - - - - - - - - - - - - x +N x - - - - - - - - - - - - - - - - - - - - - x +E x x x - x - - - - - - - - - - x +**/ + +class picture_t { + static const char dot = '.', comma = ','; + + typedef std::vector<std::string> followings_t; + static const std::map <char, followings_t> followers; + + const char * const begin; + const char *p, *pend; + size_t pos; + struct exclusions_t { // Nonzero if set, > 1 is false. + // crdb means CR/DB or +/-. + // pluses means 2 or more consecutive '+'. + // minuses means 2 or more consecutive '-'. + // "21) The symbol 'Z' and the symbol '*' are mutually exclusive " + // stars means '*' or Z. + unsigned short int crdb, currency, dot, pluses, minuses, stars, zzz; + exclusions_t() + : crdb(0), currency(0), dot(0), pluses(0), minuses(0), stars(0) + {} + } exclusions; + YYLTYPE loc; + + bool is_crdb() const { // input must be uppercase for CR/DB + if( p[0] == 'C' || p[0] == 'D' ) { + char input[3] = { p[0], p[1] }; + return ( 0 == strcmp(input, "CR") || 0 == strcmp(input, "DB") ); + } + return false; + } + + const char * match_paren( const char *paren ) const { + gcc_assert(paren[0] == '('); // start with opening paren + paren = std::find_if( paren, pend, + []( char ch ) { + return ch == '(' || ch == ')'; + } ); + if( *paren == '(' ) return nullptr; // no nesting + if( paren == pend ) return nullptr; + return ++paren; + } + + const char * next_not( char ch ) const { + return std::find_if( p, pend, + [ch = TOUPPER(ch)]( char next ) { + return ch != next; + } ); + } + + const char * valid_next( const char *p, const std::string& valid ) const { + if( p == pend || p + 1 == pend ) return pend; + if( p[1] == '(' ) { + return match_paren(++p); + } + auto pv = std::find(valid.begin(), valid.end(), TOUPPER(p[1])); + return pv != valid.end()? ++p : nullptr; + } + const char * valid_next( const char *p, + bool first = true, char ch = '\0' ) const { + if( p == pend || p + 1 == pend ) return pend; + if( p[0] == '(' ) { + if( (p = match_paren(p)) == nullptr ) return nullptr; + } + if( p[0] == '(' ) return nullptr; // consecutive parentheses + + int index = first? 0 : 1; + if( !ch ) ch = *p; // use current character unless overridden + auto valid = followers.find(TOUPPER(ch)); + if( valid == followers.end() ) { + YYLTYPE loc(yylloc); + loc.first_column += int(p - begin); + error_msg( loc, "PICTURE: strange character %qc, giving up", ch ); + return nullptr; + } + return valid_next(p, valid->second[index]); + } + + const char * start() { // start modifies exclusions, but not p + auto pnext = p; + + switch(TOUPPER(p[0])) { + case comma: case dot: + // use decimal_is_comma() + // 4: .: "B0/,+Z*+-9E" + exclusions.dot++; + pnext = valid_next(p, "B0/,+Z*+-9E"); + break; + case '+': case '-': + // 6: +-: "B0/,.Z*Z*9VPPE" + exclusions.crdb++; + pnext = next_not(p[0]); + if( p + 1 < pnext ) { + exclusions.pluses++; + } + pnext = valid_next(--pnext, "B0/,.Z*Z*9VPPE"); + break; + case 'Z': case '*': + exclusions.stars++; + pnext = next_not(p[0]); + break; + case 'S': + // 19: S: "9VP" + pnext = valid_next(p, "9VP"); + break; + } + + /* + * "For fixed editing sign control, the currency symbol, when used, shall + * be either the leftmost symbol in character-string-1, optionally preceded + * by one of the symbols '+' or '-' " + */ + if( pnext ) { + if( p == pnext || p[0] == '+' || p[0] == '-' ) { + if( symbol_currency(*pnext) ) { + exclusions.currency++; + pnext = next_not(*pnext); + pnext = valid_next(--pnext, true, '$'); + } + } + } + + return pnext; + } + + const char * next() { // modify state; do not modify position + auto pnext = p; + auto loc(picture_t::loc); + loc.first_column += int(p - begin); + + if( is_crdb() ) { + if( exclusions.crdb++ ) { + error_msg( loc, "PICTURE: CR/DB and %c/%c may appear only once", '+', '-' ); + return nullptr; + } + if( p + 2 != pend ) { + error_msg( loc, "PICTURE: CR/DB must appear at the end" ); + return nullptr; + } + return pend; + } + + if( symbol_currency(p[0]) ) { + if( false && exclusions.currency++ ) { // not enforced + error_msg( loc, "PICTURE: CURRENCY SYMBOL sequence may appear at most once" ); + return nullptr; + } + return valid_next(p, ! exclusions.dot, '$'); + } + + switch(TOUPPER(p[0])) { + case '(': + return match_paren(p); + break; + case 'B': case '0': case '/': + pnext = valid_next(p); + break; + case comma: + if( decimal_is_comma() ) { + if( exclusions.dot++ ) { + error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] ); + return nullptr; + } + pnext = valid_next(p, true, dot); + } else { + pnext = valid_next(p); + } + break; + case dot: + if( p + 1 == pend ) { + pnext = pend; + } else { + if( decimal_is_comma() ) { + pnext = valid_next(p, true, comma ); + } else { + if( exclusions.dot++ ) { + error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] ); + return nullptr; + } + pnext = valid_next(p); + } + } + break; + + case '+': case '-': + // 7 is trailing sign; 13 & 14 are numeric. Leading sign handled by start(). + if( p + 1 == pend ) { + if( exclusions.crdb++ ) { + error_msg( loc, "PICTURE: %c/%c may appear at most once as a sign", '+', '-' ); + return nullptr; + } + pnext = pend; + } else { + pnext = next_not(p[0]); + if( p + 1 < pnext ) { + if( false && exclusions.pluses++ ) { // not enforced + error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] ); + return nullptr; + } + } + pnext = valid_next(pnext, ! exclusions.dot); + } + break; + + case 'Z': case '*': + if( false && exclusions.stars++ ) { // not enforced + error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] ); + return nullptr; + } + if( (pnext = next_not(p[0])) == nullptr ) return pnext; + pnext = valid_next(pnext, ! exclusions.dot); + break; + case 'P': + pnext = valid_next(pnext, ! exclusions.dot); + break; + case '9': + case 'A': case 'X': + case 'V': + case '1': + case 'N': + pnext = valid_next(p); + break; + case 'E': + pnext = valid_next(p, "+9"); + if( pnext && *pnext == '+' ) { + pnext = valid_next(p, "9"); + } + break; + default: + error_msg( loc, "PICTURE: %qc: invalid character", p[0] ); + return nullptr; + } + return pnext; + } + + public: + picture_t( const char *p, int len ) + : begin(p) + , p(p), pend(p + len) + , loc(yylloc) + { + assert(TOUPPER(*p) == 'P'); // as in PICTURE (or PICTURE IS) + // move p to start of picture string + while( (p = std::find_if(p, pend, fisspace)) != pend ) { + this->p = p = std::find_if(p, pend, + []( char ch ) { return ! fisspace(ch); } ); + } + assert(this->p != pend); + pos = this->p - begin; + } + + bool is_valid() { + if( !p ) return false; + if( (p = start()) == nullptr ) { + return false; + } + + while( p && p < pend) { + p = next(); + } + return p == pend; + } + + int starts_at() const { return pos; } +}; + +/* + * The Followers map gives 1 or 2 lists of valid characters following a + * character, the one in the key. If there are two lists, the correct one is + * determined by the caller based on the state of the picture string, i.e., + * what has been seen before. + */ +const std::map <char, picture_t::followings_t> picture_t::followers { + /* B0/ */ { 'B', {"B0/,.Z*+-9AXVPNE" } }, + /* B0/ */ { '0', {"B0/,.Z*+-9AXVPNE" } }, + /* B0/ */ { '/', {"B0/,.Z*+-9AXVPNE" } }, + /* , */ { ',', {"B0/,.Z*+-9VPE"} }, + /* . */ { '.', {"B0/,Z*+-9E"} }, + /* + { '+', "9" }, */ + /* +- */ { '+', {"B0/,.Z*9VPE", "" } }, + /* +- */ { '-', {"B0/,.Z*9VPE", "" } }, + /* CR/DB { 'C', "" }, */ + /* cs { 'c', "B0/,.Z*+-9VP" }, */ + /* cs { 'c', "+" }, */ + /* Z* */ { 'Z', {"B0/,.+Z*9VP", "B0/,+Z*"} }, + /* Z* */ { '*', {"B0/,.+Z*9VP", "B0/,+Z*"} }, + /* + */ { '+', {"B0/,.+-9VP", "B0/,+-"} }, + /* cs */ { '$', {"B0/,.+9VP", "B0/,+"} }, + /* 9 */ { '9', {"B0/,.+9AXVPE"} }, + /* AX */ { 'A', {"B0/9AX"} }, + /* AX */ { 'X', {"B0/9AX"} }, + /* S */ { 'S', {"9VP"} }, + /* V */ { 'V', {"B0/,+Z*+-9P"} }, + /* P */ { 'P', {"+VP", "B0/,+Z*9P"} }, + /* 1 */ { '1', {"1"} }, + /* N */ { 'N', {"B0/N"} }, + /* E */ { 'E', {"+9"} }, +}; + +/* + * Although picture_t::is_valid return a bool, it's not used. The validation + * routines emit messages where the error is detected. The entire string is + * subsequently parsed by the parser, which might otherwise accept an invalid + * string, but will usually emit a message of its own. + */ +static int +validate_picture() { + picture_t picture(yytext, yyleng); + picture.is_valid(); + return picture.starts_at(); +} + static inline bool is_integer_token( int *pvalue = NULL ) { int v, n = 0; |