aboutsummaryrefslogtreecommitdiff
path: root/gcc/cobol/scan_ante.h
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/cobol/scan_ante.h')
-rw-r--r--gcc/cobol/scan_ante.h395
1 files changed, 392 insertions, 3 deletions
diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h
index ea304ba..31093a6 100644
--- a/gcc/cobol/scan_ante.h
+++ b/gcc/cobol/scan_ante.h
@@ -149,7 +149,7 @@ numstr_of( const char string[], radix_t radix = decimal_e ) {
}
auto nx = std::count_if(input, p, fisdigit);
if( 36 < nx ) {
- error_msg(yylloc, "significand of %s has more than 36 digits (%zu)", input, nx);
+ error_msg(yylloc, "significand of %s has more than 36 digits (%td)", input, nx);
return NO_CONDITION;
}
@@ -356,6 +356,10 @@ static void level_found() {
if( scanner_normal() ) parsing.need_level(false);
}
+/*
+ * Trim the scanned location by the amount about to re-scanned.
+ * Must be a macro because it expands yyless.
+ */
#define myless(N) \
do { \
auto n(N); \
@@ -486,7 +490,8 @@ trim_location( int nkeep) {
(fmt_size_t)nline, (fmt_size_t)rescan.size());
if( nline ) {
gcc_assert( yylloc.first_line + nline <= yylloc.last_line );
- yylloc.last_line =- int(nline);
+ yylloc.last_line -= int(nline);
+ gcc_assert( yylloc.first_line <= yylloc.last_line );
char *p = static_cast<char*>(memrchr(rescan.p, '\n', rescan.size()));
yylloc.last_column = rescan.pend - ++p;
return;
@@ -604,7 +609,9 @@ static const std::map <std::string, bint_t > binary_integers {
static int
binary_integer_usage( const char name[]) {
- cbl_name_t uname = {};
+ // uname can't be cbl_name_t, because at this point name[] might have more
+ // than sizeof(cbl_name_t) characters. The length check comes later.
+ char *uname = xstrdup(name);
std::transform(name, name + strlen(name), uname, ftoupper);
dbgmsg("%s:%d: checking %s in %zu keyword_aliases",
@@ -623,6 +630,7 @@ binary_integer_usage( const char name[]) {
yylval.computational.signable = p->second.signable;
dbgmsg("%s:%d: %s has type %d", __func__, __LINE__,
uname, p->second.type );
+ free(uname);
return p->second.token;
}
@@ -689,6 +697,387 @@ picset( int token ) {
return token;
}
+/**
+## Script and data to produce picture_t::followers.
+## Based on ISO Table 10.
+#! /usr/bin/awk -f
+
+BEGIN {
+ str = "B0/ , . + +- +- CR/DB cs cs Z* Z* + + cs cs 9 AX S V P P 1 N E"
+ split(str, cols)
+}
+
+$1 ~ /CR|DB|cs/ { next }
+
+0 && !nlines++ {
+ for( i=0; i < length(cols); i++ ) {
+ print i, cols[i], "'" $i "'"
+ }
+}
+
+$field == "x" {
+ if( ! nout++ ) {
+ printf "%2d: %5s: \"", field, cols[field - 1]
+ }
+
+ gsub(/^ +| +$/, "", $1)
+ printf "%s", $1
+}
+
+END {
+ if( ! nout++ ) {
+ printf "%2d: %5s: \"", field, cols[field - 1]
+ }
+ print "\""
+}
+
+B x x x - x - - x - x x x x x x x x - x - x - x
+0 x x x - x - - x - x x x x x x x x - x - x - x
+/ x x x - x - - x - x x x x x x x x - x - x - x
+, x x x - x - - x - x x x x x x x - - x - x
+. x x - - x - - x - x - x - x - x
++ - - - - - - - - - - - - - - - - - - - - - - - x
++
+–
++ x x x - - - - x x x x - - x x x - - x x x
+CR x x x - - - - x x x x - - x x x - - x x x
+DB x x x - - - - x x x x - - x x x - - x x x
+cs - - - - x
+cs x x x - x - - - - x x - - - - x - - x x x
+
+Z x x - - x - - x - x
+* x x - - x - - x - x
+Z x x x - x - - x - x x - - - - - - - x - x
+* x x x - x - - x - x x - - - - - - - x - x
++ x x - - - - - x - - - x
+– x x - - - - - x - - - x
++ x x x - - - - x - - - x x - - - - - x
+– x x x - - - - x - - - x x - - - - - x
+cs x x - - x - - - - - - - - x
+cs x x x - x - - - - - - - - x x - - - x
+
+9 x x x x x - - x - x - x - x - x x x x - x - - x
+A x - - - - - - - - - - - - - - x x
+X x - - - - - - - - - - - - - - x x
+S
+V x x - - x - - x - x - x - x - x - x - x
+P x x - - x - - x - x - x - x - x - x - x
+P - - - - x - - x - - - - - - - - - x x - x
+1 - - - - - - - - - - - - - - - - - - - - - x
+N x - - - - - - - - - - - - - - - - - - - - - x
+E x x x - x - - - - - - - - - - x
+**/
+
+class picture_t {
+ static const char dot = '.', comma = ',';
+
+ typedef std::vector<std::string> followings_t;
+ static const std::map <char, followings_t> followers;
+
+ const char * const begin;
+ const char *p, *pend;
+ size_t pos;
+ struct exclusions_t { // Nonzero if set, > 1 is false.
+ // crdb means CR/DB or +/-.
+ // pluses means 2 or more consecutive '+'.
+ // minuses means 2 or more consecutive '-'.
+ // "21) The symbol 'Z' and the symbol '*' are mutually exclusive "
+ // stars means '*' or Z.
+ unsigned short int crdb, currency, dot, pluses, minuses, stars, zzz;
+ exclusions_t()
+ : crdb(0), currency(0), dot(0), pluses(0), minuses(0), stars(0)
+ {}
+ } exclusions;
+ YYLTYPE loc;
+
+ bool is_crdb() const { // input must be uppercase for CR/DB
+ if( p[0] == 'C' || p[0] == 'D' ) {
+ char input[3] = { p[0], p[1] };
+ return ( 0 == strcmp(input, "CR") || 0 == strcmp(input, "DB") );
+ }
+ return false;
+ }
+
+ const char * match_paren( const char *paren ) const {
+ gcc_assert(paren[0] == '('); // start with opening paren
+ paren = std::find_if( paren, pend,
+ []( char ch ) {
+ return ch == '(' || ch == ')';
+ } );
+ if( *paren == '(' ) return nullptr; // no nesting
+ if( paren == pend ) return nullptr;
+ return ++paren;
+ }
+
+ const char * next_not( char ch ) const {
+ return std::find_if( p, pend,
+ [ch = TOUPPER(ch)]( char next ) {
+ return ch != next;
+ } );
+ }
+
+ const char * valid_next( const char *p, const std::string& valid ) const {
+ if( p == pend || p + 1 == pend ) return pend;
+ if( p[1] == '(' ) {
+ return match_paren(++p);
+ }
+ auto pv = std::find(valid.begin(), valid.end(), TOUPPER(p[1]));
+ return pv != valid.end()? ++p : nullptr;
+ }
+ const char * valid_next( const char *p,
+ bool first = true, char ch = '\0' ) const {
+ if( p == pend || p + 1 == pend ) return pend;
+ if( p[0] == '(' ) {
+ if( (p = match_paren(p)) == nullptr ) return nullptr;
+ }
+ if( p[0] == '(' ) return nullptr; // consecutive parentheses
+
+ int index = first? 0 : 1;
+ if( !ch ) ch = *p; // use current character unless overridden
+ auto valid = followers.find(TOUPPER(ch));
+ if( valid == followers.end() ) {
+ YYLTYPE loc(yylloc);
+ loc.first_column += int(p - begin);
+ error_msg( loc, "PICTURE: strange character %qc, giving up", ch );
+ return nullptr;
+ }
+ return valid_next(p, valid->second[index]);
+ }
+
+ const char * start() { // start modifies exclusions, but not p
+ auto pnext = p;
+
+ switch(TOUPPER(p[0])) {
+ case comma: case dot:
+ // use decimal_is_comma()
+ // 4: .: "B0/,+Z*+-9E"
+ exclusions.dot++;
+ pnext = valid_next(p, "B0/,+Z*+-9E");
+ break;
+ case '+': case '-':
+ // 6: +-: "B0/,.Z*Z*9VPPE"
+ exclusions.crdb++;
+ pnext = next_not(p[0]);
+ if( p + 1 < pnext ) {
+ exclusions.pluses++;
+ }
+ pnext = valid_next(--pnext, "B0/,.Z*Z*9VPPE");
+ break;
+ case 'Z': case '*':
+ exclusions.stars++;
+ pnext = next_not(p[0]);
+ break;
+ case 'S':
+ // 19: S: "9VP"
+ pnext = valid_next(p, "9VP");
+ break;
+ }
+
+ /*
+ * "For fixed editing sign control, the currency symbol, when used, shall
+ * be either the leftmost symbol in character-string-1, optionally preceded
+ * by one of the symbols '+' or '-' "
+ */
+ if( pnext ) {
+ if( p == pnext || p[0] == '+' || p[0] == '-' ) {
+ if( symbol_currency(*pnext) ) {
+ exclusions.currency++;
+ pnext = next_not(*pnext);
+ pnext = valid_next(--pnext, true, '$');
+ }
+ }
+ }
+
+ return pnext;
+ }
+
+ const char * next() { // modify state; do not modify position
+ auto pnext = p;
+ auto loc(picture_t::loc);
+ loc.first_column += int(p - begin);
+
+ if( is_crdb() ) {
+ if( exclusions.crdb++ ) {
+ error_msg( loc, "PICTURE: CR/DB and %c/%c may appear only once", '+', '-' );
+ return nullptr;
+ }
+ if( p + 2 != pend ) {
+ error_msg( loc, "PICTURE: CR/DB must appear at the end" );
+ return nullptr;
+ }
+ return pend;
+ }
+
+ if( symbol_currency(p[0]) ) {
+ if( false && exclusions.currency++ ) { // not enforced
+ error_msg( loc, "PICTURE: CURRENCY SYMBOL sequence may appear at most once" );
+ return nullptr;
+ }
+ return valid_next(p, ! exclusions.dot, '$');
+ }
+
+ switch(TOUPPER(p[0])) {
+ case '(':
+ return match_paren(p);
+ break;
+ case 'B': case '0': case '/':
+ pnext = valid_next(p);
+ break;
+ case comma:
+ if( decimal_is_comma() ) {
+ if( exclusions.dot++ ) {
+ error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] );
+ return nullptr;
+ }
+ pnext = valid_next(p, true, dot);
+ } else {
+ pnext = valid_next(p);
+ }
+ break;
+ case dot:
+ if( p + 1 == pend ) {
+ pnext = pend;
+ } else {
+ if( decimal_is_comma() ) {
+ pnext = valid_next(p, true, comma );
+ } else {
+ if( exclusions.dot++ ) {
+ error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] );
+ return nullptr;
+ }
+ pnext = valid_next(p);
+ }
+ }
+ break;
+
+ case '+': case '-':
+ // 7 is trailing sign; 13 & 14 are numeric. Leading sign handled by start().
+ if( p + 1 == pend ) {
+ if( exclusions.crdb++ ) {
+ error_msg( loc, "PICTURE: %c/%c may appear at most once as a sign", '+', '-' );
+ return nullptr;
+ }
+ pnext = pend;
+ } else {
+ pnext = next_not(p[0]);
+ if( p + 1 < pnext ) {
+ if( false && exclusions.pluses++ ) { // not enforced
+ error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] );
+ return nullptr;
+ }
+ }
+ pnext = valid_next(pnext, ! exclusions.dot);
+ }
+ break;
+
+ case 'Z': case '*':
+ if( false && exclusions.stars++ ) { // not enforced
+ error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] );
+ return nullptr;
+ }
+ if( (pnext = next_not(p[0])) == nullptr ) return pnext;
+ pnext = valid_next(pnext, ! exclusions.dot);
+ break;
+ case 'P':
+ pnext = valid_next(pnext, ! exclusions.dot);
+ break;
+ case '9':
+ case 'A': case 'X':
+ case 'V':
+ case '1':
+ case 'N':
+ pnext = valid_next(p);
+ break;
+ case 'E':
+ pnext = valid_next(p, "+9");
+ if( pnext && *pnext == '+' ) {
+ pnext = valid_next(p, "9");
+ }
+ break;
+ default:
+ error_msg( loc, "PICTURE: %qc: invalid character", p[0] );
+ return nullptr;
+ }
+ return pnext;
+ }
+
+ public:
+ picture_t( const char *p, int len )
+ : begin(p)
+ , p(p), pend(p + len)
+ , loc(yylloc)
+ {
+ assert(TOUPPER(*p) == 'P'); // as in PICTURE (or PICTURE IS)
+ // move p to start of picture string
+ while( (p = std::find_if(p, pend, fisspace)) != pend ) {
+ this->p = p = std::find_if(p, pend,
+ []( char ch ) { return ! fisspace(ch); } );
+ }
+ assert(this->p != pend);
+ pos = this->p - begin;
+ }
+
+ bool is_valid() {
+ if( !p ) return false;
+ if( (p = start()) == nullptr ) {
+ return false;
+ }
+
+ while( p && p < pend) {
+ p = next();
+ }
+ return p == pend;
+ }
+
+ int starts_at() const { return pos; }
+};
+
+/*
+ * The Followers map gives 1 or 2 lists of valid characters following a
+ * character, the one in the key. If there are two lists, the correct one is
+ * determined by the caller based on the state of the picture string, i.e.,
+ * what has been seen before.
+ */
+const std::map <char, picture_t::followings_t> picture_t::followers {
+ /* B0/ */ { 'B', {"B0/,.Z*+-9AXVPNE" } },
+ /* B0/ */ { '0', {"B0/,.Z*+-9AXVPNE" } },
+ /* B0/ */ { '/', {"B0/,.Z*+-9AXVPNE" } },
+ /* , */ { ',', {"B0/,.Z*+-9VPE"} },
+ /* . */ { '.', {"B0/,Z*+-9E"} },
+ /* + { '+', "9" }, */
+ /* +- */ { '+', {"B0/,.Z*9VPE", "" } },
+ /* +- */ { '-', {"B0/,.Z*9VPE", "" } },
+ /* CR/DB { 'C', "" }, */
+ /* cs { 'c', "B0/,.Z*+-9VP" }, */
+ /* cs { 'c', "+" }, */
+ /* Z* */ { 'Z', {"B0/,.+Z*9VP", "B0/,+Z*"} },
+ /* Z* */ { '*', {"B0/,.+Z*9VP", "B0/,+Z*"} },
+ /* + */ { '+', {"B0/,.+-9VP", "B0/,+-"} },
+ /* cs */ { '$', {"B0/,.+9VP", "B0/,+"} },
+ /* 9 */ { '9', {"B0/,.+9AXVPE"} },
+ /* AX */ { 'A', {"B0/9AX"} },
+ /* AX */ { 'X', {"B0/9AX"} },
+ /* S */ { 'S', {"9VP"} },
+ /* V */ { 'V', {"B0/,+Z*+-9P"} },
+ /* P */ { 'P', {"+VP", "B0/,+Z*9P"} },
+ /* 1 */ { '1', {"1"} },
+ /* N */ { 'N', {"B0/N"} },
+ /* E */ { 'E', {"+9"} },
+};
+
+/*
+ * Although picture_t::is_valid return a bool, it's not used. The validation
+ * routines emit messages where the error is detected. The entire string is
+ * subsequently parsed by the parser, which might otherwise accept an invalid
+ * string, but will usually emit a message of its own.
+ */
+static int
+validate_picture() {
+ picture_t picture(yytext, yyleng);
+ picture.is_valid();
+ return picture.starts_at();
+}
+
static inline bool
is_integer_token( int *pvalue = NULL ) {
int v, n = 0;