1 files changed, 392 insertions, 3 deletions
diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h
index ea304ba..31093a6 100644
--- a/gcc/cobol/scan_ante.h
+++ b/gcc/cobol/scan_ante.h
@@ -149,7 +149,7 @@ numstr_of( const char string[], radix_t radix = decimal_e ) {
     }
     auto nx = std::count_if(input, p, fisdigit);
     if( 36 < nx ) {
-      error_msg(yylloc, "significand of %s has more than 36 digits (%zu)", input, nx);
+      error_msg(yylloc, "significand of %s has more than 36 digits (%td)", input, nx);
       return NO_CONDITION;
     }
 
@@ -356,6 +356,10 @@ static void level_found() {
   if( scanner_normal() ) parsing.need_level(false);
 }
 
+/*
+ * Trim the scanned location by the amount about to re-scanned. 
+ * Must be a macro because it expands yyless. 
+ */
 #define myless(N)				\
   do {						\
     auto n(N);					\
@@ -486,7 +490,8 @@ trim_location( int nkeep) {
          (fmt_size_t)nline, (fmt_size_t)rescan.size());
   if( nline ) {
     gcc_assert( yylloc.first_line + nline <= yylloc.last_line );
-    yylloc.last_line =- int(nline);
+    yylloc.last_line -= int(nline);
+    gcc_assert( yylloc.first_line <= yylloc.last_line );
     char *p = static_cast<char*>(memrchr(rescan.p, '\n', rescan.size()));
     yylloc.last_column = rescan.pend - ++p;
     return;
@@ -604,7 +609,9 @@ static const std::map <std::string, bint_t > binary_integers {
 
 static int
 binary_integer_usage( const char name[]) {
-  cbl_name_t uname = {};
+  // uname can't be cbl_name_t, because at this point name[] might have more
+  // than sizeof(cbl_name_t) characters.  The length check comes later.
+  char *uname = xstrdup(name);
   std::transform(name, name + strlen(name), uname, ftoupper);
 
   dbgmsg("%s:%d: checking %s in %zu keyword_aliases",
@@ -623,6 +630,7 @@ binary_integer_usage( const char name[]) {
   yylval.computational.signable = p->second.signable;
   dbgmsg("%s:%d: %s has type %d", __func__, __LINE__,
 	 uname, p->second.type );
+  free(uname);
   return p->second.token;
 }
       
@@ -689,6 +697,387 @@ picset( int token ) {
   return token;
 }
 
+/**
+## Script and data to produce picture_t::followers.
+## Based on ISO Table 10. 
+#! /usr/bin/awk -f
+
+BEGIN  {
+  str = "B0/ , . + +- +- CR/DB cs cs Z* Z* + + cs cs 9 AX S V P P 1 N E"
+  split(str, cols)
+}
+
+$1 ~ /CR|DB|cs/ { next }
+
+0 && !nlines++ {
+  for( i=0; i < length(cols); i++ ) {
+    print i, cols[i], "'" $i "'"
+  }
+}
+
+$field == "x" {
+  if( ! nout++ ) {
+    printf "%2d: %5s: \"", field, cols[field - 1]
+  }
+
+  gsub(/^ +| +$/, "", $1) 
+  printf "%s", $1
+}
+
+END {
+  if( ! nout++ ) {
+    printf "%2d: %5s: \"", field, cols[field - 1]
+  }
+  print "\""
+}
+
+B  x x x - x - - x - x x x x x x x x - x - x - x
+0  x x x - x - - x - x x x x x x x x - x - x - x
+/  x x x - x - - x - x x x x x x x x - x - x - x
+,  x x x - x - - x - x x x x x x x - - x - x
+.  x x - - x - - x - x - x - x - x
++  - - - - - - - - - - - - - - - - - - - - - - - x
++
+–
++  x x x - - - - x x x x - - x x x - - x x x
+CR x x x - - - - x x x x - - x x x - - x x x
+DB x x x - - - - x x x x - - x x x - - x x x
+cs - - - - x
+cs x x x - x - - - - x x - - - - x - - x x x
+
+Z  x x - - x - - x - x
+*  x x - - x - - x - x
+Z  x x x - x - - x - x x - - - - - - - x - x
+*  x x x - x - - x - x x - - - - - - - x - x
++  x x - - - - - x - - - x
+–  x x - - - - - x - - - x
++  x x x - - - - x - - - x x - - - - - x
+–  x x x - - - - x - - - x x - - - - - x
+cs x x - - x - - - - - - - - x
+cs x x x - x - - - - - - - - x x - - - x
+
+9  x x x x x - - x - x - x - x - x x x x - x - - x
+A  x - - - - - - - - - - - - - - x x
+X  x - - - - - - - - - - - - - - x x
+S 
+V  x x - - x - - x - x - x - x - x - x - x
+P  x x - - x - - x - x - x - x - x - x - x
+P  - - - - x - - x - - - - - - - - - x x - x
+1  - - - - - - - - - - - - - - - - - - - - - x
+N  x - - - - - - - - - - - - - - - - - - - - - x
+E  x x x - x - - - - - - - - - - x
+**/
+
+class picture_t {
+  static const char dot = '.', comma = ',';
+
+  typedef std::vector<std::string> followings_t;
+  static const std::map <char, followings_t> followers;
+  
+  const char * const begin;
+  const char *p, *pend; 
+  size_t pos;
+  struct exclusions_t { // Nonzero if set, > 1 is false.
+    // crdb means CR/DB or +/-.
+    // pluses means 2 or more consecutive '+'.
+    // minuses means 2 or more consecutive '-'.
+    // "21) The symbol 'Z' and the symbol '*' are mutually exclusive "
+    // stars means '*' or Z.
+    unsigned short int crdb, currency, dot, pluses, minuses, stars, zzz;
+    exclusions_t()
+      : crdb(0), currency(0), dot(0), pluses(0), minuses(0), stars(0)
+    {}
+  } exclusions;
+  YYLTYPE loc;
+  
+  bool is_crdb() const { // input must be uppercase for CR/DB
+    if( p[0] == 'C' || p[0] == 'D' ) {
+      char input[3] = { p[0], p[1] };
+      return ( 0 == strcmp(input, "CR") || 0 == strcmp(input, "DB") );
+    }
+    return false;
+  }
+
+  const char * match_paren( const char *paren ) const {
+    gcc_assert(paren[0] == '(');  // start with opening paren
+    paren = std::find_if( paren, pend,
+                          []( char ch ) {
+                            return ch == '(' || ch == ')';
+                          } );
+    if( *paren == '(' ) return nullptr; // no nesting
+    if( paren == pend ) return nullptr;
+    return ++paren;
+  }
+
+  const char * next_not( char ch ) const {
+    return std::find_if( p, pend,
+                         [ch = TOUPPER(ch)]( char next ) {
+                           return ch != next;
+                         } );
+  }
+
+  const char * valid_next( const char *p, const std::string& valid ) const {
+    if( p == pend || p + 1 == pend ) return pend;
+    if( p[1] == '(' ) {
+      return match_paren(++p);
+    }
+    auto pv = std::find(valid.begin(), valid.end(), TOUPPER(p[1]));
+    return pv != valid.end()? ++p : nullptr;
+  }
+  const char * valid_next( const char *p,
+                           bool first = true, char ch = '\0' ) const {
+    if( p == pend || p + 1 == pend ) return pend;
+    if( p[0] == '(' ) {
+      if( (p = match_paren(p)) == nullptr ) return nullptr;
+    }
+    if( p[0] == '(' ) return nullptr;  // consecutive parentheses
+    
+    int index = first? 0 : 1;
+    if( !ch ) ch = *p;   // use current character unless overridden
+    auto valid = followers.find(TOUPPER(ch));
+    if( valid == followers.end() ) {
+      YYLTYPE loc(yylloc);
+      loc.first_column += int(p - begin);
+      error_msg( loc, "PICTURE: strange character %qc, giving up", ch );
+      return nullptr;
+    }
+    return valid_next(p, valid->second[index]);
+  }
+
+  const char * start() { // start modifies exclusions, but not p
+    auto pnext = p;
+
+    switch(TOUPPER(p[0])) {
+    case comma: case dot:
+      // use decimal_is_comma()
+      //  4:     .: "B0/,+Z*+-9E"
+      exclusions.dot++;
+      pnext = valid_next(p, "B0/,+Z*+-9E");
+      break;
+    case '+': case '-':
+      //  6:    +-: "B0/,.Z*Z*9VPPE"
+      exclusions.crdb++;
+      pnext = next_not(p[0]);
+      if( p + 1 < pnext ) {
+        exclusions.pluses++;
+      }
+      pnext = valid_next(--pnext, "B0/,.Z*Z*9VPPE");
+      break;
+    case 'Z': case '*': 
+      exclusions.stars++;
+      pnext = next_not(p[0]);
+      break;
+    case 'S':
+      // 19:     S: "9VP"
+      pnext = valid_next(p, "9VP");
+      break;
+    }
+
+    /*
+     * "For fixed editing sign control, the currency symbol, when used, shall
+     * be either the leftmost symbol in character-string-1, optionally preceded
+     * by one of the symbols '+' or '-' "
+     */
+    if( pnext ) {
+      if( p == pnext || p[0] == '+' || p[0] == '-' ) {
+        if( symbol_currency(*pnext) ) {
+          exclusions.currency++;
+          pnext = next_not(*pnext);
+          pnext = valid_next(--pnext, true, '$');
+        }
+      }
+    }
+    
+    return pnext;
+  }
+
+  const char * next() { // modify state; do not modify position
+    auto pnext = p;
+    auto loc(picture_t::loc);
+    loc.first_column += int(p - begin);
+
+    if( is_crdb() ) {
+      if( exclusions.crdb++ ) {
+        error_msg( loc, "PICTURE: CR/DB and %c/%c may appear only once", '+', '-' );
+        return nullptr;
+      }
+      if( p + 2 != pend ) { 
+        error_msg( loc, "PICTURE: CR/DB must appear at the end" );
+        return nullptr;
+      }
+      return pend;
+    }
+
+    if( symbol_currency(p[0]) ) {
+      if( false && exclusions.currency++ ) { // not enforced
+        error_msg( loc, "PICTURE: CURRENCY SYMBOL sequence may appear at most once" );
+        return nullptr;
+      }
+      return valid_next(p, ! exclusions.dot, '$');
+    }
+
+    switch(TOUPPER(p[0])) {
+    case '(':
+      return match_paren(p);
+      break;
+    case 'B': case '0': case '/':
+      pnext = valid_next(p);
+      break;
+    case comma: 
+      if( decimal_is_comma() ) {
+        if( exclusions.dot++ ) {
+          error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] );
+          return nullptr;
+        }
+        pnext = valid_next(p, true, dot);
+      } else {
+        pnext = valid_next(p);
+      }
+      break;
+    case dot: 
+      if( p + 1 == pend ) {
+        pnext = pend;
+      } else {
+        if( decimal_is_comma() ) {
+          pnext = valid_next(p, true, comma );
+        } else {
+          if( exclusions.dot++ ) {
+            error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] );
+            return nullptr;
+          }
+          pnext = valid_next(p);
+        }
+      }
+      break;
+
+    case '+': case '-':
+      // 7 is trailing sign; 13 & 14 are numeric.  Leading sign handled by start(). 
+      if( p + 1 == pend ) {
+        if( exclusions.crdb++ ) {
+          error_msg( loc, "PICTURE: %c/%c may appear at most once as a sign", '+', '-' );
+          return nullptr;
+        }
+        pnext = pend;
+      } else {
+        pnext = next_not(p[0]);
+        if( p + 1 < pnext ) {
+          if( false && exclusions.pluses++ )  { // not enforced
+            error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] );
+            return nullptr;
+          }
+        }
+        pnext = valid_next(pnext, ! exclusions.dot);
+      }
+      break;
+
+    case 'Z': case '*':
+      if( false && exclusions.stars++ ) { // not enforced 
+        error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] );
+        return nullptr;
+      }
+      if( (pnext = next_not(p[0])) == nullptr ) return pnext;
+      pnext = valid_next(pnext, ! exclusions.dot);
+      break;
+    case 'P':
+      pnext = valid_next(pnext, ! exclusions.dot);
+      break;
+    case '9':
+    case 'A': case 'X':
+    case 'V':
+    case '1':
+    case 'N':
+      pnext = valid_next(p);
+      break;
+    case 'E':
+      pnext = valid_next(p, "+9");
+      if( pnext && *pnext == '+' ) {
+        pnext = valid_next(p, "9");
+      }
+      break;
+    default:
+      error_msg( loc, "PICTURE: %qc: invalid character", p[0] );
+      return nullptr;
+    }
+    return pnext;
+  }
+  
+ public:
+  picture_t( const char *p, int len )
+    : begin(p)
+    , p(p), pend(p + len)
+    , loc(yylloc)
+  { 
+    assert(TOUPPER(*p) == 'P'); // as in PICTURE (or PICTURE IS)
+    // move p to start of picture string
+    while( (p = std::find_if(p, pend, fisspace)) != pend ) {
+      this->p = p = std::find_if(p, pend,
+                                 []( char ch ) { return ! fisspace(ch); } );
+    }
+    assert(this->p != pend);
+    pos = this->p - begin;
+  }
+
+  bool is_valid() {
+    if( !p ) return false;
+    if( (p = start()) == nullptr ) {
+      return false;
+    }
+
+    while( p && p < pend) {
+      p = next();
+    }
+    return p == pend;
+  }
+
+  int starts_at() const { return pos; }
+};
+
+/*
+ * The Followers map gives 1 or 2 lists of valid characters following a
+ * character, the one in the key. If there are two lists, the correct one is
+ * determined by the caller based on the state of the picture string, i.e.,
+ * what has been seen before.
+ */
+const std::map <char, picture_t::followings_t> picture_t::followers {
+  /*   B0/ */ { 'B', {"B0/,.Z*+-9AXVPNE" } },
+  /*   B0/ */ { '0', {"B0/,.Z*+-9AXVPNE" } },
+  /*   B0/ */ { '/', {"B0/,.Z*+-9AXVPNE" } },
+  /*     , */ { ',', {"B0/,.Z*+-9VPE"} },
+  /*     . */ { '.', {"B0/,Z*+-9E"} },
+  /*     +    { '+', "9" }, */
+  /*    +- */ { '+', {"B0/,.Z*9VPE", "" } },
+  /*    +- */ { '-', {"B0/,.Z*9VPE", "" } },
+  /* CR/DB    { 'C', "" }, */
+  /*    cs    { 'c', "B0/,.Z*+-9VP" }, */
+  /*    cs    { 'c', "+" }, */
+  /*    Z* */ { 'Z', {"B0/,.+Z*9VP", "B0/,+Z*"} },
+  /*    Z* */ { '*', {"B0/,.+Z*9VP", "B0/,+Z*"} },
+  /*     + */ { '+', {"B0/,.+-9VP",  "B0/,+-"} },
+  /*    cs */ { '$', {"B0/,.+9VP",   "B0/,+"} },
+  /*     9 */ { '9', {"B0/,.+9AXVPE"} },
+  /*    AX */ { 'A', {"B0/9AX"} },
+  /*    AX */ { 'X', {"B0/9AX"} },
+  /*     S */ { 'S', {"9VP"} },
+  /*     V */ { 'V', {"B0/,+Z*+-9P"} },
+  /*     P */ { 'P', {"+VP", "B0/,+Z*9P"} },
+  /*     1 */ { '1', {"1"} },
+  /*     N */ { 'N', {"B0/N"} },
+  /*     E */ { 'E', {"+9"} },
+};
+
+/*
+ * Although picture_t::is_valid return a bool, it's not used. The validation
+ * routines emit messages where the error is detected. The entire string is
+ * subsequently parsed by the parser, which might otherwise accept an invalid
+ * string, but will usually emit a message of its own.
+ */
+static int 
+validate_picture() {
+  picture_t picture(yytext, yyleng);
+  picture.is_valid();
+  return  picture.starts_at();
+}
+
 static inline bool
 is_integer_token( int *pvalue = NULL ) {
   int v, n = 0;