/*
 * Copyright (c) 2021-2025 Symas Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of the Symas Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Flex override
 */
/* Forward declaration of the scanner's own fatal-error routine. */
static void /* yynoreturn */ yy_fatal_error ( const char* msg  );

/*
 * Report MSG through cbl_internal_error(), prefixed with "scan.o: ",
 * and then pass the same message on to yy_fatal_error().
 */
static inline void
die_fatal_error( const char msg[] )
{
  cbl_internal_error("scan.o: %s",  msg);
  yy_fatal_error(msg);
}

/* Route the scanner's fatal errors through die_fatal_error(). */
#define YY_FATAL_ERROR(msg) die_fatal_error((msg))

/*
 * External functions
 */

// Callbacks queued by input_file_status_t and invoked from
// enter_leave_t::notify() when a file is entered or left.
void parser_enter_file(const char *filename);
void parser_leave_file();

bool is_fixed_format();
bool include_debug();
// Supplies scanner input: YY_INPUT calls this to fill buf, and maps a
// return value of 0 to YY_NULL.
int lexer_input( char buf[], int max_size, FILE *input );

// Printable form of a token, used in this file's status/debug messages.
const char * keyword_str( int token );

// typed_name() treats a return of 0 as "no match"; any other value is
// returned to the parser as the token.
int repository_function_tok( const char name[] );

void cobol_set_indicator_column( int column );

void next_sentence_label(cbl_label_t*);

// ndigit() treats a return of -1 as "no count" and uses its own argument.
int repeat_count( const char picture[] );

size_t program_level();

// Alternate parser entry point; parsing_status_t compares its current
// parser against this function to decide whether it is "in CDF".
int ydfparse(void);

FILE * copy_mode_start();

/*
 * Public functions and data
 */

cbl_label_t *next_sentence;

// Echo flag; set and queried through the lexer_echo() overloads.
static bool echo_on = false;

// Set the echo flag.
void lexer_echo( bool tf ) { echo_on = tf; }

// Report the current echo flag.
bool lexer_echo() { return echo_on; }

// IBM says a picture can be up to 50 bytes, not 1000 words.
// ISO says a picture can be up to 63 bytes.  We allow for a NUL terminator.
static char orig_picture[PICTURE_MAX];
static char orig_number[80];

/*
 * Return a newly allocated (xstrdup) copy of the PICTURE text accumulated
 * in orig_picture.  Asserts that the accumulated text is not empty.
 */
const char *
original_picture() {
  char * const copy = xstrdup(orig_picture);
  assert('\0' != orig_picture[0]);
  return copy;
}

/*
 * Save or retrieve the text of the most recent numeric literal.
 *
 * With a non-NULL INPUT: copy it into orig_number and return INPUT, or
 * return NULL (leaving orig_number unchanged) if INPUT and its NUL
 * terminator do not fit.
 *
 * With no argument: return a newly allocated (xstrdup) copy of the saved
 * text.  Asserts that the saved text is not empty.
 */
char *
original_number( char input[] = NULL ) {
  if( input ) {
    // ">=", not ">": strcpy also writes the terminating NUL, so a string of
    // exactly sizeof(orig_number) bytes would overrun the buffer by one.
    if( strlen(input) >= sizeof(orig_number) ) return NULL;
    strcpy(orig_number, input);
    return input;
  }
  char *out = xstrdup(orig_number);
  assert(orig_number[0] != '\0');
  return out;
}

/*
 * Local functions
 */
static const char * start_condition_str( int sc );
static const char * start_condition_is();

/* True for any character that ISSPACE() does not accept. */
static bool
nonspace( char ch ) {
  return ISSPACE(ch) ? false : true;
}

/*
 * Classify STRING, the text of a numeric-looking token, and set the
 * semantic value for the parser.
 *
 * A private copy of STRING is stored in yylval.numstr.string (and in
 * ydflval.string), with RADIX stored alongside.  A comma that is the final
 * character is removed.  The text is also saved with original_number().
 *
 * Returns
 *   NUMSTR        the text is an acceptable numeric literal
 *   NAME          the text has an 'E' but no decimal point (e.g. 1E0)
 *   NO_CONDITION  the text was rejected; a diagnostic is issued, except
 *                 when the only decimal point follows the 'E'
 */
static int
numstr_of( const char string[], radix_t radix = decimal_e ) {
  yylval.numstr.radix = radix;
  ydflval.string = yylval.numstr.string = xstrdup(string);
  char *comma = strchr(yylval.numstr.string, ',');
  if( comma && comma[1] == '\0' ) *comma = '\0';
  if( ! original_number(yylval.numstr.string) ) {
    error_msg(yylloc, "input inconceivably long");
    return NO_CONDITION;
  }

  const char *input = yylval.numstr.string;
  auto eoinput = input + strlen(input);
  // p marks the first 'e' or 'E', or eoinput if there is none.
  auto p = std::find_if( input, eoinput,
                         []( char ch ) { return ch == 'e' || ch == 'E';} );

  if( p < eoinput ) {
    if( eoinput == std::find(input, eoinput, symbol_decimal_point()) ) {
      // no decimal point: 1E0 is a valid user-defined name
      ydflval.string = yylval.string = yylval.numstr.string;
      return NAME;
    }
    assert(input < p);
    // "The literal to the left of the 'E' represents the significand. It may
    //  be signed and shall include a decimal point. The significand shall be
    //  from 1 to 36 digits in length."
    if( p == std::find(input, p, symbol_decimal_point()) ) {
      return NO_CONDITION;
    }
    auto nx = std::count_if(input, p, fisdigit);
    if( 36 < nx ) {
      // count_if yields a signed difference type; convert it for %zu.
      error_msg(yylloc, "significand of %s has more than 36 digits (%zu)",
                input, size_t(nx));
      return NO_CONDITION;
    }

    // "The literal to the right of the 'E' represents the exponent. It may be
    //  signed and shall have a maximum of four digits and no decimal point. "
    // "The maximum permitted value and minimum permitted value of the
    //  exponent is implementor-defined." (We allow 9999.)
    nx = std::count_if(p, eoinput, fisdigit);
    if( 4 < nx ) {
      error_msg(yylloc, "exponent %s more than 4 digits", p + 1);
      return NO_CONDITION;
    }
    if( eoinput != std::find(p, eoinput, symbol_decimal_point()) ) {
      // The message has no conversion, so no argument is passed.
      error_msg(yylloc, "exponent includes decimal point");
      return NO_CONDITION;
    }

    // "If all the digits in the significand are zero, then all the digits of
    //  the exponent shall also be zero and neither significand nor exponent
    //  shall have a negative sign."
    bool zero_significand = std::all_of( input, p,
                                         []( char ch ) {
                                           return !ISDIGIT(ch) || ch == '0'; } );
    if( zero_significand ) {
      if( p != std::find(input, p, '-') ) {
        error_msg(yylloc, "zero significand of %s "
                 "cannot be negative", input);
        return NO_CONDITION;
      }
      if( eoinput != std::find(p, eoinput, '-') ) {
        error_msg(yylloc, "exponent of zero significand of %s "
                 "cannot be negative", input);
        return NO_CONDITION;
      }
    }
  }
  if( 1 < std::count(input, eoinput, symbol_decimal_point()) ) {
    // The message has no conversion, so no argument is passed.
    error_msg(yylloc, "invalid numeric literal");
    return NO_CONDITION;
  }

  return NUMSTR;
}

static char *
null_trim( char name[] ) {
  auto p = std::find_if( name, name + strlen(name), fisspace );
  if( p < name + strlen(name) ) *p = '\0';
  return name;
}

/*
 * CDF management
 */
static int final_token;

/* Spell a boolean as "True" or "False". */
static inline const char *
boolalpha( bool tf ) {
  if( tf ) {
    return "True";
  }
  return "False";
}

/*
 * One entry of CDF status: the line number and filename current when the
 * entry was constructed (yylineno, cobol_filename()), a token, and whether
 * parsing is enabled.
 */
struct cdf_status_t {
  int lineno;
  const char *filename;
  int  token;
  bool parsing;

  cdf_status_t( int tok = 0, bool is_parsing = true )
    : lineno(yylineno)
    , filename(cobol_filename())
    , token(tok)
    , parsing(is_parsing)
  {}

  // Invert the parsing flag and return its new value.
  bool toggle() {
    parsing = ! parsing;
    return parsing;
  }

  // Render as "file:line: TOKEN, parsing True|False".  The text lives in a
  // static buffer that is overwritten by the next call.
  const char * str() const {
    static char line[132];
    snprintf(line, sizeof(line), "%s:%d: %s, parsing %s",
             filename, lineno, keyword_str(token), boolalpha(parsing));
    return line;
  }

  // Free-function form of str().
  static const char * as_string( const cdf_status_t& status ) {
    return status.str();
  }
};

/*
 * Scanning status is true if tokens are being parsed and false if not (because
 * CDF is skipping some code).  Because CDF status is nested, status is true
 * only if the whole stack is true. That is, if B is stacked on A, and A is
 * false, then all of B is skipped, regardless of >>IF and >>ELSE for B.
 */
static bool run_cdf( int token );

/*
 * A stack of cdf_status_t entries, plus the scanner's current state:
 * which parser is active, whether end-of-file was reached, whether a level
 * number is expected next, and one optional pending token.
 */
static class parsing_status_t : public std::stack<cdf_status_t> {
  typedef int (parser_t)(void);

  struct parsing_state_t {
    bool at_eof, expect_field_level;
    int pending_token;
    parser_t *parser;
    parsing_state_t()
      : at_eof(false)
      , expect_field_level(true)
      , pending_token(0)
      , parser(yyparse)
    {}
  } state, shadow;

 public:
  // True only if every stacked entry is parsing (so, true when empty).
  bool on() const {
    for( const auto& status : c ) {
      if( ! status.parsing ) return false;
    }
    return true;
  }

  bool feed_a_parser() const {
    return on() || in_cdf();
  }

  void need_level( bool tf ) { state.expect_field_level = tf; }
  bool need_level() const    { return state.expect_field_level; }

  // Copy the whole current state to the shadow, then switch parsers.
  void parser_save( parser_t * new_parser ) {
    shadow = state;
    state.parser = new_parser;
  }
  // Restore only the parser from the shadow copy.
  void parser_restore() {
    state.parser = shadow.parser;
  }

  void inject_token( int token ) { state.pending_token = token; }
  // Return the pending token, if any, and clear it.
  int pending_token() {
    const int token = state.pending_token;
    state.pending_token = 0;
    return token;
  }

  // Record end-of-file in both current and shadow state; TF must be true.
  void at_eof( bool tf ) {
    shadow.at_eof = tf;
    state.at_eof = tf;
    assert(tf);
  }
  bool at_eof() const { return state.at_eof; }

  bool in_cdf() const { return state.parser == ydfparse; }
  bool normal() const { return on() && state.parser == yyparse; }

  // Emit one numbered warning line per stacked entry.
  void splat() const {
    int i = 0;
    for( auto p = c.begin(); p != c.end(); ++p ) {
      yywarn( "%4d\t%s", ++i, p->str() );
    }
  }
} parsing;

// Used only by parser, so scanner_normal() obviously true.
// Clears the saved PICTURE text and marks a level number as expected next.
void
field_done() {
  orig_picture[0] = '\0';
  parsing.need_level(true);
}

// Token of the entry on top of the parsing stack.  If the stack is empty,
// issue a diagnostic and return NO_CONDITION.
static int
scanner_token() {
  if( ! parsing.empty() ) {
    return parsing.top().token;
  }
  error_msg(yylloc, ">>ELSE or >>END-IF without >>IF");
  return NO_CONDITION;
}

// True if every entry on the parsing stack is parsing.
bool
scanner_parsing() {
  return parsing.on();
}

// True if scanner_parsing() holds and the active parser is yyparse.
bool
scanner_normal() {
  return parsing.normal();
}

// Push a new status entry for TOKEN whose parsing flag is TF.
// With yydebug set, report the new state and dump the stack.
void
scanner_parsing( int token, bool tf ) {
  parsing.push( cdf_status_t(token, tf) );
  if( ! yydebug ) return;

  yywarn("%10s: parsing now %5s, depth %zu",
          keyword_str(token), boolalpha(parsing.on()), parsing.size());
  parsing.splat();
}
// Flip the parsing flag of the top status entry.  An empty stack is
// reported as an error and left unchanged.
void
scanner_parsing_toggle() {
  if( parsing.empty() ) {
    error_msg(yylloc, ">>ELSE without >>IF");
    return;
  }

  parsing.top().toggle();
  if( ! yydebug ) return;

  yywarn("%10s: parsing now %5s",
          keyword_str(CDF_ELSE), boolalpha(parsing.on()));
}
// Discard the top status entry.  An empty stack is reported as an error.
// With yydebug set, report the new state and dump the stack.
void
scanner_parsing_pop() {
  if( parsing.empty() ) {
    error_msg(yylloc, ">>END-IF without >>IF");
    return;
  }

  parsing.pop();
  if( ! yydebug ) return;

  yywarn("%10s: parsing now %5s, depth %zu",
          keyword_str(CDF_END_IF), boolalpha(parsing.on()), parsing.size());
  parsing.splat();
}


// True if the scanner is in its normal state and expects a level number.
static bool
level_needed() {
  if( ! scanner_normal() ) return false;
  return parsing.need_level();
}

// In the normal state, record that the expected level number was seen.
static void
level_found() {
  if( ! scanner_normal() ) return;
  parsing.need_level(false);
}

/*
 * Wrapper for yyless(): evaluate N once, call trim_location() with it so
 * that yylloc is adjusted for the bytes beyond the first N, then call
 * yyless() with the same value.
 */
#define myless(N)				\
  do {						\
    auto n(N);					\
    trim_location(n);				\
    yyless(n);					\
  } while(0)

/*
 * A deferred file-change event: either "enter FILENAME" or "leave".
 * It holds the callback to invoke; notify() performs the event.
 */
class enter_leave_t {
  typedef void (parser_enter_file_f)(const char *filename);
  typedef void (parser_leave_file_f)();
  parser_enter_file_f *entering;
  parser_leave_file_f *leaving;
  const char *filename;

 public:
  enter_leave_t()
    : entering(NULL), leaving(NULL), filename(NULL) {}
  enter_leave_t( parser_enter_file_f *enter_fn, const char *name )
    : entering(enter_fn), leaving(NULL), filename(name) {}
  enter_leave_t( parser_leave_file_f *leave_fn )
    : entering(NULL), leaving(leave_fn), filename(NULL) {}

  // Update the current filename, then invoke whichever callback is set.
  // An event may not carry both callbacks.
  void notify() {
    if( entering ) {
      cobol_filename(filename, 0);
      if( yy_flex_debug ) {
        dbgmsg("starting line %4d of %s", yylineno, filename);
      }
      entering(filename);
      gcc_assert(leaving == NULL);
    }
    if( leaving ) {
      const auto name = cobol_filename_restore();
      if( yy_flex_debug ) {
        dbgmsg("resuming line %4d of %s", yylineno, name? name : "<none>");
      }
      leaving();
      gcc_assert(entering == NULL);
    }
  }
};

static class input_file_status_t {
  std::queue <enter_leave_t> inputs;
 public:
  void enter(const char *filename) {
    inputs.push( enter_leave_t(parser_enter_file, filename) );
  }
  void leave() {
    inputs.push( parser_leave_file );
  }
  void notify() {
    while( ! inputs.empty() ) {
      auto enter_leave = inputs.front();
      enter_leave.notify();
      inputs.pop();
    }
  }
} input_file_status;

void input_file_status_notify() { input_file_status.notify(); }

void cdf_location_set(YYLTYPE loc);

/*
 * Move yylloc to the text just matched.  The new location starts where the
 * previous one ended, and ends on line yylineno.  If the match contains a
 * newline, the last column is measured from the final newline; otherwise
 * it is the previous last column plus yyleng.  The result is also passed
 * to cdf_location_set().
 */
static void
update_location() {
  YYLTYPE loc = {
    yylloc.last_line, yylloc.last_column,
    yylineno,         yylloc.last_column + yyleng
  };

  const void *newline = memrchr(yytext, '\n', yyleng);
  if( newline ) {
    loc.last_column = (yytext + yyleng) - static_cast<const char*>(newline);
  }

  yylloc = loc;
  cdf_location_set(loc);
  location_dump(__func__, __LINE__, "yylloc", yylloc);
}

/*
 * Adjust yylloc for a token being cut back to its first NKEEP bytes.
 * The bytes of yytext from NKEEP up to yyleng (the "rescan" text) are
 * removed from the location.
 *
 * If the rescan text contains newlines, last_line is reduced by their
 * number and last_column is set from the text after the final newline.
 * Otherwise last_column is reduced by the rescan length, and first_column
 * is reset to 1 if that leaves it beyond last_column.
 */
static void
trim_location( int nkeep) {
  gcc_assert( 0 <= nkeep && nkeep <= yyleng );
  struct { char *p, *pend;
    size_t size() const { return pend - p; }
  } rescan = { yytext + nkeep, yytext + yyleng };

  auto nline = std::count(rescan.p, rescan.pend, '\n');
  // Arguments are converted to match their conversions exactly:
  // %zu takes size_t, %d and the %.*s precision take int.
  dbgmsg("%s:%d: yyless(%d), rescan '%.*s' (%zu lines, %d bytes)",
         __func__, __LINE__,
         nkeep,
         int(rescan.size()), rescan.p,
         size_t(nline), int(rescan.size()));
  if( nline ) {
    gcc_assert( yylloc.first_line + nline <= yylloc.last_line );
    // Subtract the rescanned lines.  ("=-" would assign the negated count.)
    yylloc.last_line -= int(nline);
    char *p = static_cast<char*>(memrchr(rescan.p, '\n', rescan.size()));
    // NOTE(review): this is the length of the last rescanned line, not a
    // column within the kept text -- confirm that is what is wanted.
    yylloc.last_column = rescan.pend - ++p;
    return;
  }

  gcc_assert( int(rescan.size()) < yylloc.last_column );
  yylloc.last_column -= rescan.size();
  if( yylloc.last_column < yylloc.first_column ) {
    yylloc.first_column = 1;
  }

  location_dump(__func__, __LINE__, "yylloc", yylloc);
}

/*
 * Set yylloc.first_column to where STR would begin if it ended at
 * yylloc.last_column, shifted by CORRECTION.  The column is left
 * unchanged if the result would not be positive.
 */
static void
update_location_col( const char str[], int correction = 0) {
  // Do the arithmetic in int.  Mixing int with strlen()'s size_t makes the
  // result unsigned, so a negative column would wrap to a huge value and
  // slip past the "> 0" test.
  const int col = yylloc.last_column - int(strlen(str)) + correction;
  if( col > 0 ) {
    yylloc.first_column = col;
  }
  location_dump(__func__, __LINE__, "yylloc", yylloc);
}

// Forward the arguments to cbl_unimplemented_at(), using yylloc as the
// location.
#define not_implemented(...) cbl_unimplemented_at(yylloc, __VA_ARGS__)

// Start yylloc at line 1, column 1 for both its first and last positions.
#define YY_USER_INIT do {			\
    static YYLTYPE ones = {1,1, 1,1};		\
    yylloc = ones;				\
  } while(0)

/*
 * YY_DECL is the generated lexer.  The parser calls yylex().  yylex() invokes
 * next_token(), which calls this lexer function.  The Flex-generated code
 * updates neither yylval nor yylloc.  That job is left to the actions.
 *
 * The parser relies on yylex to set yylval and yylloc each time it is
 * called. It apparently maintains a separate copy for each term, and uses
 * YYLLOC_DEFAULT() to update the location of nonterminals.
 */
// Name and signature of the generated scanner function.
#define YY_DECL int lexer(void)

// Update yylloc for the matched text and, when yy_flex_debug is set, log
// the current start condition.  Note this expands to two statements.
#define YY_USER_ACTION							\
  update_location();							\
  if( yy_flex_debug ) dbgmsg("SC: %s", start_condition_is() );

// Fill buf by calling lexer_input() on yyin.  A result of 0 is reported
// as YY_NULL.
# define YY_INPUT(buf, result, max_size)                        \
{                                                               \
  if( 0 == (result = lexer_input(buf, max_size, yyin)) )        \
    result = YY_NULL;                                           \
}

// Comma expressions that store type T and capacity C in
// yylval.computational and evaluate to the token COMPUTATIONAL.
// scomputable marks the value signable; ucomputable marks it not signable.
#define scomputable(T, C)                               \
    yylval.computational.type=T,                        \
    yylval.computational.capacity=C,                    \
    yylval.computational.signable=true, COMPUTATIONAL
#define ucomputable(T, C)                               \
    yylval.computational.type=T,                        \
    yylval.computational.capacity=C,                    \
    yylval.computational.signable=false, COMPUTATIONAL

// Heap string extended by tmpstring_append(); NULL when nothing has been
// accumulated.
static char *tmpstring = NULL;

// Shorthand for the index of the current program.
#define PROGRAM current_program_index()

/*
 * Convert the text of a level number to its value.  One leading '0' is
 * skipped before conversion.  If no number can be read, a warning is
 * issued and 0 is returned.
 */
static uint32_t
level_of( const char input[] ) {
  if( input[0] == '0' ) {
    ++input;
  }

  unsigned int level = 0;
  const int nread = sscanf(input, "%u", &level);
  if( nread != 1 ) {
    yywarn( "%s:%d: invalid level '%s'", __func__, __LINE__, input );
  }

  return level;
}

/*
 * Digit count for the current token: the value of repeat_count() applied
 * to yytext (skipping a leading 'V' or 'v'), or LEN if repeat_count()
 * returns -1.
 */
static inline int
ndigit(int len) {
  char *input = yytext;
  if( TOUPPER(yytext[0]) == 'V' ) {
    input = yytext + 1;
  }
  const int n = repeat_count(input);
  if( n == -1 ) {
    return len;
  }
  return n;
}

/*
 * Append the current token text (yytext) to orig_picture and return TOKEN.
 * If the text does not fit, a diagnostic is issued and the stored picture
 * is truncated to what fits.
 */
static int
picset( int token ) {
  static const char * const eop = orig_picture + sizeof(orig_picture);
  char *p = orig_picture + strlen(orig_picture);

  // The remaining space must hold yyleng bytes plus the NUL terminator, so
  // an exact fit of yyleng bytes is already too long.  Comparing the
  // distances also avoids forming p + yyleng beyond the end of the array.
  if( eop - p <= yyleng ) {
    error_msg(yylloc, "PICTURE exceeds maximum size of %zu bytes",
             sizeof(orig_picture) - 1);
  }
  snprintf( p, eop - p, "%s", yytext );
  return token;
}

/*
 * True if the whole of yytext (all yyleng bytes) reads as a decimal int.
 * The value is stored through PVALUE when PVALUE is not NULL.
 */
static inline bool
is_integer_token( int *pvalue = NULL ) {
  int scratch;
  int * const value = pvalue ? pvalue : &scratch;
  int consumed = 0;

  if( sscanf(yytext, "%d%n", value, &consumed) != 1 ) {
    return false;
  }
  return consumed == yyleng;
}

// When set, the next call to typed_name() returns NUME and clears the flag.
static bool need_nume = false;

// Set need_nume to TF, logging the change, and return the new value.
bool
need_nume_set( bool tf ) {
  dbgmsg( "need_nume now %s", tf? "true" : "false" );
  need_nume = tf;
  return need_nume;
}

static int datetime_format_of( const char input[] );

// Symbol index of the function NAME found by symbol_function(0, NAME),
// or 0 if there is none.
static int
symbol_function_token( const char name[] ) {
  auto e = symbol_function( 0, name );
  if( ! e ) {
    return 0;
  }
  return symbol_index(e);
}

bool in_procedure_division(void );

/*
 * Find the symbol called NAME in the current program.
 *
 * A per-program cache maps lower-cased file and field names to symbol
 * indexes.  It is filled the first time this is called in the procedure
 * division while the program's cache is still empty.  A name not in the
 * cache is looked up with symbol_field() instead.
 */
static symbol_elem_t *
symbol_exists( const char name[] ) {
  typedef std::map <std::string, size_t> name_cache_t;
  static std::map <size_t, name_cache_t> cachemap;

  // NOTE(review): assumes NAME and its NUL fit in cbl_name_t -- TODO confirm.
  cbl_name_t lname;
  std::transform( name, name + strlen(name) + 1, lname, tolower );
  auto& cache = cachemap[PROGRAM];

  if( in_procedure_division() && cache.empty() ) {
    // Check the bound before dereferencing e, so that e->program is never
    // read at symbols_end().
    for( auto e = symbols_begin(PROGRAM) + 1;
         e < symbols_end() && PROGRAM == e->program; e++ ) {
      const char *symbol_name = NULL;
      if( e->type == SymFile ) {
        symbol_name = cbl_file_of(e)->name;
      } else if( e->type == SymField ) {
        symbol_name = cbl_field_of(e)->name;
      }
      if( ! symbol_name ) continue;  // neither a file nor a field

      cbl_name_t key;
      std::transform( symbol_name, symbol_name + strlen(symbol_name) + 1,
                      key, tolower );
      cache[key] = symbol_index(e);
    }
    cache.erase("");  // unnamed symbols are not cached
  }

  auto p = cache.find(lname);
  if( p == cache.end() ) {
    return symbol_field( PROGRAM, 0, name );
  }

  return symbol_at(p->second);
}

/*
 * Decide which token the parser should receive for the word NAME.
 *
 * The checks are made in this order, and the first match decides:
 *   1. no current program (PROGRAM is 0): NAME
 *   2. need_nume is set: clear it and return NUME
 *   3. repository_function_tok() returns nonzero: that token
 *      (for FUNCTION_UDF_0, yylval.number is set first)
 *   4. symbol_special() finds an entry: that special name's token
 *   5. redefined_token() returns nonzero: that token
 *   6. otherwise the field type of the symbol found by symbol_exists()
 *      decides; no symbol, or a symbol that is not a field, counts as
 *      FldInvalid and yields NAME.
 */
static int
typed_name( const char name[] ) {
  if( 0 == PROGRAM ) return NAME;
  if( need_nume ) { need_nume_set(false); return NUME; }

  int token = repository_function_tok(name);
  switch(token) {
  case 0:
    break;
  case FUNCTION_UDF_0:
    yylval.number = symbol_function_token(name);
    __attribute__((fallthrough));
  default:
    return token;
  }

  struct symbol_elem_t *e = symbol_special( PROGRAM, name );
  if( e ) return  cbl_special_name_of(e)->token;

  if( (token = redefined_token(name)) ) { return token; }

  e = symbol_exists( name );

  auto type = e && e->type == SymField? cbl_field_of(e)->type : FldInvalid;

  // The literal cases fall through on purpose; case order matters.
  switch(type) {
  case FldLiteralA:
    {
      // A constant whose value is recognized by datetime_format_of() is
      // returned as that token, with a copy of the value.
      auto f = cbl_field_of(e);
      if( is_constant(f) ) {
        int token = datetime_format_of(f->data.initial);
        if( token ) {
          yylval.string = xstrdup(f->data.initial);
          return token;
        }
      }
    }
    __attribute__((fallthrough));
  case FldLiteralN:
    {
      auto f = cbl_field_of(e);
      if( type == FldLiteralN ) {
        yylval.numstr.radix =
          f->has_attr(hex_encoded_e)? hexadecimal_e : decimal_e;
        yylval.numstr.string = xstrdup(f->data.initial);
        return NUMSTR;
      }
      // Reached only for FldLiteralA (via the fallthrough above).
      if( !f->has_attr(record_key_e) ) { // not a key-name literal
        yylval.literal.set(f);
        ydflval.string = yylval.literal.data;
        return LITERAL;
      }
    }
    // A FldLiteralA with record_key_e is returned as a plain NAME.
    __attribute__((fallthrough));
  case FldInvalid:
  case FldGroup:
  case FldForward:
  case FldIndex:
  case FldAlphanumeric:
  case FldPacked:
  case FldNumericDisplay:
  case FldNumericEdited:
  case FldAlphaEdited:
  case FldNumericBinary:
  case FldFloat:
  case FldNumericBin5:
  case FldPointer:
    return NAME;
  case FldSwitch:
    return SWITCH;
  case FldClass:
    return cbl_field_of(e)->level == 88? NAME88 : CLASS_NAME;
    // NOTE(review): this break cannot be reached; it follows a return.
    break;
  default:
    yywarn("%s:%d: invalid symbol type %s for symbol \"%s\"",
          __func__, __LINE__, cbl_field_type_str(type), name);
    return NAME;
  }
  // NOTE(review): unreachable -- every arm of the switch above returns.
  return cbl_field_of(e)->level == 88? NAME88 : NAME;
}

// Re-run typed_name() on the name currently held in ydflval.string.
int
retype_name_token() {
  const int token = typed_name(ydflval.string);
  return token;
}

/*
 * Append the first LEN bytes of yytext to tmpstring, reallocating it, and
 * return the new string.  When yy_flex_debug is set and an environment
 * variable named after this function exists, the new value is logged.
 */
static char *
tmpstring_append( int len ) {
  char *grown = xasprintf("%s%.*s",
                          tmpstring == NULL ? "" : tmpstring, len, yytext);
  free(tmpstring);
  tmpstring = grown;

  if( yy_flex_debug && getenv(__func__) ) {
    yywarn("%s: value is now '%s'", __func__, grown);
  }
  return tmpstring;
}

// Call yy_pop_state() and then return; written as "pop_return VALUE;".
// Expands to two statements, so it must not be the whole body of an
// unbraced if or else.
#define pop_return yy_pop_state(); return

/*
 * Wait for a child process to terminate.
 *
 * Returns true if the child exited with status 0.  Returns false, after a
 * warning, if there is no child to wait for, if the child was terminated
 * by a signal, or if it exited with a nonzero status.
 */
static bool
wait_for_the_child(void) {
  int status;
  const pid_t pid = wait(&status);

  if( pid == -1 ) {
    yywarn("internal error: no pending child CDF parser process");
    return false;
  }

  if( WIFSIGNALED(status) ) {
    yywarn( "process %d terminated by %s", pid, strsignal(WTERMSIG(status)) );
    return false;
  }
  if( WIFEXITED(status) && WEXITSTATUS(status) != 0 ) {
    // Report the child's exit status itself, not the encoded wait status.
    yywarn("process %d exited with status %d", pid, WEXITSTATUS(status));
    return false;
  }
  if( yy_flex_debug ) {
    yywarn("process %d exited with status %d", pid, status);
  }
  return true;
}

static bool is_not = false;

/*
 * Convert the text INPUT to an unsigned 64-bit value, reading it as
 * hexadecimal if IS_HEX is true and as decimal otherwise.  One leading
 * '0' is skipped when more text follows it.  If no number can be read, a
 * warning is issued and 0 is returned.
 */
static uint64_t
integer_of( const char input[], bool is_hex = false) {
  // The conversions must match the destination type; unsigned long long is
  // at least 64 bits wide and needs no extra header for its format.
  unsigned long long output = 0;
  const char *fmt = is_hex? "%llx" : "%llu";

  // Keep a lone "0": stripping it would leave nothing to convert.
  if( input[0] == '0' && input[1] != '\0' ) input++;

  if( 1 != sscanf(input, fmt, &output) ) {
    yywarn( "%s:%d: invalid integer '%s'", __func__, __LINE__, input );
  }

  return output;
}