diff options
author | James K. Lowden <jklowden@symas.com> | 2025-03-06 16:25:09 -0500 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2025-03-11 07:48:21 +0100 |
commit | 3c5ed996ac94a15bc2929155f2c69cc85eef89f7 (patch) | |
tree | c365f6e25814ca3e88ae3fed34ca7a327a016540 /gcc | |
parent | a0754187274a36443707eab5506ae53ab1d71ad2 (diff) | |
download | gcc-3c5ed996ac94a15bc2929155f2c69cc85eef89f7.zip gcc-3c5ed996ac94a15bc2929155f2c69cc85eef89f7.tar.gz gcc-3c5ed996ac94a15bc2929155f2c69cc85eef89f7.tar.bz2 |
COBOL: Frontend
gcc/cobol/
* LICENSE: New file.
* Make-lang.in: New file.
* config-lang.in: New file.
* lang.opt: New file.
* lang.opt.urls: New file.
* cbldiag.h: New file.
* cdfval.h: New file.
* cobol-system.h: New file.
* copybook.h: New file.
* dts.h: New file.
* exceptg.h: New file.
* gengen.h: New file.
* genmath.h: New file.
* genutil.h: New file.
* inspect.h: New file.
* lang-specs.h: New file.
* lexio.h: New file.
* parse_ante.h: New file.
* parse_util.h: New file.
* scan_ante.h: New file.
* scan_post.h: New file.
* show_parse.h: New file.
* structs.h: New file.
* symbols.h: New file.
* token_names.h: New file.
* util.h: New file.
* cdf-copy.cc: New file.
* lexio.cc: New file.
* scan.l: New file.
* parse.y: New file.
* genapi.cc: New file.
* genapi.h: New file.
* gengen.cc: New file.
* genmath.cc: New file.
* genutil.cc: New file.
* cdf.y: New file.
* cobol1.cc: New file.
* convert.cc: New file.
* except.cc: New file.
* gcobolspec.cc: New file.
* structs.cc: New file.
* symbols.cc: New file.
* symfind.cc: New file.
* util.cc: New file.
* gcobc: New file.
* gcobol.1: New file.
* gcobol.3: New file.
* help.gen: New file.
* udf/stored-char-length.cbl: New file.
Diffstat (limited to 'gcc')
49 files changed, 68539 insertions, 0 deletions
diff --git a/gcc/cobol/LICENSE b/gcc/cobol/LICENSE new file mode 100644 index 0000000..aa5ba60 --- /dev/null +++ b/gcc/cobol/LICENSE @@ -0,0 +1,29 @@ +######################################################################### +# +# Copyright (c) 2021-2025 Symas Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of the Symas Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in new file mode 100644 index 0000000..8cc837e --- /dev/null +++ b/gcc/cobol/Make-lang.in @@ -0,0 +1,366 @@ +# Top level -*- makefile -*- fragment for Cobol +# Copyright (C) 2021-2025 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# This file provides the language dependent support in the main Makefile. +# Each language makefile fragment must provide the following targets: +# +# foo.all.cross, foo.start.encap, foo.rest.encap, +# foo.install-common, foo.install-man, foo.install-info, foo.install-pdf, +# foo.install-html, foo.info, foo.dvi, foo.pdf, foo.html, foo.uninstall, +# foo.mostlyclean, foo.clean, foo.distclean, +# foo.maintainer-clean, foo.stage1, foo.stage2, foo.stage3, foo.stage4 +# +# where `foo' is the name of the language. +# +# It should also provide rules for: +# +# - making any compiler driver (eg: g++) +# - the compiler proper (eg: cc1plus) +# - define the names for selecting the language in LANGUAGES. + +gcobol_INSTALL_NAME := $(shell echo gcobol|sed '$(program_transform_name)') +gcobol_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gcobol|sed '$(program_transform_name)') + +cobol: cobol1$(exeext) +.PHONY: cobol + +BINCLUDE ?= ./gcc +LIB_INCLUDE ?= $(srcdir)/../libgcobol +LIB_SOURCE ?= $(srcdir)/../libgcobol + +# +# At this point, as of 2022-10-21, CPPFLAGS is an empty string and can be +# altered. CFLAGS and CXXFLAGS are being established upstream, and thus +# cannot, at this point, be changed. +# +# Note further that we are producing only a 64-bit version of libgcobol.so, so +# it is safe to hard-code the lib64 location. This obviously has to match the +# installation code in libgcobol/Makefile.in +# +CPPFLAGS = \ + -std=c++14 \ + -Iinclude \ + -I$(BINCLUDE) \ + -I$(LIB_INCLUDE) \ + -DEXEC_LIB=\"$(prefix)/lib64\" \ + $(END) + +YFLAGS = -Werror -Wmidrule-values -Wno-yacc \ + --debug --verbose + +LFLAGS = -d -Ca + +# +# These are the object files for creating the cobol1.exe compiler: +# +cobol1_OBJS = \ + cobol/cdf.o \ + cobol/cdf-copy.o \ + cobol/cobol1.o \ + cobol/convert.o \ + cobol/except.o \ + cobol/genutil.o \ + cobol/genapi.o \ + cobol/genmath.o \ + cobol/gengen.o \ + cobol/lexio.o \ + cobol/parse.o \ + cobol/scan.o \ + cobol/structs.o \ + cobol/symbols.o \ + cobol/symfind.o \ + cobol/util.o \ + cobol/charmaps.o \ + cobol/valconv.o \ + $(END) + +# +# There is source code in libgcobol/charmaps.cc and +# libgcobol/valconv.cc that needs to be compiled into both libgcobol +# and cobol1. We copy those two source code files from libgcobol to +# here to avoid the nightmare of one file appearing in more than one +# place. For simplicity, we make those compilations dependent on all +# of the libgcobol/*.h files, which might lead to the occasional +# unnecessary compilation. The impact of that is negligible. +# +cobol/charmaps.cc: $(LIB_SOURCE)/charmaps.cc + cp $^ $@ + +cobol/valconv.cc: $(LIB_SOURCE)/valconv.cc + cp $^ $@ + +LIB_SOURCE_H=$(wildcard $(LIB_SOURCE)/*.h) + +cobol/charmaps.o: cobol/charmaps.cc $(LIB_SOURCE_H) + +cobol/valconv.o: cobol/valconv.cc $(LIB_SOURCE_H) + +# +# These are the object files for creating the gcobol.exe "driver" +# +GCOBOL_D_OBJS = $(GCC_OBJS) cobol/gcobolspec.o + +# +# These get combined to provide a dependency relationship that ensures all +# of the "generated-files" are generated before we need them. See the root +# Makefile.in code that looks like this: +# ALL_HOST_FRONTEND_OBJS = $(foreach v,$(CONFIG_LANGUAGES),$($(v)_OBJS)) +# +cobol_OBJS = \ + $(cobol1_OBJS) \ + cobol/gcobolspec.o \ + $(END) + +# +# Frankly, I can't figure out what this does: +# +CFLAGS-cobol/gcobolspec.o += $(DRIVER_DEFINES) + +# +# This controls the build of the gcobol.exe "driver" +# +gcobol$(exeext): \ + $(GCOBOL_D_OBJS) \ + $(EXTRA_GCC_OBJS) \ + libcommon-target.a \ + $(LIBDEPS) + +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ + $(GCOBOL_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a \ + $(EXTRA_GCC_LIBS) $(LIBS) + +# +# These control the build of the cobol1.exe source-to-GENERIC converter +# + +# First, files needed for parsing: + +cobol/parse.c: cobol/parse.y + $(BISON) -o $@ $(YFLAGS) \ + --defines=cobol/parse.h \ + --report-file=cobol/parser.out $< + +cobol/cdf.c: cobol/cdf.y + $(BISON) -o $@ $(YFLAGS) \ + --defines=cobol/cdf.h --report-file=cobol/cdf.out $< + +# See "Trailing context is getting confused with trailing optional patterns" +# in Flex manual. We suppress those messages, as a convenience. +FLEX_WARNING = warning, dangerous trailing context + +cobol/scan.c: cobol/scan.l + $(FLEX) -o$@ $(LFLAGS) $< >$@~ 2>&1 + awk '! /$(FLEX_WARNING)/ {print > "/dev/stderr"; nerr++} \ + END {print "$(FLEX):", NR, "messages" > "/dev/stderr"; \ + exit nerr}' $@~ + @rm $@~ + + +# To establish prerequisites for parse.o, cdf.o, and scan.o, +# 1. capture the "make -n" output +# 2. eliminate compiler options, leaving only preprocessor options (-D and -I) +# 3. add -E -MM +# +# The below lists of include files for the the generated files is +# postprocessed: the files are one per line, used "realpath +# --relative-to=$PWD" to rationalize them, and sorted. We include +# parse.c in the list for scan.o because that's the one make(1) knows about. + +cobol/cdf.o: cobol/cdf.c \ + $(srcdir)/cobol/cbldiag.h \ + $(srcdir)/cobol/cdfval.h \ + $(srcdir)/cobol/copybook.h \ + $(srcdir)/cobol/exceptg.h \ + $(srcdir)/cobol/symbols.h \ + $(srcdir)/cobol/util.h \ + $(srcdir)/../libgcobol/common-defs.h \ + $(srcdir)/../libgcobol/ec.h \ + $(srcdir)/../libgcobol/exceptl.h + +cobol/parse.o: cobol/parse.c \ + $(srcdir)/cobol/cbldiag.h \ + $(srcdir)/cobol/cdfval.h \ + $(srcdir)/cobol/cobol-system.h \ + $(srcdir)/cobol/exceptg.h \ + $(srcdir)/cobol/genapi.h \ + $(srcdir)/cobol/inspect.h \ + $(srcdir)/cobol/parse_ante.h \ + $(srcdir)/cobol/parse_util.h \ + $(srcdir)/cobol/symbols.h \ + $(srcdir)/cobol/util.h \ + $(srcdir)/hwint.h \ + $(srcdir)/system.h \ + $(srcdir)/../include/ansidecl.h \ + $(srcdir)/../include/filenames.h \ + $(srcdir)/../include/hashtab.h \ + $(srcdir)/../include/libiberty.h \ + $(srcdir)/../include/safe-ctype.h \ + $(srcdir)/../libgcobol/common-defs.h \ + $(srcdir)/../libgcobol/ec.h \ + $(srcdir)/../libgcobol/exceptl.h \ + $(srcdir)/../libgcobol/io.h \ + auto-host.h \ + config.h + +cobol/scan.o: cobol/scan.c \ + $(srcdir)/cobol/cbldiag.h \ + $(srcdir)/cobol/cdfval.h \ + $(srcdir)/cobol/cobol-system.h \ + $(srcdir)/cobol/copybook.h \ + $(srcdir)/cobol/dts.h \ + $(srcdir)/cobol/exceptg.h \ + $(srcdir)/cobol/inspect.h \ + $(srcdir)/cobol/lexio.h \ + $(srcdir)/cobol/scan_ante.h \ + $(srcdir)/cobol/scan_post.h \ + $(srcdir)/cobol/symbols.h \ + $(srcdir)/cobol/util.h \ + $(srcdir)/hwint.h \ + $(srcdir)/system.h \ + $(srcdir)/../include/ansidecl.h \ + $(srcdir)/../include/filenames.h \ + $(srcdir)/../include/hashtab.h \ + $(srcdir)/../include/libiberty.h \ + $(srcdir)/../include/safe-ctype.h \ + $(srcdir)/../libgcobol/common-defs.h \ + $(srcdir)/../libgcobol/ec.h \ + $(srcdir)/../libgcobol/exceptl.h \ + $(srcdir)/../libgcobol/io.h \ + auto-host.h \ + config.h \ + cobol/cdf.c \ + cobol/parse.c + +# +# The src<foo> targets are executed if +# ‘--enable-generated-files-in-srcdir’ was specified as a configure +# option. +# +# srcextra copies generated dependencies into the source +# directory. This is used for files such as Flex/Bison output: files +# that are not version-controlled but should be included in any +# release tarballs. +# +# Although versioned snapshots require Flex to be installed, they do +# not require Bison. Release tarballs always include Flex/Bison +# output, and do not require those tools to be installed. +# +cobol.srcextra: cobol/parse.c cobol/cdf.c cobol/scan.c + ln -f $^ cobol/parse.h cobol/cdf.h $(srcdir)/cobol/ + + +# And the cobol1.exe front end + +cobol1$(exeext): $(cobol1_OBJS) $(BACKEND) $(LIBDEPS) attribs.o + +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) attribs.o -o $@ \ + $(cobol1_OBJS) $(BACKEND) $(LIBS) $(BACKENDLIBS) + +# FIXME +cobol.all.cross: + +cobol.start.encap: gcobol$(exeext) + +cobol.rest.encap: + +cobol.install-common: installdirs + $(INSTALL_PROGRAM) gcobol$(exeext) $(DESTDIR)$(bindir)/ + $(INSTALL_PROGRAM) cobol1$(exeext) $(DESTDIR)$(libexecsubdir)/ + $(INSTALL) -m 755 $(srcdir)/cobol/gcobc $(DESTDIR)$(bindir)/ + mkdir -p $(DESTDIR)$(datadir)/gcobol/udf + $(INSTALL_DATA) $(srcdir)/cobol/udf/* $(DESTDIR)$(datadir)/gcobol/udf/ + +cobol.install-man: installdirs + $(INSTALL_DATA) $(srcdir)/cobol/gcobol.1 $(DESTDIR)$(man1dir)/ + $(INSTALL_DATA) $(srcdir)/cobol/gcobol.3 $(DESTDIR)$(man3dir)/ + +cobol.install-info: + +cobol.install-pdf: installdirs gcobol.pdf gcobol-io.pdf + mkdir -p $(DESTDIR)$(datadir)/gcobol/pdf + $(INSTALL_DATA) gcobol.pdf gcobol-io.pdf $(DESTDIR)$(pdfdir)/ + +cobol.install-plugin: + +cobol.install-html: installdirs gcobol.html gcobol-io.html + $(INSTALL_DATA) gcobol.html gcobol-io.html $(DESTDIR)$(htmldir)/ + +cobol.info: +cobol.srcinfo: + +cobol.dvi: +cobol.srcdvi: + +cobol.pdf: gcobol.pdf gcobol-io.pdf +cobol.srcpdf: gcobol.pdf gcobol-io.pdf + ln $^ $(srcdir)/cobol/ + +gcobol.pdf: $(srcdir)/cobol/gcobol.1 + groff -mdoc -T pdf $^ > $@~ + @mv $@~ $@ +gcobol-io.pdf: $(srcdir)/cobol/gcobol.3 + groff -mdoc -T pdf $^ > $@~ + @mv $@~ $@ + +cobol.html: gcobol.html gcobol-io.html +cobol.srchtml: gcobol.html gcobol-io.html + ln $^ $(srcdir)/cobol/ + +gcobol.html: $(srcdir)/cobol/gcobol.1 + mandoc -T html $^ > $@~ + @mv $@~ $@ +gcobol-io.html: $(srcdir)/cobol/gcobol.3 + mandoc -T html $^ > $@~ + @mv $@~ $@ + +# "make uninstall" is not expected to work. It's not clear how to name +# the installed location of the cobol1 compiler. +cobol.uninstall: + rm -rf $(DESTDIR)$(bindir)/$(gcobol_INSTALL_NAME)$(exeext) \ + $(DESTDIR)$(bindir)/gcobc \ + $(DESTDIR)$(datadir)/gcobol/ \ + $(DESTDIR)$(man1dir)/gcobol.1 \ + $(DESTDIR)$(man3dir)/gcobol.3 + +cobol.man: +cobol.srcman: + +cobol.mostlyclean: + +cobol.clean: + rm -fr gcobol cobol1 cobol/* \ + ../*/libgcobol/* + +cobol.distclean: + +cobol.maintainer-clean: + +# The main makefile has already created stage?/cobol. +cobol.stage1: stage1-start + -mv cobol/*$(objext) stage1/cobol +cobol.stage2: stage2-start + -mv cobol/*$(objext) stage2/cobol +cobol.stage3: stage3-start + -mv cobol/*$(objext) stage3/cobol +cobol.stage4: stage4-start + -mv cobol/*$(objext) stage4/cobol +cobol.stageprofile: stageprofile-start + -mv cobol/*$(objext) stageprofile/cobol +cobol.stagefeedback: stagefeedback-start + -mv cobol/*$(objext) stagefeedback/cobol + +selftest-cobol: diff --git a/gcc/cobol/cbldiag.h b/gcc/cobol/cbldiag.h new file mode 100644 index 0000000..ed754f1 --- /dev/null +++ b/gcc/cobol/cbldiag.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _CBLDIAG_H +#pragma message __FILE__ " included twice" +#else +#define _CBLDIAG_H + +const char * cobol_filename(); + +/* + * These are user-facing messages. They go through the gcc + * diagnostic framework and use text that can be localized. + */ +void yyerror( const char fmt[], ... ); +bool yywarn( const char fmt[], ... ); + +/* Location type. Borrowed from parse.h as generated by Bison. */ +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE YYLTYPE; +struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 + +const YYLTYPE& cobol_location(); +#endif + +#if ! defined YDFLTYPE && ! defined YDFLTYPE_IS_DECLARED +typedef struct YDFLTYPE YDFLTYPE; +struct YDFLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YDFLTYPE_IS_DECLARED 1 +# define YDFLTYPE_IS_TRIVIAL 1 + +#endif + +// an error at a location, called from the parser for semantic errors +void error_msg( const YYLTYPE& loc, const char gmsgid[], ... ); + +void dialect_error( const YYLTYPE& loc, const char term[], const char dialect[] ); + + +// for CDF and other warnings that refer back to an earlier line +// (not in diagnostic framework yet) +void yyerrorvl( int line, const char *filename, const char fmt[], ... ); + +void cbl_unimplementedw(const char *gmsgid, ...); // warning +void cbl_unimplemented(const char *gmsgid, ...); // error +void cbl_unimplemented_at( const YYLTYPE& loc, const char *gmsgid, ... ); + +/* + * dbgmsg produce messages not intended for the user. They cannot + * be localized and fwrite directly to standard out. dbgmsg is activated by + * -fflex-debug or -fyacc-debug. + */ +void dbgmsg( const char fmt[], ... ); + +void gcc_location_set( const YYLTYPE& loc ); + +// tree.h defines yy_flex_debug as a macro because options.h +#if ! defined(yy_flex_debug) +template <typename LOC> +static void +location_dump( const char func[], int line, const char tag[], const LOC& loc) { + extern int yy_flex_debug; + if( yy_flex_debug && getenv("update_location") ) + fprintf(stderr, "%s:%d: %s location (%d,%d) to (%d,%d)\n", + func, line, tag, + loc.first_line, loc.first_column, loc.last_line, loc.last_column); +} +#endif // defined(yy_flex_debug) + +#endif diff --git a/gcc/cobol/cdf-copy.cc b/gcc/cobol/cdf-copy.cc new file mode 100644 index 0000000..dfa3f57 --- /dev/null +++ b/gcc/cobol/cdf-copy.cc @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// NOTE: Unlike charmaps-copy.cc and valprint-copy.cc, this file implements +// the Compiler Directives Facility for the COBOL "COPY" statement. So, this +// file is the actual source code, and not a copy of something in libgcobol +// +// We regret any confusion engendered. + +#include "cobol-system.h" +#include "cbldiag.h" +#include "util.h" +#include "copybook.h" + +#include <glob.h> +#include <libgen.h> + +#define COUNT_OF(X) (sizeof(X) / sizeof(X[0])) + +/* + * There are 3 kinds of replacement types: + * 1. keywords, identifiers, figurative constants, and function names + * 2. string literals + * 3. pseudo-text + * + * Types #1 and #3 are delimited by separators: + * [[:space:],.;()]. String literals begin and end with ["] or ['] + * (matched). + * + * Space in pseudo-text is "elastic"; one or more in the matching + * argument matches one or more in the input. Exception: when the + * argument is only a comma or semicolon, it matches exactly. + * + * The matching algorithm operates on the source file word by word. + * Comments are copied literally, as are any CDF statements. + * + * The candidate word is used as the beginning of all possible + * matches, in the order they appear in the COPY statement. If none + * match, the word is copied to the output and the next word is + * tried. + * + * On a match, the replacement is applied, the result copied to the + * output, and the next word is tried, starting again from the first + * match candidate. + * + * The parser composes the regular expressions. It "literalizes" + * any regex metacharacters that may appear in the COPY text and + * constructs the correct matching expression for "stretchable" + * space. This function only applies them. + */ + +extern int yydebug; +const char * cobol_filename(); +bool is_fixed_format(); +bool is_reference_format(); + +struct line_t { + char *p, *pend; + line_t( size_t len, char *data ) : p(data), pend(data + len) { + gcc_assert(p && p <= pend); + } + line_t( char *data, char *eodata ) : p(data), pend(eodata) { + gcc_assert(p && p <= pend); + } + ssize_t size() const { return pend - p; } +}; + +static bool +is_separator_space( const char *p) { + switch( *p ) { + case ',': + case ';': + if( p[1] == 0x20 ) return true; + break; + } + return ISSPACE(*p); +} + +static void +verify_bounds( size_t pos, size_t size, const char input[] ) { + gcc_assert(pos < size ); + if( !( pos < size) ) { + cbl_internal_error( "REPLACING %zu characters exceeds system capacity" + "'%s'", pos, input); + } +} + +/* + * Replace any separators in the copybook's REPLACING candidate with + * "stretchable" space. Escape any regex metacharacters in candidate. + * + * "For matching purposes, each occurrence of a separator comma, a + * separator semicolon, or a sequence of one or more separator spaces + * is considered to be a single space." + * + * If the indicator column is column 7 and is a 'D', we treat that as + * a SPACE for the purposes of matching a COPY REPLACING or REPLACE + * directive. + */ +const char * +esc( size_t len, const char input[] ) { + static char spaces[] = "([,;]?[[:space:]])+"; + static char spaceD[] = "(\n {6}D" "|" "[,;]?[[:space:]])+"; + static char buffer[64 * 1024]; + char *p = buffer; + const char *eoinput = input + len; + + const char *spacex = is_reference_format()? spaceD : spaces; + + for( const char *s=input; *s && s < eoinput; s++ ) { + *p = '\0'; + verify_bounds( 4 + size_t(p - buffer), sizeof(buffer), buffer ); + switch(*s) { + case '^': case '$': + case '(': case ')': + case '*': case '+': case '?': + case '[': case ']': + case '{': case '}': + case '|': + case '.': + *p++ = '\\'; + *p++ = *s; + break; + case '\\': + *p++ = '['; + *p++ = *s; + *p++ = ']'; + break; + + case ';': case ',': + if( ! (s+1 < eoinput && s[1] == 0x20) ) { + *p++ = *s; + break; + } + __attribute__((fallthrough)); + case 0x20: case '\n': + verify_bounds( (p + sizeof(spacex)) - buffer, sizeof(buffer), buffer ); + p = stpcpy( p, spacex ); + while( s+1 < eoinput && is_separator_space(s+1) ) { + s++; + } + break; + default: + *p++ = *s; + break; + } + } + *p = '\0'; + +#if 0 + dbgmsg("%s:%d: regex '%s'", __func__, __LINE__, buffer); +#endif + return buffer; // caller must strdup static buffer +} + +static int +glob_error(const char *epath, int eerrno) { + dbgmsg("%s: COPY file search: '%s': %s", __func__, epath, xstrerror(eerrno)); + return 0; +} + +void +copybook_directory_add( const char gcob_copybook[] ) { + if( !gcob_copybook ) return; + char *directories = xstrdup(gcob_copybook), *p = directories; + char *eodirs = strchr(directories, '\0'); + gcc_assert(eodirs); + + do { + char *pend = std::find(p, eodirs, ':'); + if( pend != eodirs ) { + *pend = '\0'; + } + copybook.directory_add(p); + p = pend; + } while( ++p < eodirs ); + +} + +class case_consistent { + int lower_upper; // -1 lower, 1 upper +public: + case_consistent() : lower_upper(0) {} + bool operator()( char ch ) { + if( !ISALPHA(ch) ) return true; + int lu = ISLOWER(ch)? -1 : 1; + if( !lower_upper ) { + lower_upper = lu; + return true; + } + return lu == lower_upper; + } +}; + +void +copybook_extension_add( const char ext[] ) { + char *alt = NULL; + bool one_case = std::all_of( ext, ext + strlen(ext), case_consistent() ); + if( one_case ) { + alt = xstrdup(ext); + gcc_assert(alt); + auto convert = ISLOWER(ext[0])? toupper : tolower; + std::transform( alt, alt+strlen(alt), alt, convert ); + } + copybook.extensions_add( ext, alt ); +} + +extern int yydebug; + +const char * copybook_elem_t::extensions; + +void +copybook_t::extensions_add( const char ext[], const char alt[] ) { + char *output; + if( alt ) { + output = xasprintf("%s,%s", ext, alt); + } else { + output = xstrdup(ext); + } + gcc_assert(output); + if( book.extensions ) { + char *s = xasprintf("%s,%s", output, book.extensions); + free(const_cast<char*>(book.extensions)); + free(output); + book.extensions = s; + } else { + book.extensions = output; + } +} + +static inline ino_t +inode_of( int fd ) { + struct stat sb; + if( -1 == fstat(fd, &sb) ) { + cbl_err("could not stat fd %d", fd); + } + return sb.st_ino; +} + +int +copybook_elem_t::open_file( const char directory[], bool literally ) { + int erc; + char *pattern, *copier = xstrdup(cobol_filename()); + if( ! directory ) { + directory = dirname(copier); + if( 0 == strcmp(".", directory) ) directory = NULL; + } + + char *path = NULL; + + if( directory || library.name ) { + if( directory && library.name ) { + path = xasprintf( "%s/%s/%s", directory, library.name, source.name ); + } else { + const char *dir = directory? directory : library.name; + path = xasprintf( "%s/%s", dir, source.name ); + } + } else { + path = xasprintf( "%s", source.name ); + } + + gcc_assert(path); + + if( literally ) { + dbgmsg("copybook_elem_t::open_file: trying %s", path); + + if( (this->fd = open(path, O_RDONLY)) == -1 ) { + dbgmsg("could not open %s: %m", path); + return fd; + } + this->source.name = path; + if( ! cobol_filename(this->source.name, inode_of(fd)) ) { + error_msg(source.loc, "recursive copybook: '%s' includes itself", path); + (void)! close(fd); + fd = -1; + } + return fd; + } + gcc_assert( ! literally ); + + if( extensions ) { + pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB,%s}", + path, this->extensions); + } else { + pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB}", path); + } + + free(copier); + + static int flags = GLOB_MARK | GLOB_BRACE | GLOB_TILDE; + glob_t globber; + + if( (erc = glob(pattern, flags, glob_error, &globber)) != 0 ) { + switch(erc) { + case GLOB_NOSPACE: + yywarn("COPY file search: out of memory"); + break; + case GLOB_ABORTED: + yywarn("COPY file search: read error"); + break; + case GLOB_NOMATCH: + dbgmsg("COPY '%s': no files match %s", this->source.name, pattern); + default: + break; // caller says no file found + } + return -1; + } + + free(pattern); + + for( size_t i=0; i < globber.gl_pathc; i++ ) { + auto filename = globber.gl_pathv[i]; + if( (this->fd = open(filename, O_RDONLY)) != -1 ) { + dbgmsg("found copybook file %s", filename); + this->source.name = xstrdup(filename); + if( ! cobol_filename(this->source.name, inode_of(fd)) ) { + error_msg(source.loc, "recursive copybook: '%s' includes itself", this->source); + (void)! close(fd); + fd = -1; + } + globfree(&globber); + return fd; + } + } + yywarn("could not open copy source for '%s'", source); + + globfree(&globber); + return -1; +} diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y new file mode 100644 index 0000000..08b5341 --- /dev/null +++ b/gcc/cobol/cdf.y @@ -0,0 +1,956 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +%{ + +#include "cobol-system.h" +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "copybook.h" +#include "exceptl.h" +#include "exceptg.h" + +#define COUNT_OF(X) (sizeof(X) / sizeof(X[0])) + +copybook_t copybook; + +static inline bool +is_word( int c ) { + return c == '_' || ISALNUM(c); +} + +static std::pair<long long, bool> +integer_literal( const char input[] ) { + long long v; + int n; + bool fOK = 1 == sscanf(input, "%lld%n", &v, &n) && + n == (int)strlen(input); + return std::make_pair(v, fOK); +} + +/* "The renamed symbols include 'yyparse', 'yylex', 'yyerror', + 'yynerrs', 'yylval', 'yylloc', 'yychar' and 'yydebug'. [...] The + renamed macros include 'YYSTYPE', 'YYLTYPE', and 'YYDEBUG'" */ + +extern int yylineno, yyleng; +extern char *yytext; + +static int ydflex(void); + +#define PROGRAM current_program_index() + +const YYLTYPE& cobol_location(); +static YYLTYPE location_set( const YYLTYPE& loc ); +void input_file_status_notify(); + +#define YYLLOC_DEFAULT(Current, Rhs, N) \ + do { \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + location_dump("cdf.c", N, \ + "rhs N ", YYRHSLOC (Rhs, N)); \ + } \ + else \ + { \ + (Current).first_line = \ + (Current).last_line = YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = \ + (Current).last_column = YYRHSLOC (Rhs, 0).last_column; \ + } \ + location_dump("cdf.c", __LINE__, "current", (Current)); \ + input_file_status_notify(); \ + gcc_location_set( location_set(Current) ); \ + } while (0) + +%} + +%code requires { + #include "cdfval.h" + + using std::map; + + static map<std::string, cdfval_t> dictionary; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" + static bool + cdfval_add( const char name[], + const cdfval_t& value, bool override = false ) + { + if( scanner_parsing() ) { + if( ! override ) { + if( dictionary.find(name) != dictionary.end() ) return false; + } + dictionary[name] = value; + } + return true; + } + static void + cdfval_off( const char name[] ) { + if( scanner_parsing() ) { + auto p = dictionary.find(name); + if( p == dictionary.end() ) { + dictionary[name] = cdfval_t(); + } + dictionary[name].off = true; + } + } +#pragma GCC diagnostic pop + + bool operator==( const cdfval_base_t& lhs, int rhs ); + bool operator||( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + bool operator&&( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + + cdfval_t operator<( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator<=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator==( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator!=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator>=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator>( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator+( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator-( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator*( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator/( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t negate( cdfval_base_t lhs ); + +} + +%{ +static char *display_msg; +const char * keyword_str( int token ); + +static class exception_turns_t { + typedef std::list<size_t> filelist_t; + typedef std::map<ec_type_t, filelist_t> ec_filemap_t; + ec_filemap_t exceptions; + public: + bool enabled, location; + + exception_turns_t() : enabled(false), location(false) {}; + + const ec_filemap_t& exception_files() const { return exceptions; } + + struct args_t { + size_t nexception; + cbl_exception_files_t *exceptions; + }; + + bool add_exception( ec_type_t type, const filelist_t files = filelist_t() ) { + ec_disposition_t disposition = ec_type_disposition(type); + if( disposition != ec_implemented(disposition) ) { + cbl_unimplementedw("CDF: exception '%s'", ec_type_str(type)); + } + auto elem = exceptions.find(type); + if( elem != exceptions.end() ) return false; // cannot add twice + + exceptions[type] = files; + return true; + } + + args_t args() const { + args_t args; + args.nexception = exceptions.size(); + args.exceptions = NULL; + if( args.nexception ) { + args.exceptions = new cbl_exception_files_t[args.nexception]; + } + std::transform( exceptions.begin(), exceptions.end(), args.exceptions, + []( auto& input ) { + cbl_exception_files_t output; + output.type = input.first; + output.nfile = input.second.size(); + output.files = NULL; + if( output.nfile ) { + output.files = new size_t[output.nfile]; + std::copy(input.second.begin(), + input.second.end(), + output.files ); + } + return output; + } ); + return args; + } + + void clear() { + for( auto& ex : exceptions ) { + ex.second.clear(); + } + exceptions.clear(); + enabled = location = false; + } + +} exception_turns; + + +static bool +apply_cdf_turn( exception_turns_t& turns ) { + for( auto elem : turns.exception_files() ) { + std::set<size_t> files(elem.second.begin(), elem.second.end()); + enabled_exceptions.turn_on_off(turns.enabled, + turns.location, + elem.first, files); + } + if( getenv("SHOW_PARSE") ) enabled_exceptions.dump(); + return true; +} +%} + +%union { + bool boolean; + int number; + const char *string; + cdf_arg_t cdfarg; + cdfval_base_t cdfval; + cbl_file_t *file; + std::set<size_t> *files; +} + +%printer { fprintf(yyo, "'%s'", $$ ); } <string> +%printer { fprintf(yyo, "%s '%s'", + keyword_str($$.token), + $$.string? $$.string : "<nil>" ); } <cdfarg> +%printer { fprintf(yyo, "%ld '%s'", + $$.number, $$.string? $$.string : "" ); } <cdfval> + +%type <string> NAME NUMSTR LITERAL PSEUDOTEXT +%type <string> LSUB RSUB SUBSCRIPT +%type <cdfarg> namelit name_any name_one +%type <string> name subscript subscripts inof +%token <boolean> BOOL +%token <number> FEATURE 363 NUMBER 302 EXCEPTION_NAME 280 "EXCEPTION NAME" + +%type <cdfval> cdf_expr +%type <cdfval> cdf_relexpr cdf_reloper cdf_and cdf_bool_expr +%type <cdfval> cdf_factor +%type <boolean> cdf_cond_expr override + +%type <file> filename +%type <files> filenames + +%token BY 476 +%token COPY 360 +%token CDF_DISPLAY 382 ">>DISPLAY" +%token IN 595 +%token NAME 286 +%token NUMSTR 304 "numeric literal" +%token OF 676 +%token PSEUDOTEXT 711 +%token REPLACING 733 +%token LITERAL 297 +%token SUPPRESS 374 + +%token LSUB 365 "(" +%token SUBSCRIPT 373 RSUB 370 ")" + +%token CDF_DEFINE 381 ">>DEFINE" +%token CDF_IF 383 ">>IF" +%token CDF_ELSE 384 ">>ELSE" +%token CDF_END_IF 385 ">>END-IF" +%token CDF_EVALUATE 386 ">>EVALUATE" +%token CDF_WHEN 387 ">>WHEN" +%token CDF_END_EVALUATE 388 ">>END-EVALUATE" + +%token AS 458 CONSTANT 359 DEFINED 361 +%type <boolean> DEFINED +%token OTHER 688 PARAMETER_kw 366 "PARAMETER" +%token OFF 677 OVERRIDE 367 +%token THRU 929 +%token TRUE_kw 803 "True" + +%token CALL_COBOL 389 "CALL" +%token CALL_VERBATIM 390 "CALL (as C)" + +%token TURN 805 CHECKING 486 LOCATION 639 ON 679 WITH 831 + +%left OR 930 +%left AND 931 +%right NOT 932 +%left '<' '>' '=' NE 933 LE 934 GE 935 +%left '-' '+' +%left '*' '/' +%right NEG 937 + +%define api.prefix {ydf} +%define api.token.prefix{YDF_} + +%locations +%define parse.error verbose +%% +top: partials { YYACCEPT; } + | copy '.' + { + const char *library = copybook.library(); + if( !library ) library = "SYSLIB"; + const char *source = copybook.source(); + dbgmsg("COPY %s from %s", source, library); + YYACCEPT; + } + | copy error { + error_msg(@error, "COPY directive must end in a '.'"); + YYACCEPT; + } + | completes { YYACCEPT; } + ; + +completes: complete + | completes complete + | completes partial + ; +complete: cdf_define + | cdf_display + | cdf_turn + | cdf_call_convention + ; + + /* + * To do: read ISO 2022 to see how >>DISPLAY is dictionary! + * To do: DISPLAY UPON + * To do: decide what to do about newlines, and when; DISPLAY has + * {}... in the specification. + */ +cdf_display: CDF_DISPLAY strings { + if( scanner_parsing() ) { + fprintf(stderr, "%s\n", display_msg); + free(display_msg); + display_msg = NULL; + } + } + ; +strings: LITERAL { + display_msg = xstrdup($1); + } + | strings LITERAL { + char *p = display_msg; + display_msg = xasprintf("%s %s", p, $2); + free(p); + } + ; + +partials: partial + { + if( ! scanner_parsing() ) YYACCEPT; + } + | partials partial + { + if( ! scanner_parsing() ) YYACCEPT; + } + ; +partial: cdf_if /* text */ + | CDF_ELSE { scanner_parsing_toggle(); } + | CDF_END_IF { scanner_parsing_pop(); } + | cdf_evaluate /* text */ + | cdf_eval_when /* text */ + | CDF_END_EVALUATE { scanner_parsing_pop(); } + ; + +cdf_define: CDF_DEFINE cdf_constant NAME as cdf_expr[value] override + { + if( keyword_tok($NAME) ) { + error_msg(@NAME, "%s is a COBOL keyword", $NAME); + YYERROR; + } + if( !cdfval_add( $NAME, cdfval_t($value), $override) ) { + error_msg(@NAME, "name already in dictionary: %s", $NAME); + const cdfval_t& entry = dictionary[$NAME]; + if( entry.filename ) { + error_msg(@NAME, "%s previously defined in %s:%d", + $NAME, entry.filename, entry.lineno); + } else { + error_msg(@NAME, "%s was defined on the command line", $NAME); + } + YYERROR; + } + } + | CDF_DEFINE cdf_constant NAME '=' cdf_expr[value] override + { /* accept, but as error */ + if( scanner_parsing() ) { + error_msg(@NAME, "CDF error: %s = value invalid", $NAME); + } + } + | CDF_DEFINE cdf_constant NAME as OFF + { + cdfval_off( $NAME); + } + | CDF_DEFINE cdf_constant NAME as PARAMETER_kw override + /* + * "If the PARAMETER phrase is specified, the value referenced + * by compilation-variable-name-1 is obtained from the + * operating environment by an implementor-defined method...." + * It's a noop for us, because parameters defined with -D are + * available regardless. + */ + { + if( 0 == dictionary.count($NAME) ) { + yywarn("CDF: '%s' is defined AS PARAMETER " + "but was not defined", $NAME); + } + } + | CDF_DEFINE FEATURE as ON { + auto feature = cbl_gcobol_feature_t($2); + if( ! cobol_gcobol_feature_set(feature, true) ) { + error_msg(@FEATURE, ">>DEFINE %EBCDIC-MODE is invalid within program body"); + } + } + | CDF_DEFINE FEATURE as OFF { + auto feature = cbl_gcobol_feature_t($2); + if( ! cobol_gcobol_feature_set(feature, false) ) { + error_msg(@FEATURE, ">>DEFINE %EBCDIC-MODE is invalid within program body"); + } + } + ; +cdf_constant: %empty + | CONSTANT + ; +override: %empty { $$ = false; } + | OVERRIDE { $$ = true; } + ; + +cdf_turn: TURN except_names except_check + { + apply_cdf_turn(exception_turns); + exception_turns.clear(); + } + ; + +cdf_call_convention: + CALL_COBOL { + current_call_convention(cbl_call_cobol_e); + } + | CALL_VERBATIM { + current_call_convention(cbl_call_verbatim_e); + } + ; + + +except_names: except_name + | except_names except_name + ; +except_name: EXCEPTION_NAME[ec] { + assert($ec != ec_none_e); + exception_turns.add_exception(ec_type_t($ec)); + } + | EXCEPTION_NAME[ec] filenames { + assert($ec != ec_none_e); + std::list<size_t> files; + std::copy( $filenames->begin(), $filenames->end(), + std::back_inserter(files) ); + exception_turns.add_exception(ec_type_t($ec), files); + } + ; + +except_check: CHECKING on { exception_turns.enabled = true; } + | CHECKING OFF { exception_turns.enabled = false; } + | CHECKING on with LOCATION + { + exception_turns.enabled = exception_turns.location = true; + } + ; + +filenames: filename { + $$ = new std::set<size_t>; + $$->insert(symbol_index(symbol_elem_of($1))); + } + | filenames filename { + $$ = $1; + auto inserted = $$->insert(symbol_index(symbol_elem_of($2))); + if( ! inserted.second ) { + error_msg(@2, "%s: No file-name shall be specified more than " + " once for one exception condition", $filename->name); + } + } + ; +filename: NAME + { + struct symbol_elem_t *e = symbol_file(PROGRAM, $1); + if( !(e && e->type == SymFile) ) { + error_msg(@NAME, "invalid file name '%s'", $NAME); + YYERROR; + } + $$ = cbl_file_of(e); + } + ; + +cdf_if: CDF_IF cdf_cond_expr { + scanner_parsing(YDF_CDF_IF, $2); + } + | CDF_IF error { + ////if( scanner_parsing() ) yyerrok; + } CDF_END_IF { // not pushed, don't pop + if( ! scanner_parsing() ) YYACCEPT; + } + ; + +cdf_evaluate: CDF_EVALUATE cdf_expr + | CDF_EVALUATE TRUE_kw + ; + +cdf_eval_when: CDF_WHEN cdf_eval_obj + ; + +cdf_eval_obj: cdf_cond_expr + | cdf_expr THRU cdf_expr + | OTHER + ; + +cdf_cond_expr: BOOL + | NAME DEFINED[maybe] + { + auto p = dictionary.find($1); + bool found = p != dictionary.end(); + if( !$maybe ) found = ! found; + if( ! found ) { + $$ = !$2; + dbgmsg("CDF: %s not found in dictionary (result %s)", + $1, $$? "true" : "false"); + } else { + $$ = $2; + dbgmsg("CDF: %s found in dictionary (result %s)", + $1, $$? "true" : "false"); + } + } + | cdf_bool_expr { $$ = $1(@1) == 0? false : true; } + | FEATURE DEFINED { + const auto& feature($1); + $$ = (feature == int(feature & cbl_gcobol_features)); + dbgmsg("CDF: feature 0x%02x is %s", $1, $$? "ON" : "OFF"); + } + ; + + /* + * "Abbreviated combined relation conditions + * shall not be specified." + */ +cdf_bool_expr: cdf_bool_expr OR cdf_and { $$ = cdfval_t($1(@1) || $3(@3)); } + | cdf_and + ; + +cdf_and: cdf_and AND cdf_reloper { $$ = cdfval_t($1(@1) && $3(@3)); } + | cdf_reloper + ; + +cdf_reloper: cdf_relexpr + | NOT cdf_relexpr { $$ = cdfval_t($2.number? 1 : 0); } + ; + +cdf_relexpr: cdf_relexpr '<' cdf_expr { $$ = $1(@1) < $3(@3); } + | cdf_relexpr LE cdf_expr { $$ = $1(@1) <= $3(@3); } + | cdf_relexpr '=' cdf_expr { + $$ = cdfval_t(false); + if( ( $1.string && $3.string) || + (!$1.string && !$3.string) ) + { + $$ = $1 == $3; + } else { + const char *msg = $1.string? + "incommensurate comparison is FALSE: '%s' = %ld" : + "incommensurate comparison is FALSE: %ld = '%s'" ; + error_msg(@1, msg); + } + } + | cdf_relexpr NE cdf_expr + { + $$ = cdfval_t(false); + if( ( $1.string && $3.string) || + (!$1.string && !$3.string) ) + { + $$ = $1 != $3; + } else { + const char *msg = $1.string? + "incommensurate comparison is FALSE: '%s' = %ld" : + "incommensurate comparison is FALSE: %ld = '%s'" ; + error_msg(@1, msg); + } + } + | cdf_relexpr GE cdf_expr { $$ = $1(@1) >= $3(@3); } + | cdf_relexpr '>' cdf_expr { $$ = $1(@1) > $3(@3); } + | cdf_expr + ; + +cdf_expr: cdf_expr '+' cdf_expr { $$ = $1(@1) + $3(@3); } + | cdf_expr '-' cdf_expr { $$ = $1(@1) - $3(@3); } + | cdf_expr '*' cdf_expr { $$ = $1(@1) * $3(@3); } + | cdf_expr '/' cdf_expr { $$ = $1(@1) / $3(@3); } + | '+' cdf_expr %prec NEG { $$ = $2(@2); } + | '-' cdf_expr %prec NEG { $$ = negate($2(@2)); } + | '(' cdf_bool_expr ')' { $$ = $2(@2); } + | cdf_factor + ; + +cdf_factor: NAME { + auto that = dictionary.find($1); + if( that != dictionary.end() ) { + $$ = that->second; + } else { + if( ! scanner_parsing() ) { + yywarn("CDF skipping: no such variable '%s' (ignored)", $1); + } else { + error_msg(@NAME, "CDF error: no such variable '%s'", $1); + } + $$ = cdfval_t(); + } + } + | NUMBER { $$ = cdfval_t($1); } + | LITERAL { $$ = cdfval_t($1); } + | NUMSTR { + auto value = integer_literal($NUMSTR); + if( !value.second ) { + error_msg(@1, "CDF error: parsed %s as %ld", + $NUMSTR, value.first); + YYERROR; + } + $$ = cdfval_t(value.first); + } + ; + +copy: copy_impl + ; +copy_impl: copybook_name suppress REPLACING replace_bys + | copybook_name suppress + ; +copybook_name: COPY name_one[src] + { + if( -1 == copybook.open(@src, $src.string) ) { + error_msg(@src, "could not open copybook file " + "for '%s'", $src.string); + YYERROR; + } + } + | COPY name_one[src] IN name_one[lib] + { + copybook.library(@lib, $lib.string); + if( -1 == copybook.open(@src, $src.string) ) { + error_msg(@src, "could not open copybook file " + "for '%s' in '%'s'", $src.string, $lib.string); + YYERROR; + } + } + ; + +replace_bys: replace_by + | replace_bys replace_by + ; + +replace_by: name_any[a] BY name_any[b] + { + bool add_whitespace = false; + replace_type_t type = {}; + switch($a.token) { + case YDF_NUMSTR: + case YDF_LITERAL: + type = string_e; + break; + case YDF_NAME: + type = token_e; + break; + case YDF_PSEUDOTEXT: + type = pseudo_e; + add_whitespace = $b.token != YDF_PSEUDOTEXT; + break; + default: + cbl_err("%s:%d: logic error on token %s", + __FILE__, __LINE__, keyword_str($a.token)); + break; + } + char *replacement = const_cast<char*>($b.string); + if( add_whitespace ) { + char *s = xasprintf(" %s ", replacement); + free(replacement); + replacement = s; + } + copybook.replacement( type, $a.string, replacement ); + } + ; + +suppress: %empty + | SUPPRESS + { + copybook.suppress(); + } + ; + +name_any: namelit + | PSEUDOTEXT { $$ = (cdf_arg_t){YDF_PSEUDOTEXT, $1}; } + ; + +name_one: NAME + { + cdf_arg_t arg = { YDF_NAME, $1 }; + auto p = dictionary.find($1); + + if( p != dictionary.end() ) { + arg.string = p->second.string; + } + $$ = arg; + } + | NUMSTR { $$ = (cdf_arg_t){YDF_NUMSTR, $1}; } + | LITERAL { $$ = (cdf_arg_t){YDF_LITERAL, $1}; } + ; + +namelit: name + { + cdf_arg_t arg = { YDF_NAME, $1 }; + auto p = dictionary.find($1); + + if( p != dictionary.end() ) { + arg.string = p->second.string; + } + $$ = arg; + } + | name subscripts + { + char *s = xasprintf( "%s%s", $1, $2 ); + free(const_cast<char*>($1)); + free(const_cast<char*>($2)); + + cdf_arg_t arg = { YDF_NAME, s }; + $$ = arg; + } + | NUMSTR { $$ = (cdf_arg_t){YDF_NUMSTR, $1}; } + | LITERAL { $$ = (cdf_arg_t){YDF_LITERAL, $1}; } + ; + +name: NAME + | name inof NAME + { + char *s = xasprintf( "%s %s %s", $1, $2, $3 ); + assert($$ == $1); + free(const_cast<char*>($1)); + free(const_cast<char*>($3)); + $$ = s; + } + ; +inof: IN { static const char in[] = "IN"; $$ = in; } + | OF { static const char of[] = "OF"; $$ = of; } + ; + +subscripts: subscript + | subscripts subscript + { + char *s = xasprintf("%s%s", $1, $2 ); + if( $$ != $1 ) free(const_cast<char*>($$)); + free(const_cast<char*>($1)); + free(const_cast<char*>($2)); + $$ = s; + } + ; +subscript: SUBSCRIPT + | LSUB subscript RSUB + { + char *s = xasprintf( "%s%s%s", $1, $2, $3 ); + free(const_cast<char*>($1)); + free(const_cast<char*>($2)); + free(const_cast<char*>($3)); + $$ = s; + } + ; + +as: %empty + | AS + ; + +on: %empty + | ON + ; + +with: %empty + | WITH + ; + +%% + +static YYLTYPE cdf_location; + +static YYLTYPE +location_set( const YYLTYPE& loc ) { + return cdf_location = loc; +} + +bool // used by cobol1.cc +defined_cmd( const char arg[] ) +{ + cdfval_t value(1); + + char *name = xstrdup(arg); + char *p = strchr(name, '='); + if(p) { + *p++ = '\0'; + int pos, number; + if( 1 == sscanf(p, "%d%n", &number, &pos) && size_t(pos) == strlen(p) ) { + value = cdfval_t(number); + } else { + value = cdfval_t(p); // it's a string + } + } + + dictionary[name] = value; + + auto cdf_name = dictionary.find(name); + assert(cdf_name != dictionary.end()); + assert(cdf_name->second.is_numeric() || cdf_name->second.string != NULL); + + if( yydebug ) { + if( cdf_name->second.is_numeric() ) { + dbgmsg("%s: added -D %s = %ld", __func__, name, cdf_name->second.as_number()); + } else { + dbgmsg("%s: added -D %s = \"%s\"", __func__, name, cdf_name->second.string); + } + } + return true; +} + +bool operator==( const cdfval_base_t& lhs, int rhs ) { + gcc_assert( !lhs.string ); + return lhs.number == rhs; +} + +bool operator||( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return lhs.number || rhs.number; +} + +bool operator&&( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return lhs.number && rhs.number; +} + +cdfval_t operator<( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number < rhs.number); +} + +cdfval_t operator<=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number <= rhs.number); +} + +cdfval_t operator==( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + if( lhs.string && rhs.string ) { + return cdfval_t(0 == strcasecmp(lhs.string, rhs.string)); + } + if( !lhs.string && !rhs.string ) { + return cdfval_t(lhs.number == rhs.number); + } + cbl_internal_error("incommensurate operands"); + return false; +} + +cdfval_t operator!=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + if( lhs.string && rhs.string ) { + return cdfval_t(0 != strcasecmp(lhs.string, rhs.string)); + } + if( !lhs.string && !rhs.string ) { + return cdfval_t(lhs.number != rhs.number); + } + cbl_internal_error("incommensurate operands"); + return false; +} + +cdfval_t operator>=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number >= rhs.number); +} + +cdfval_t operator>( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number > rhs.number); +} + +cdfval_t operator+( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number + rhs.number); +} + +cdfval_t operator-( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number - rhs.number); +} + +cdfval_t operator*( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number * rhs.number); +} + +cdfval_t operator/( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number / rhs.number); +} + +cdfval_t negate( cdfval_base_t lhs ) { + gcc_assert( !lhs.string ); + lhs.number = -lhs.number; + return lhs; +} + +#undef yylex +int yylex(void); + +static int ydflex(void) { + return yylex(); +} + +bool +cdf_value( const char name[], cdfval_t value ) { + auto p = dictionary.find(name); + + if( p != dictionary.end() ) return false; + + dictionary[name] = value; + return true; +} + +const cdfval_t * +cdf_value( const char name[] ) { + auto p = dictionary.find(name); + + if( p == dictionary.end() ) return NULL; + + return &p->second; +} + +static bool +verify_integer( const YDFLTYPE& loc, const cdfval_base_t& val ) { + if( val.string ) { + error_msg(loc, "'%s' is not an integer", val.string); + return false; + } + return true; +} + +cdfval_base_t& +cdfval_base_t::operator()( const YDFLTYPE& loc ) { + static cdfval_t zero(0); + return verify_integer(loc, *this) ? *this : zero; +} diff --git a/gcc/cobol/cdfval.h b/gcc/cobol/cdfval.h new file mode 100644 index 0000000..1453f2a --- /dev/null +++ b/gcc/cobol/cdfval.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CDF_VAL_H_ +#define _CDF_VAL_H_ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> + +bool scanner_parsing(); + +struct YDFLTYPE; +struct cdfval_base_t { + bool off; + const char *string; + int64_t number; + cdfval_base_t& operator()( const YDFLTYPE& loc ); +}; + +struct cdf_arg_t { + int token; + const char *string; +}; + +extern int yylineno; +const char * cobol_filename(); + +struct cdfval_t : public cdfval_base_t { + int lineno; + const char *filename; + + cdfval_t() + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = 0; + } + cdfval_t( const char value[] ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = value; + cdfval_base_t::number = 0; + } + cdfval_t( long long value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = value; + } + cdfval_t( int64_t value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = value; + } + cdfval_t( int value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = value; + } + cdfval_t( const cdfval_base_t& value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t *self(this); + *self = value; + } + + bool is_numeric() const { return ! (off || string); } + int64_t as_number() const { assert(is_numeric()); return number; } +}; + +bool +cdf_value( const char name[], cdfval_t value ); + +const cdfval_t * +cdf_value( const char name[] ); + +#endif diff --git a/gcc/cobol/cobol-system.h b/gcc/cobol/cobol-system.h new file mode 100644 index 0000000..81529bd --- /dev/null +++ b/gcc/cobol/cobol-system.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef COBOL_SYSTEM_H +#define COBOL_SYSTEM_H + +// The following "local" #include is part of the GCC core code +#include "config.h" + +/* Define this so that inttypes.h defines the PRI?64 macros even + when compiling with a C++ compiler. Define it here so in the + event inttypes.h gets pulled in by another header it is already + defined. */ +#define __STDC_FORMAT_MACROS + +// These must be included before the #poison declarations in system.h. + +#define INCLUDE_STRING +#define INCLUDE_VECTOR +#define INCLUDE_MAP +#define INCLUDE_SET +#define INCLUDE_LIST +#define INCLUDE_ALGORITHM + +#include <iterator> +#include <stack> +#include <deque> +#include <numeric> +#include <limits> +#include <cmath> + +#include <unordered_map> +#include <unordered_set> + +// The following "local" #include is part of the GCC core code +#include "system.h" + +#endif diff --git a/gcc/cobol/cobol1.cc b/gcc/cobol/cobol1.cc new file mode 100644 index 0000000..c2e68ed --- /dev/null +++ b/gcc/cobol/cobol1.cc @@ -0,0 +1,692 @@ +/* gcobol backend interface + Copyright (C) 2021-2025 Free Software Foundation, Inc. + Contributed by Robert J. Dubner and James K. Lowden + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#include "diagnostic.h" +#include "opts.h" +#include "debug.h" +#include "langhooks.h" +#include "langhooks-def.h" +#include "target.h" +#include "stringpool.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "exceptl.h" +#include "exceptg.h" +#include "util.h" +#include "gengen.h" // This has some GTY(()) markers +#include "structs.h" // This has some GTY(()) markers + +/* Required language-dependent contents of a type. + + Without it, we get + + gt-cobol-cobol1.h:858: undefined reference to `gt_pch_nx_lang_type(void *) + + */ + +struct GTY (()) lang_type + { + char dummy; + }; + +/* Language-dependent contents of a decl. + Without it, we get + + gt-cobol-cobol1.h:674: more undefined references to `gt_pch_nx_lang_decl + + */ + +struct GTY (()) lang_decl + { + char dummy; + }; + +/* + * Language-dependent contents of an identifier. + * This must include a tree_identifier. + */ +struct GTY (()) lang_identifier + { + struct tree_identifier common; + }; + +/* The resulting tree type. */ + +union GTY ((desc ("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"), + chain_next ("CODE_CONTAINS_STRUCT (TREE_CODE (&%h.generic), " + "TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN " + "(&%h.generic)) : NULL"))) lang_tree_node + { + union tree_node GTY ((tag ("0"), desc ("tree_node_structure (&%h)"))) generic; + struct lang_identifier GTY ((tag ("1"))) identifier; + }; + +/* We don't use language_function. + + But without the placeholder: + + /usr/bin/ld: gtype-desc.o: in function `gt_ggc_mx_function(void*)': + ../build/gcc/gtype-desc.cc:1763: undefined reference to `gt_ggc_mx_language_function(void*)' + /usr/bin/ld: gtype-desc.o: in function `gt_pch_nx_function(void*)': + ../build/gcc/gtype-desc.cc:5727: undefined reference to `gt_pch_nx_language_function(void*)' + + */ + +struct GTY (()) language_function + { + int dummy; + }; + +/* + * Language hooks. + */ + +#define ATTR_NULL 0 +#define ATTR_LEAF_LIST (ECF_LEAF) +#define ATTR_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF) +#define ATTR_NOTHROW_LEAF_MALLOC_LIST (ECF_NOTHROW | ECF_LEAF | ECF_MALLOC) +#define ATTR_CONST_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF | ECF_CONST) +#define ATTR_PURE_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF | ECF_PURE) +#define ATTR_NOTHROW_LIST (ECF_NOTHROW) +#define ATTR_CONST_NOTHROW_LIST (ECF_NOTHROW | ECF_CONST) +#define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST \ + (ECF_NOTHROW | ECF_LEAF | ECF_MALLOC) +#define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST \ + (ECF_NOTHROW | ECF_LEAF) +#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \ + (ECF_COLD | ECF_NORETURN | \ + ECF_NOTHROW | ECF_LEAF) +#define ATTR_PURE_NOTHROW_NONNULL_LEAF (ECF_PURE|ECF_NOTHROW|ECF_LEAF) +#define ATTR_MALLOC_WARN_UNUSED_RESULT_NOTHROW_NONNULL_LEAF (ECF_MALLOC|ECF_NOTHROW|ECF_LEAF) +#define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST (ECF_TM_PURE|ECF_NORETURN|ECF_NOTHROW|ECF_LEAF|ECF_COLD) +#define ATTR_NORETURN_NOTHROW_LIST (ECF_NORETURN|ECF_NOTHROW) +#define ATTR_NOTHROW_NONNULL_LEAF (ECF_NOTHROW|ECF_LEAF) + +static void +gfc_define_builtin (const char *name, tree type, enum built_in_function code, + const char *library_name, int attr) +{ + tree decl; + + decl = add_builtin_function (name, type, code, BUILT_IN_NORMAL, + library_name, NULL_TREE); + set_call_expr_flags (decl, attr); + + set_builtin_decl (code, decl, true); +} + +static void +create_our_type_nodes_init() + { + for(int i=0; i<256; i++) + { + char_nodes[i] = build_int_cst_type(CHAR, i); + } + + // Create some useful constants to avoid cluttering up the code + // build_int_cst_type() calls + pvoid_type_node = build_pointer_type(void_type_node); + integer_minusone_node = build_int_cst_type(INT, -1); + integer_two_node = build_int_cst_type(INT, 2); + integer_eight_node = build_int_cst_type(INT, 8); + size_t_zero_node = build_int_cst_type(SIZE_T, 0); + int128_zero_node = build_int_cst_type(INT128, 0); + int128_five_node = build_int_cst_type(INT128, 5); + int128_ten_node = build_int_cst_type(INT128, 10); + char_ptr_type_node = build_pointer_type(CHAR); + uchar_ptr_type_node = build_pointer_type(UCHAR); + wchar_ptr_type_node = build_pointer_type(WCHAR); + long_double_ten_node = build_real_from_int_cst( + LONGDOUBLE, + build_int_cst_type(INT,10)); + sizeof_size_t = build_int_cst_type(SIZE_T, sizeof(size_t)); + sizeof_pointer = build_int_cst_type(SIZE_T, sizeof(void *)); + + bool_true_node = build2(EQ_EXPR, + integer_type_node, + integer_one_node, + integer_one_node); + + bool_false_node = build2( EQ_EXPR, + integer_type_node, + integer_one_node, + integer_zero_node); + } + + +static bool +cobol_langhook_init (void) + { + build_common_tree_nodes (true); + + create_our_type_nodes_init(); + + tree char_pointer_type_node = build_pointer_type (char_type_node); + tree const_char_pointer_type_node + = build_pointer_type (build_type_variant (char_pointer_type_node, 1, 0)); + + tree ftype; + + ftype = build_function_type_list (pvoid_type_node, + size_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_malloc", + ftype, + BUILT_IN_MALLOC, + "malloc", + ATTR_NOTHROW_LEAF_MALLOC_LIST); + + ftype = build_function_type_list (pvoid_type_node, pvoid_type_node, + size_type_node, NULL_TREE); + gfc_define_builtin ("__builtin_realloc", ftype, BUILT_IN_REALLOC, + "realloc", ATTR_NOTHROW_LEAF_LIST); + + ftype = build_function_type_list (void_type_node, + pvoid_type_node, NULL_TREE); + gfc_define_builtin ("__builtin_free", ftype, BUILT_IN_FREE, + "free", ATTR_NOTHROW_LEAF_LIST); + + ftype = build_function_type_list (pvoid_type_node, + const_ptr_type_node, + integer_type_node, + size_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_memchr", ftype, BUILT_IN_MEMCHR, + "memchr", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + + ftype = build_function_type_list (size_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strlen", ftype, BUILT_IN_STRLEN, + "strlen", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + + ftype = build_function_type_list (char_pointer_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strdup", ftype, BUILT_IN_STRDUP, + "strdup", ATTR_MALLOC_WARN_UNUSED_RESULT_NOTHROW_NONNULL_LEAF); + + ftype = build_function_type_list (void_type_node, NULL_TREE); + gfc_define_builtin ("__builtin_abort", ftype, BUILT_IN_ABORT, + "abort", ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST); + + ftype = build_function_type_list (void_type_node, + integer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_exit", ftype, BUILT_IN_EXIT, + "exit", ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST); + + ftype = build_function_type_list (integer_type_node, + const_char_pointer_type_node, + const_char_pointer_type_node, + size_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strncmp", ftype, BUILT_IN_STRNCMP, + "strncmp", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + ftype = build_function_type_list (integer_type_node, + const_char_pointer_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strcmp", ftype, BUILT_IN_STRCMP, + "strcmp", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + ftype = build_function_type_list (char_pointer_type_node, + char_pointer_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strcpy", ftype, BUILT_IN_STRCPY, + "strcpy", ATTR_NOTHROW_NONNULL_LEAF); + + build_common_builtin_nodes (); + + // Make sure this is a supported configuration. + if( !targetm.scalar_mode_supported_p (TImode) || !float128_type_node ) + { + sorry ("COBOL requires a 64-bit configuration"); + } + + return true; + } + + +void cobol_set_debugging( bool flex, bool yacc, bool parser ); +void cobol_set_indicator_column( int column ); +void copybook_directory_add( const char gcob_copybook[] ); +void copybook_extension_add( const char ext[] ); +bool defined_cmd( const char arg[] ); +void lexer_echo( bool tf ); + +static void +cobol_langhook_init_options_struct (struct gcc_options *opts) { + opts->x_yy_flex_debug = 0; + opts->x_yy_debug = 0; + opts->x_cobol_trace_debug = 0; + + cobol_set_debugging( false, false, false ); + + copybook_directory_add( getenv("GCOB_COPYBOOK") ); +} + +static unsigned int +cobol_option_lang_mask (void) { + return CL_Cobol; +} + +bool use_static_call( bool yn ); +void add_cobol_exception( ec_type_t type, bool ); + +bool include_file_add(const char input[]); +bool preprocess_filter_add( const char filter[] ); + +bool max_errors_exceeded( int nerr ) { + return flag_max_errors != 0 && flag_max_errors <= nerr; +} + +static void +enable_exceptions( bool enable ) { + for( char * name = xstrdup(cobol_exceptions); + NULL != (name = strtok(name, ",")); name = NULL ) { + ec_type_t type = ec_type_of(name); + if( type == ec_none_e ) { + yywarn("unrecognized exception '%s' was ignored", name); + continue; + } + ec_disposition_t disposition = ec_type_disposition(type); + if( disposition != ec_implemented(disposition) ) { + cbl_unimplemented("exception '%s'", name); + } + add_cobol_exception(type, enable ); + } +} + +static bool +cobol_langhook_handle_option (size_t scode, + const char *arg ATTRIBUTE_UNUSED, + HOST_WIDE_INT value, + int kind ATTRIBUTE_UNUSED, + location_t loc ATTRIBUTE_UNUSED, + const struct + cl_option_handlers *handlers ATTRIBUTE_UNUSED) + { + // process_command (decoded_options_count, decoded_options); + enum opt_code code = (enum opt_code) scode; + + switch(code) + { + case OPT_D: + defined_cmd(arg); + return true; + case OPT_E: + lexer_echo(true); + return true; + + case OPT_I: + copybook_directory_add(arg); + return true; + case OPT_copyext: + copybook_extension_add(cobol_copyext); + return true; + + case OPT_fstatic_call: + use_static_call( arg? true : false ); + return true; + + case OPT_fdefaultbyte: + wsclear(cobol_default_byte); + return true; + + case OPT_fflex_debug: + yy_flex_debug = 1; + cobol_set_debugging( true, yy_debug == 1, cobol_trace_debug == 1 ); + return true; + case OPT_fyacc_debug: + yy_debug = 1; + cobol_set_debugging(yy_flex_debug == 1, + true, + cobol_trace_debug == 1 ); + return true; + case OPT_ftrace_debug: + cobol_set_debugging( yy_flex_debug == 1, yy_debug == 1, true ); + return true; + + case OPT_fcobol_exceptions: { + if( cobol_exceptions[0] == '=' ) cobol_exceptions++; + enable_exceptions(value == 1); + return true; + } + + case OPT_fmax_errors: + flag_max_errors = atoi(arg); + return true; + + case OPT_ffixed_form: + cobol_set_indicator_column(-7); + return true; + case OPT_ffree_form: + cobol_set_indicator_column(0); + return true; + + case OPT_findicator_column: + cobol_set_indicator_column( indicator_column ); + return true; + + case OPT_dialect: + cobol_dialect_set(cbl_dialect_t(cobol_dialect)); + return true; + + case OPT_fsyntax_only: + mode_syntax_only(identification_div_e); + break; + case OPT_preprocess: + if( ! preprocess_filter_add(arg) ) { + cbl_errx( "could not execute preprocessor %s", arg); + } + return true; + case OPT_include: + if( ! include_file_add(cobol_include) ) { + cbl_errx( "could not include %s", cobol_include); + } + return true; + + case OPT_main: + // This isn't right. All OPT_main should be replaced + error("We should never see a non-equal dash-main in cobol1.c"); + exit(1); + return true; + + case OPT_main_: + register_main_switch(cobol_main_string); + return true; + + case OPT_nomain: + return true; + + case OPT_finternal_ebcdic: + cobol_gcobol_feature_set(feature_internal_ebcdic_e); + return true; + + default: + break; + } + + Cobol_handle_option_auto (&global_options, &global_options_set, + scode, arg, value, + cobol_option_lang_mask (), kind, + loc, handlers, global_dc); + + return true; + } + +void +cobol_parse_files (int nfile, const char **files); + +static void +cobol_langhook_parse_file (void) + { + cobol_parse_files (num_in_fnames, in_fnames); + } + +static tree +cobol_langhook_type_for_mode (enum machine_mode mode, int unsignedp) + { + if (mode == TYPE_MODE (float_type_node)) + return float_type_node; + + if (mode == TYPE_MODE (double_type_node)) + return double_type_node; + + if (mode == TYPE_MODE (float32_type_node)) + return float32_type_node; + + if (mode == TYPE_MODE (float64_type_node)) + return float64_type_node; + + if (mode == TYPE_MODE (float128_type_node)) + return float128_type_node; + + if (mode == TYPE_MODE (intQI_type_node)) + return unsignedp ? unsigned_intQI_type_node : intQI_type_node; + if (mode == TYPE_MODE (intHI_type_node)) + return unsignedp ? unsigned_intHI_type_node : intHI_type_node; + if (mode == TYPE_MODE (intSI_type_node)) + return unsignedp ? unsigned_intSI_type_node : intSI_type_node; + if (mode == TYPE_MODE (intDI_type_node)) + return unsignedp ? unsigned_intDI_type_node : intDI_type_node; + if (mode == TYPE_MODE (intTI_type_node)) + return unsignedp ? unsigned_intTI_type_node : intTI_type_node; + + if (mode == TYPE_MODE (integer_type_node)) + return unsignedp ? unsigned_type_node : integer_type_node; + + if (mode == TYPE_MODE (long_integer_type_node)) + return unsignedp ? long_unsigned_type_node : long_integer_type_node; + + if (mode == TYPE_MODE (long_long_integer_type_node)) + return unsignedp ? long_long_unsigned_type_node + : long_long_integer_type_node; + + if (COMPLEX_MODE_P (mode)) + { + if (mode == TYPE_MODE (complex_float_type_node)) + return complex_float_type_node; + if (mode == TYPE_MODE (complex_double_type_node)) + return complex_double_type_node; + if (mode == TYPE_MODE (complex_long_double_type_node)) + return complex_long_double_type_node; + if (mode == TYPE_MODE (complex_integer_type_node) && !unsignedp) + return complex_integer_type_node; + } + + return NULL; + } + +////static tree +////cobol_langhook_type_for_size (unsigned int bits ATTRIBUTE_UNUSED, +//// int unsignedp ATTRIBUTE_UNUSED) +//// { +//// gcc_unreachable (); +//// return NULL; +//// } + +/* Record a builtin function. We just ignore builtin functions. */ + +static tree +cobol_langhook_builtin_function (tree decl) + { + return decl; + } + +static bool +cobol_langhook_global_bindings_p (void) + { + return false; + } + +static tree +cobol_langhook_pushdecl (tree decl ATTRIBUTE_UNUSED) + { + // This function is necessary, but is apparently never being called + gcc_unreachable (); + } + +static tree +cobol_langhook_getdecls (void) + { + return NULL; + } + +char * +cobol_name_mangler(const char *cobol_name_) + { + // The caller should free the returned string. + + // This is a solution to the problem of hyphens and the fact that COBOL + // names can start with digits. + // + // COBOL names can't start with underscore; GNU assembler names can. + // Assembler names can't start with a digit 0-9; COBOL names can. + // + // We convert all COBOL names to lowercase, so uppercase characters aren't + // seen. + // + // COBOL names can have hyphens; assembler names can't. + // + // So if a name starts with a digit, we prepend an underscore. + // We convert the whole name to lowercase. + // We replace hyphens with '$' + // + + if( !cobol_name_ ) + { + return nullptr; + } + + // Allocate enough space for a prepended underscore and a final '\0' + char *cobol_name = (char *)xmalloc(strlen(cobol_name_)+2); + size_t n = 0; + if( cobol_name_[0] >= '0' && cobol_name_[0] <= '9' ) + { + // The name starts with 0-9, so we are going to lead it + // with an underscore + cobol_name[n++] = '_'; + } + for(size_t i=0; i<strlen(cobol_name_); i++) + { + // Convert to lowercase, replacing '-' with '$' + int ch = cobol_name_[i]; + if( ch == '-' ) + { + cobol_name[n++] = '$'; + } + else + { + cobol_name[n++] = TOLOWER(ch); + } + } + cobol_name[n++] = '\0'; + + return cobol_name; + } + +cbl_call_convention_t parser_call_target_convention( tree func ); + +static +void +cobol_set_decl_assembler_name (tree decl) + { + tree id; + + /* set_decl_assembler_name may be called on TYPE_DECL to record ODR + name for C++ types. By default types have no ODR names. */ + if (TREE_CODE (decl) == TYPE_DECL) + { + return; + } + + /* The language-independent code should never use the + DECL_ASSEMBLER_NAME for lots of DECLs. Only FUNCTION_DECLs and + VAR_DECLs for variables with static storage duration need a real + DECL_ASSEMBLER_NAME. */ + gcc_assert (TREE_CODE (decl) == FUNCTION_DECL + || (VAR_P (decl) && (TREE_STATIC (decl) + || DECL_EXTERNAL (decl) + || TREE_PUBLIC (decl)))); + + const char *name = IDENTIFIER_POINTER (DECL_NAME (decl)); + char *mangled_name = cobol_name_mangler(name); + + // A verbatim CALL does not get mangled. + if( cbl_call_verbatim_e == parser_call_target_convention(decl) ) + { + strcpy(mangled_name, name); + } + + id = get_identifier(mangled_name); + free(mangled_name); + + SET_DECL_ASSEMBLER_NAME (decl, id); + } + +/* Get a value for the SARIF v2.1.0 "artifact.sourceLanguage" property, + based on the list in SARIF v2.1.0 Appendix J. */ + +const char * +cobol_get_sarif_source_language(const char *) + { + return "cobol"; + } + +#undef LANG_HOOKS_BUILTIN_FUNCTION +#undef LANG_HOOKS_GETDECLS +#undef LANG_HOOKS_GLOBAL_BINDINGS_P +#undef LANG_HOOKS_HANDLE_OPTION +#undef LANG_HOOKS_INIT +#undef LANG_HOOKS_INIT_OPTIONS_STRUCT +#undef LANG_HOOKS_NAME +#undef LANG_HOOKS_OPTION_LANG_MASK +#undef LANG_HOOKS_PARSE_FILE +#undef LANG_HOOKS_PUSHDECL +#undef LANG_HOOKS_TYPE_FOR_MODE +////#undef LANG_HOOKS_TYPE_FOR_SIZE +#undef LANG_HOOKS_SET_DECL_ASSEMBLER_NAME +#undef LANG_HOOKS_GET_SARIF_SOURCE_LANGUAGE + +// We use GCC in the name, not GNU, as others do, +// because "GnuCOBOL" refers to a different GNU project. +// https://www.gnu.org/software/software.html +#define LANG_HOOKS_NAME "GCC COBOL" + +#define LANG_HOOKS_INIT cobol_langhook_init +#define LANG_HOOKS_OPTION_LANG_MASK cobol_option_lang_mask + +#define LANG_HOOKS_INIT_OPTIONS_STRUCT cobol_langhook_init_options_struct +#define LANG_HOOKS_HANDLE_OPTION cobol_langhook_handle_option + +#define LANG_HOOKS_BUILTIN_FUNCTION cobol_langhook_builtin_function +#define LANG_HOOKS_GETDECLS cobol_langhook_getdecls +#define LANG_HOOKS_GLOBAL_BINDINGS_P cobol_langhook_global_bindings_p +#define LANG_HOOKS_PARSE_FILE cobol_langhook_parse_file +#define LANG_HOOKS_PUSHDECL cobol_langhook_pushdecl + +#define LANG_HOOKS_TYPE_FOR_MODE cobol_langhook_type_for_mode +////#define LANG_HOOKS_TYPE_FOR_SIZE cobol_langhook_type_for_size + +#define LANG_HOOKS_SET_DECL_ASSEMBLER_NAME cobol_set_decl_assembler_name + +#define LANG_HOOKS_GET_SARIF_SOURCE_LANGUAGE cobol_get_sarif_source_language + +struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER; + +#include "gt-cobol-cobol1.h" +#include "gtype-cobol.h" diff --git a/gcc/cobol/config-lang.in b/gcc/cobol/config-lang.in new file mode 100644 index 0000000..ef35dcd --- /dev/null +++ b/gcc/cobol/config-lang.in @@ -0,0 +1,38 @@ +# Copyright (C) 2004-2025 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Configure looks for the existence of this file to auto-config each language. +# We define several parameters used by configure: +# +# language - name of language as it would appear in $(LANGUAGES) +# compilers - value to add to $(COMPILERS) +# diff_excludes - files to ignore when building diffs between two versions. + +language="cobol" + +compilers="cobol1\$(exeext)" + +target_libs="target-libgcobol" + +# Files that should be scanned by gengtype.c to generate the garbage +# collection tables. + +gtfiles="\$(srcdir)/cobol/cobol1.cc" + +# Do not build by default +build_by_default="no" diff --git a/gcc/cobol/convert.cc b/gcc/cobol/convert.cc new file mode 100644 index 0000000..a0ef9d5 --- /dev/null +++ b/gcc/cobol/convert.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cobol-system.h" + +#include "coretypes.h" +#include "tree.h" +#include "fold-const.h" +#include "convert.h" + +// This is required by some generic routines + +tree +convert (tree /*type*/, tree /*expr*/) +{ +// The routine is necessary, but in our testing of the GCOBOL compiler, it never +// is called. I am commenting this cloned code out. I am keeping it so I have +// something to refer to if and when the necessity to reconstitute it arises. +// RJ Dubner, 2025-02-17 +#if 0 + if (type == error_mark_node + || expr == error_mark_node + || TREE_TYPE (expr) == error_mark_node) + return error_mark_node; + + if (type == TREE_TYPE (expr)) + return expr; + + if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (TREE_TYPE (expr))) + return fold_convert (type, expr); + + switch (TREE_CODE (type)) + { + case VOID_TYPE: + case BOOLEAN_TYPE: + return fold_convert (type, expr); + case INTEGER_TYPE: + return fold (convert_to_integer (type, expr)); + case POINTER_TYPE: + return fold (convert_to_pointer (type, expr)); + case REAL_TYPE: + return fold (convert_to_real (type, expr)); + case COMPLEX_TYPE: + return fold (convert_to_complex (type, expr)); + default: + break; + } +#endif + + gcc_unreachable (); +} diff --git a/gcc/cobol/copybook.h b/gcc/cobol/copybook.h new file mode 100644 index 0000000..3e2cf9d --- /dev/null +++ b/gcc/cobol/copybook.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _COPYBOOK_H +#pragma message __FILE__ " included twice" +#else +#define _COPYBOOK_H + +FILE * copy_mode_start(); + +const char * cobol_filename(); +bool cobol_filename( const char *name, ino_t inode ); + +void scanner_parsing( int token, bool tf ); +void scanner_parsing_toggle(); +void scanner_parsing_pop(); + +/* + * COPY support On encountering a COPY statement, the parser continues + * to parse, collecting the replacement values, if any. At statement + * end (at the period), the system rearranges input to apply the + * replacements before the input text is read by the lexer. + */ + +enum replace_type_t { string_e, token_e, pseudo_e }; + +struct copybook_replace_t { + replace_type_t type; + const char *src, *tgt; +}; +class copybook_t; + +class copybook_elem_t { + friend copybook_t; + struct copybook_loc_t { + YYLTYPE loc; + const char *name; + copybook_loc_t() : name(NULL) {} + } source, library; + bool suppress; + static const char *extensions; + public: + struct { bool source, library; } literally; + int fd; + size_t nsubexpr; + std::deque<copybook_replace_t> replacements; + + copybook_elem_t() + : suppress(false) + , fd(-1) + , nsubexpr(0) + , regex_text(NULL) + { + literally = {}; + } + + void clear() { + suppress = false; + nsubexpr = 0; + if( fd ) close(fd); + fd = -1; + // TODO: free src & tgt + replacements.clear(); + } + + int open_file( const char dir[], bool literally = false ); + void extensions_add( const char ext[], const char alt[] ); + + static inline bool is_quote( const char ch ) { + return ch == '\'' || ch == '"'; + } + static inline bool quoted( const char name[] ) { + gcc_assert(name); + return is_quote(name[0]); + } + static char * dequote( const char orig[] ) { + gcc_assert(quoted(orig)); + auto name = (char*)xcalloc(1, strlen(orig)); + gcc_assert(name); + char *tgt = name; + + // For a literal name, we de-quote it and try to open it in the + // current working directory. The COBOL literal could include + // (escaped) doubled quotes, which we reduce to one. + for( const char *src = orig; src < orig + strlen(orig); ) { + if( is_quote(src[0]) ) { + if( src[0] == src[1] ) { + *tgt++ = *src++; // copy one of doubled quote + } + src++; // skip quote + continue; + } + *tgt++ = *src++; + } + *tgt = '\0'; + + return name; + } + +private: + char *regex_text; +}; + +class copybook_t { + std::list<const char *> directories; + copybook_elem_t book; + + // Take copybook name from the environment, if defined, else use it verbatim. + static const char * transform_name( const char name[] ) { + char uname[ strlen(name) ]; + const char *value = getenv(name); + if( !value ) { + auto ename = name + strlen(name); + std::transform( name, ename, uname, + []( char ch ) { return TOUPPER(ch); } ); + value = getenv(uname); // try uppercase of envar name + if( !value ) value = name; // keep original unmodified + } + if( false && value != uname ) { + dbgmsg("using copybook file '%s' from environment variable '%s'", + value, name); + } + return xstrdup(value); + } + + public: + copybook_t() { directories.push_back(NULL); } + + void suppress( bool tf = true ) { book.suppress = tf; } + bool suppressed() { return book.suppress; } + void source( const YYLTYPE& loc, const char name[] ) { + book.source.loc = loc; + book.literally.source = copybook_elem_t::quoted(name); + book.source.name = book.literally.source? + copybook_elem_t::dequote(name) : transform_name(name); + } + void library( const YYLTYPE& loc, const char name[] ) { + book.library.loc = loc; + book.literally.library = copybook_elem_t::quoted(name); + book.library.name = book.literally.library? + copybook_elem_t::dequote(name) : transform_name(name); + } + void replacement( replace_type_t type, const char src[], const char tgt[] ) { + copybook_replace_t elem = { type, src, tgt }; + book.replacements.push_back(elem); + } + + copybook_elem_t *current() { return &book; } + const char *source() const { return book.source.name; } + const char *library() const { return book.library.name; } + + int open(YYLTYPE loc, const char name[]) { + int fd = -1; + book.clear(); + this->source(loc, name); + + for( auto dir : directories ) { + if( true ) { + dbgmsg("copybook_t::open '%s' OF '%s' %s", + book.source.name, + dir? dir: ".", + book.literally.source? ", literally" : "" ); + } + if( (fd = book.open_file(dir, book.literally.source)) != -1 ) break; + } + return fd; + } + + const char * directory_add( const char name[] ) { + directories.push_back(name); + return name; + } + void extensions_add( const char ext[], const char alt[] ); +}; + +extern copybook_t copybook; + +#endif diff --git a/gcc/cobol/dts.h b/gcc/cobol/dts.h new file mode 100644 index 0000000..618f649 --- /dev/null +++ b/gcc/cobol/dts.h @@ -0,0 +1,109 @@ +/* + * Contributed to the public domain by James K. Lowden + * Tuesday October 17, 2023 + * + * This stand-in for std::regex was written because the implementation provided + * by the GCC libstdc++ in GCC 11 proved too slow, where "slow" means "appears + * not to terminate". Some invocations of std::regex_search took over 5 + * seconds (or minutes) and used over 1900 stack frames, and "never" returned. + * Because the same patterns and input presented no difficulty to the C standad + * library regex functions, I recast the C++ implementation in terms of + * regex(3). + * + * Unlike std::regex, this dts version supports only Posix EREs, and requires + * the input to be NUL-terminated. + * + * It is my hope and expectation to replace this implementation with the + * standard one when it is improved. + */ + +#include <stdexcept> +#include <vector> + +#include <regex.h> + +namespace dts { + class csub_match : public regmatch_t { + const char *input; + public: + const char *first, *second; + bool matched; + + explicit csub_match( const char *input = NULL) + : input(input) + , first(NULL), second(NULL), matched(false) + { + static regmatch_t empty = { -1, -1 }; + regmatch_t& self(*this); + self = empty; + } + csub_match( const char input[], const regmatch_t& m ) + : input(input) + { + regmatch_t& self(*this); + self = m; + matched = rm_so != -1; + first = rm_so == -1? NULL : input + rm_so; + second = rm_eo == -1? NULL : input + rm_eo; + } + + int length() const { return rm_eo - rm_so; } + }; + + typedef std::vector<csub_match> cmatch; + + class regex : public regex_t { + size_t nsubexpr; + const char *pattern; + public: + enum cflag_t { extended = REG_EXTENDED, icase = REG_ICASE }; + + regex( const char pattern[], int flags ) : pattern(pattern) { + nsubexpr = 1 + std::count(pattern, pattern + strlen(pattern), '('); + int erc = regcomp(this, pattern, flags); + if( erc != 0 ) { + char msg[80]; + regerror(erc, this, msg, sizeof msg); +#if __cpp_exceptions + throw std::logic_error(msg); +#else + pattern = NULL; + cbl_errx("%s", msg); +#endif + } + } + ~regex() { regfree(this); } + + size_t size() const { return nsubexpr; } + bool ready() const { return pattern != NULL; } + private: + regex( const regex& ) {} + }; + + inline bool regex_search( const char input[], const char *eoinput, + cmatch& cm, regex& re ) { + if( eoinput != NULL && *eoinput != '\0' ) { +#if __cpp_exceptions + static const char msg[] = "input not NUL-terminated"; + throw std::domain_error( msg ); +#else + eoinput = strchr(input, '\0'); +#endif + } + if( eoinput == NULL ) eoinput = strchr(input, '\0'); + auto ncm = re.size(); + cm.resize(ncm); + regmatch_t cms[ncm]; + + + int erc = regexec( &re, input, ncm, cms, 0 ); + if( erc != 0 ) return false; + std::transform( cms, cms+ncm, cm.begin(), + [input]( const regmatch_t& m ) { + return csub_match( input, m ); + } ); + return true; + } +}; + + diff --git a/gcc/cobol/except.cc b/gcc/cobol/except.cc new file mode 100644 index 0000000..859a76d --- /dev/null +++ b/gcc/cobol/except.cc @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "gengen.h" +#include "exceptl.h" +#include "util.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +static const ec_descr_t * +ec_type_descr( ec_type_t type ) { + auto p = std::find( __gg__exception_table, __gg__exception_table_end, type ); + if( p == __gg__exception_table_end ) { + cbl_internal_error("no such exception: 0x%04x", type); + } + return p; +} + +const char * +ec_type_str( ec_type_t type ) { + auto p = ec_type_descr(type); + return p->name; +} + +ec_disposition_t +ec_type_disposition( ec_type_t type ) { + auto p = ec_type_descr(type); + return p->disposition; +} + +static size_t +ec_level( ec_type_t ec ) { + if( ec == ec_all_e ) return 1; + if( 0 == (static_cast<unsigned int>(ec) & ~EC_ALL_E) ) return 2; + return 3; +} + +cbl_enabled_exceptions_t enabled_exceptions; + +void +cbl_enabled_exceptions_t::dump() const { + if( empty() ) { + cbl_message(2, "cbl_enabled_exceptions_t: no exceptions" ); + return; + } + int i = 1; + for( auto& elem : *this ) { + cbl_message(2, "cbl_enabled_exceptions_t: %2d {%s, %s, %s, %zu}", + i++, + elem.enabled? " enabled" : "disabled", + elem.location? "location" : " none", + ec_type_str(elem.ec), + elem.file ); + } +} + + +bool +cbl_enabled_exceptions_t::turn_on_off( bool enabled, + bool location, + ec_type_t type, + std::set<size_t> files ) +{ + // A Level 3 EC is added unilaterally; it can't knock out a lower level. + if( ec_level(type) == 3 ) { + if( files.empty() ) { + auto elem = cbl_enabled_exception_t(enabled, location, type); + apply(elem); + return true; + } + + for( size_t file : files ) { + auto elem = cbl_enabled_exception_t(enabled, location, type, file); + apply(elem); + } + return true; + } + + /* + * std::remove_if cannot be used with std::set because its elements are const. + * std::set::erase_if became available only in C++20. + */ + if( enabled ) { // remove any disabled + if( files.empty() ) { + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type]( const auto& elem ) { + return + !elem.enabled && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + } else { + for( size_t file: files ) { + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type, file]( const auto& elem ) { + return + !elem.enabled && + file == elem.file && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + } + } + auto elem = cbl_enabled_exception_t(enabled, location, type); + apply(elem); + return true; + } + assert(!enabled); + assert(ec_level(type) < 3); + + if( files.empty() ) { + if( type == ec_all_e ) { + clear(); + return true; + } + // Remove any matching Level-2 or Level-3 ECs, regardless of their files. + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type]( const auto& elem ) { + return + elem.enabled && + elem.ec != ec_all_e && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + // Keep the EC as an exception if a higher-level would othewise apply. + p = std::find_if( begin(), end(), + [ec = type]( const auto& elem ) { + return + elem.enabled && + (elem.ec == ec_all_e || elem.ec < ec) && + elem.file == 0 && + ec_cmp(ec, elem.ec); } ); + if( p != end() ) { + auto elem = cbl_enabled_exception_t(enabled, location, type); + apply(elem); + } + } else { + // Remove any matching or lower-level EC for the same file. + for( size_t file: files ) { + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type, file]( const auto& elem ) { + return + elem.enabled && + // ec is higher level and matches + (ec == ec_all_e || ec <= elem.ec) && + file == elem.file && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + // Keep the EC as an exception if a higher-level would othewise apply. + p = std::find_if( begin(), end(), + [ec = type, file]( const auto& elem ) { + return + elem.enabled && + (elem.ec == ec_all_e || elem.ec < ec) && + file == elem.file && + ec_cmp(ec, elem.ec); } ); + if( p != end() ) { + auto elem = cbl_enabled_exception_t(enabled, location, type, file); + apply(elem); + } + } + } + + return true; +} + +const cbl_enabled_exception_t * +cbl_enabled_exceptions_t::match( ec_type_t type, size_t file ) { + auto output = enabled_exception_match( begin(), end(), type, file ); + return output != end()? &*output : NULL; +} + +class choose_declarative { + size_t program; + public: + choose_declarative( size_t program ) : program(program) {} + + bool operator()( const cbl_declarative_t& dcl ) { + return dcl.global || program == symbol_at(dcl.section)->program; + } +}; + +bool +sort_supers_last( const cbl_declarative_t& a, const cbl_declarative_t& b ) { + if( symbol_at(a.section)->program == symbol_at(b.section)->program ) { + return a.section < b.section; + } + return symbol_at(a.section)->program > symbol_at(b.section)->program; +} + +cbl_field_t * new_temporary_decl(); + +/* + * For a program, create a "DECLARATIVES" entry in the symbol table, + * representing eligible declarative sections in priorty order: + * in-program first, followed by any global declaratives in parent + * programs. These decribe the USE criteria declared for each + * declarative section. + * + * The field's initial value is actually an array of + * cbl_declarartive_t, in which the first element is unused, except + * that array[0].section represents the number of elements, starting + * at array[1]. + * + * The returned value is the declarative's symbol index. It is passed + * to match_exception, which scans it for a declarative whose criteria + * match the raised exception. That function returns the + * cbl_declarative_t::section, which the program then uses to PERFORM + * that section. + */ +size_t +symbol_declaratives_add( size_t program, + const std::list<cbl_declarative_t>& dcls ) +{ + auto n = dcls.size(); + if( n == 0 ) return 0; + + auto blob = new cbl_declarative_t[ 1 + n ]; + + auto pend = std::copy_if( dcls.begin(), dcls.end(), blob + 1, + choose_declarative(program) ); + + std::sort( blob + 1, pend, sort_supers_last ); + + // Overload blob[0].section to be the count. + blob[0].section = (pend - blob) - 1; + + size_t len = reinterpret_cast<char*>(pend) + - reinterpret_cast<char*>(blob); + assert(len == (blob[0].section + 1) * sizeof(blob[0])); + + // Construct a "blob" in the symbol table. + static int blob_count = 1; + char achBlob[32]; + sprintf(achBlob, "_DECLARATIVE_BLOB%d_", blob_count++); + + cbl_field_data_t data = { .memsize = capacity_cast(len), + .capacity = capacity_cast(len), + .initial = reinterpret_cast<char*>(blob), + .picture = reinterpret_cast<char*>(blob) }; + cbl_field_t field = { 0, FldBlob, FldInvalid, constant_e, + 0, 0, 0, cbl_occurs_t(), 0, "", + 0, {}, data, NULL }; + strcpy(field.name, achBlob); + + auto e = symbol_field_add(program, &field); + parser_symbol_add(cbl_field_of(e)); + return symbol_index(e); +} + +/* + * Generate the code to evaluate declaratives. This is the "secret + * section" right after END DECLARATIVES. Its name is + * _DECLARATIVES_EVAL, and it is performed after every statement that + * could raise an exception. + * + * The code calls the library routine __gg__match_exception, which + * compares the raised exception to the criteria set by the USE + * statements in the DECLARATIVES super-section. It returns an + * integer, which is an index to the label in the symbol table that + * defines the section for the matching USE criteria. + * + * The generated code is a sequence of IF statements comparing the + * returned integer to that of each declarative. If equal, that + * section is PERFORMed, and control branches to the end of this + * section, and thence back to the statement it came from. + */ +#include "io.h" +size_t current_file_index(); +file_status_t current_file_handled_status(); + +void +declarative_runtime_match( cbl_field_t *declaratives, cbl_label_t *lave ) { + if( getenv("SHOW_PARSE") ) + { + fprintf(stderr, "( %d ) %s: \n", cobol_location().first_line, __func__); + } + if( getenv("TRACE1") ) + { + gg_printf(">>>>>>( %d )(%s) declaratives:%s lave:%s\n", + build_int_cst_type(INT, cobol_location().first_line), + gg_string_literal(__func__), + gg_string_literal(declaratives->name), + gg_string_literal(lave->name), + NULL_TREE); + } + static auto yes = new_temporary(FldConditional); + static auto psection = new_temporary(FldNumericBin5); + + // Send blob, get declarative section index. + auto index = new_temporary(FldNumericBin5); + parser_match_exception(index, declaratives); + + auto p = declaratives->data.initial; + const auto dcls = reinterpret_cast<const cbl_declarative_t *>(p); + size_t ndcl = dcls[0].section; // overloaded + + // Compare returned index to each section index. + for( auto p = dcls + 1; p < dcls + 1 + ndcl; p++ ) { + parser_set_numeric( psection, p->section ); + parser_relop( yes, index, eq_op, psection ); + parser_if( yes ); + auto section = cbl_label_of(symbol_at(p->section)); + parser_perform(section); + parser_label_goto(lave); + parser_else(); + parser_fi(); + } + + parser_label_label(lave); + + // A performed declarative may clear the raised exception with RESUME. + // If not cleared and fatal, the default handler will exit. + parser_check_fatal_exception(); +} + +ec_type_t +ec_type_of( const cbl_name_t name ) { + auto p = std::find_if( __gg__exception_table, __gg__exception_table_end, + [name]( const ec_descr_t& descr ) { + return 0 == strcasecmp(name, descr.name); + } ); + return p == __gg__exception_table_end? ec_none_e : p->type; +} + diff --git a/gcc/cobol/exceptg.h b/gcc/cobol/exceptg.h new file mode 100644 index 0000000..4500c0f --- /dev/null +++ b/gcc/cobol/exceptg.h @@ -0,0 +1,61 @@ + /* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EXCEPTL_H_ +#define _EXCEPTL_H_ + +/* This file contains exception processing declarations needed by the gcc/cobol + compilation. It's not included in the libgcobol compilation. */ + +extern const char * ec_type_str( ec_type_t type ); +extern ec_disposition_t ec_type_disposition( ec_type_t type ); + +extern void declarative_runtime_match(cbl_field_t *declaratives, + cbl_label_t *lave ); + +static inline ec_disposition_t +ec_implemented( ec_disposition_t disposition ) { + return ec_disposition_t( size_t(disposition) & ~0x80 ); +} + + +// >>TURN arguments +struct cbl_exception_files_t { + ec_type_t type; + size_t nfile; + size_t *files; + bool operator<( const cbl_exception_files_t& that ) { + return type < that.type; + } +}; + +size_t symbol_declaratives_add( size_t program, + const std::list<cbl_declarative_t>& dcls ); + +#endif diff --git a/gcc/cobol/gcobc b/gcc/cobol/gcobc new file mode 100755 index 0000000..93e1bd302 --- /dev/null +++ b/gcc/cobol/gcobc @@ -0,0 +1,465 @@ +#! /bin/sh -e + +# +# COPYRIGHT +# The gcobc program is in public domain. +# If it breaks then you get to keep both pieces. +# +# This file emulates the GnuCOBOL cobc compiler to a limited degree. +# For options that can be "mapped" (see migration-guide.1), it accepts +# cobc options, changing them to the gcobol equivalents. Options not +# recognized by the script are passed verbatim to gcobol, which will +# reject them unless of course they are gcobol options. +# +# User-defined variables, and their defaults: +# +# Variable Default Effect +# echo none If defined, echo the gcobol command +# gcobcx none Produce verbose messages +# gcobol ./gcobol Name of the gcobol binary +# GCOBCUDF PREFIX/share/cobol/udf/ Location of UDFs to be prepended to input +# +# By default, this script includes all files in $GCOBCUDF. To defeat +# that behavior, use GCOBCUDF=none. +# +# A list of supported options is produced with "gcobc -HELP". +# +## Maintainer note. In modifying this file, the following may make +## your life easier: +## +## - To force the script to exit, either set exit_status to 1, or call +## the error function. +## - As handled options are added, add them to the HELP here-doc. +## - The compiler can produce only one kind of output. In this +## script, that's known by $mode. Options that affect the type of +## output set the mode variable. Everything else is appended to the +## opts variable. +## + +if [ "$COBCPY" ] +then + copydir="-I$COBCPY" +fi + +if [ "$COB_COPY_DIR" ] +then + copydir="-I$COB_COPY_DIR" +fi + +# TODO: this file likely needs to query gcobol for its shared path instead +udf_default="${0%/*}/../share/gcobol/udf" +if [ ! -d "$udfdir" ] +then + # the one above is the installed one from the packages this one was previously used + udf_default="${0%/*}/../share/cobol/udf" +fi +udfdir="${GCOBCUDF:-$udf_default}" + +if [ -d "$udfdir" ] +then + for F in "$udfdir"/* + do + if [ -f "$F" ] + then + includes="$includes -include $F " + fi + done +else + if [ "${GCOBCUDF:-none}" != "none" ] + then + echo warning: no such directory: "'$GCOBCUDF'" + fi +fi + +exit_status=0 +skip_arg= +opts="$copydir ${dialect:--dialect mf} $includes" +mode=-shared + +incomparable="has no comparable gcobol option" + +if [ "${gcobcx:-0}" -gt 1 ] +then + set -x +fi + +error() { + echo "error: $1" >&2 + exit_status=1 +} +warn() { + echo "warning: $1 ignored" >&2 +} +ignore_arg() { + warn "$1" + skip_arg="$1" +} +no_warn() { :; } # silence is golden + +help() { + cat<<EOF +$0 recognizes the following GnuCOBOL cobc output mode options: + -b, -c, -m, -S, -x +$0 recognizes the following GnuCOBOL cobc compilation options: + -C + -d, --debug + -E + -g + --coverage + -ext + -fec=exception-name, -fno-ec=exception-name + -fformat + --fixed + -F, --free + -fimplicit-init + -h, --help + -save-temps= + -save-temps + -std=mvs + -std=mf +Options that are the same in gcobol and cobc are passed through verbatim. +Options that have no analog in gcobol produce a warning message. +To produce this message, use -HELP. +To see the constructed cobc command-line, use -echo. +To override the default cobc, set the "cobc" environment variable. +By default, gcobc invokes the gcobol the same directory the gcobc resides. +To override, set the gcobol environment variable. +EOF +} + +# +# Simply iterate over the command-line tokens. We can't use getopts +# here because it's not designed for single-dash words (e.g. -shared). +# + +for opt in "$@" +do + if [ "$skip_arg" ] + then + skip_arg= + continue + fi + + if [ "$pending_arg" ] + then + opts="$opts $pending_arg $opt" + pending_arg= + continue + fi + + case $opt in + -A | -Q) warn "$opt" + ;; + -b) mode="-shared" + ;; + -c) mode="-c" + ;; + --conf=*) warn "$opt" + ;; + -C) error "$opt $incomparable" + ;; + -d | --debug) opts="$opts -fcobol-exceptions=EC-ALL" + warn "$opt implies -fstack-check:" + ;; + # -D + -E) opts="$opts $opt -fsyntax-only" + ;; + -echo) echo="echo" + ;; + + -fec=* | -fno-ec=*) + opt="$(echo "$opt" | sed -E 's/-f(no-)?ec=/-f\1cobol-exceptions=EC-/g')" + opts="$opts $opt" + ;; + -ext) + pending_arg=$opt + ;; + -ext=*) opts="$opts $(echo "$opt" | sed 's/-ext=/-copyext ./')" + ;; + + # A.3 Compiler options + -fsign=*) warn "$opt" ;; + -ffold-copy=*) warn "$opt" ;; + -ffold-call=*) warn "$opt" ;; + -fmax-errors=*) warn "$opt" ;; + -fintrinsics=*) warn "$opt" ;; + -fdump=*) warn "$opt" ;; + -fcallfh=*) warn "$opt" ;; + -fsqlschema=*) warn "$opt" ;; + -fsql) warn "$opt" ;; + -fno-recursive-check) no_warn "$opt" ;; + -fstack-extended) warn "$opt" ;; + -fno-remove-unreachable) no_warn "$opt" ;; + -finline-intrinsic) warn "$opt" ;; + -ftrace) warn "$opt" ;; + -ftraceall) warn "$opt" ;; + -fsymtab) warn "$opt" ;; + # -fsyntax-only is identical + -fdebugging-line) warn "$opt" ;; + -fsource-location) warn "$opt" ;; + -fstack-check) warn "$opt" ;; + -fsection-exit-check) warn "$opt" ;; + -fimplicit-goback-check) warn "$opt" ;; + -fwrite-after) warn "$opt" ;; + -fmfcomment) warn "$opt" ;; + -facucomment) warn "$opt" ;; + -fno-trunc) no_warn "$opt" ;; + -fsingle-quote) warn "$opt" ;; + -foptional-file) warn "$opt" ;; + -fstatic-call | -fno-static-call) + opts="$opts $opt" + static_used="x" + ;; + -fno-gen-c-decl-static-call) no_warn "$opt" ;; + -fmf-files) warn "$opt" ;; + -ffile-format=*) warn "$opt" ;; + -fno-theaders) no_warn "$opt" ;; + -fno-tsource) no_warn "$opt" ;; + -fno-tmessages) no_warn "$opt" ;; + -ftsymbols) warn "$opt" ;; + -fdatamap) warn "$opt" ;; + -fno-diagnostics-show-option) no_warn "$opt" ;; + -fibmcomp) warn "$opt" ;; + -fno-ibmcomp) no_warn "$opt" ;; + + # A.4 Compiler dialect configuration options + -fname=*) warn "$opt" ;; + -freserved-words=*) warn "$opt" ;; + -ftab-width=*) warn "$opt" ;; + -ftext-column=*) warn "$opt" ;; + -fpic-length=*) warn "$opt" ;; + -fword-length=*) warn "$opt" ;; + -fliteral-length=*) warn "$opt" ;; + -fnumeric-literal-length=*) warn "$opt" ;; + -fdefaultbyte=*) warn "$opt" ;; + -falign-record=*) warn "$opt" ;; + -fkeycompress=*) warn "$opt" ;; + -falign-opt) warn "$opt" ;; + -fbinary-size=*) warn "$opt" ;; + -fbinary-byteorder=*) warn "$opt" ;; + -fassign-clause=*) warn "$opt" ;; + -fscreen-section-rules=*) warn "$opt" ;; + -fdpc-in-data=*) warn "$opt" ;; + -ffilename-mapping) warn "$opt" ;; + -fpretty-display) warn "$opt" ;; + -fbinary-truncate | -fno-binary-truncate) warn "$opt" ;; + -fcomplex-odo) warn "$opt" ;; + -fodoslide) warn "$opt" ;; + -findirect-redefines) warn "$opt" ;; + -flarger-redefines-ok) warn "$opt" ;; + -frelax-syntax-checks) warn "$opt" ;; + -fref-mod-zero-length) warn "$opt" ;; + -frelax-level-hierarchy) warn "$opt" ;; + -flocal-implies-recursive) warn "$opt" ;; + -fsticky-linkage) warn "$opt" ;; + -fmove-ibm) warn "$opt" ;; + -fperform-osvs) warn "$opt" ;; + -farithmetic-osvs) warn "$opt" ;; + -fconstant-folding) warn "$opt" ;; + -fhostsign) warn "$opt" ;; + -fprogram-name-redefinition) warn "$opt" ;; + -faccept-update) warn "$opt" ;; + -faccept-auto) warn "$opt" ;; + -fconsole-is-crt) warn "$opt" ;; + -fno-echo-means-secure) no_warn "$opt" ;; + -fline-col-zero-default) warn "$opt" ;; + -freport-column-plus) warn "$opt" ;; + -fdisplay-special-fig-consts) warn "$opt" ;; + -fbinary-comp-1) warn "$opt" ;; + -fnumeric-pointer) warn "$opt" ;; + -fmove-non-numeric-lit-to-numeric-is-zero) warn "$opt" ;; + -fimplicit-assign-dynamic-var) warn "$opt" ;; + -fcomment-paragraphs=*) warn "$opt" ;; + -fmemory-size-clause=*) warn "$opt" ;; + -fmultiple-file-tape-clause=*) warn "$opt" ;; + -flabel-records-clause=*) warn "$opt" ;; + -fvalue-of-clause=*) warn "$opt" ;; + -fdata-records-clause=*) warn "$opt" ;; + -ftop-level-occurs-clause=*) warn "$opt" ;; + -fsame-as-clause=*) warn "$opt" ;; + -ftype-to-clause=*) warn "$opt" ;; + -fusage-type=*) warn "$opt" ;; + -fsynchronized-clause=*) warn "$opt" ;; + -fsync-left-right=*) warn "$opt" ;; + -fspecial-names-clause=*) warn "$opt" ;; + -fgoto-statement-without-name=*) warn "$opt" ;; + -fstop-literal-statement=*) warn "$opt" ;; + -fstop-identifier-statement=*) warn "$opt" ;; + -fdebugging-mode=*) warn "$opt" ;; + -fuse-for-debugging=*) warn "$opt" ;; + -fpadding-character-clause=*) warn "$opt" ;; + -fnext-sentence-phrase=*) warn "$opt" ;; + -flisting-statements=*) warn "$opt" ;; + -ftitle-statement=*) warn "$opt" ;; + -fentry-statement=*) warn "$opt" ;; + -fmove-noninteger-to-alphanumeric=*) warn "$opt" ;; + -foccurs-max-length-without-subscript) warn "$opt" ;; + -flength-in-data-division) warn "$opt" ;; + -fmove-figurative-constant-to-numeric=*) warn "$opt" ;; + -fmove-figurative-space-to-numeric=*) warn "$opt" ;; + -fmove-figurative-quote-to-numeric=*) warn "$opt" ;; + -fodo-without-to=*) warn "$opt" ;; + -fodo-last-varlen=*) warn "$opt" ;; + -fsection-segments=*) warn "$opt" ;; + -falter-statement=*) warn "$opt" ;; + -fcall-overflow=*) warn "$opt" ;; + -fnumeric-boolean=*) warn "$opt" ;; + -fhexadecimal-boolean=*) warn "$opt" ;; + -fnational-literals=*) warn "$opt" ;; + -fhexadecimal-national-literals=*) warn "$opt" ;; + -fnational-character-literals=*) warn "$opt" ;; + -fhp-octal-literals=*) warn "$opt" ;; + -facu-literals=*) warn "$opt" ;; + -fword-continuation=*) warn "$opt" ;; + -fnot-exception-before-exception=*) warn "$opt" ;; + -faccept-display-extensions=*) warn "$opt" ;; + -frenames-uncommon-levels=*) warn "$opt" ;; + -fsymbolic-constant=*) warn "$opt" ;; + -fconstant-78=*) warn "$opt" ;; + -fconstant-01=*) warn "$opt" ;; + -fperform-varying-without-by=*) warn "$opt" ;; + -freference-out-of-declaratives=*) warn "$opt" ;; + -freference-bounds-check=*) warn "$opt" ;; + -fprogram-prototypes=*) warn "$opt" ;; + -fcall-convention-mnemonic=*) warn "$opt" ;; + -fcall-convention-linkage=*) warn "$opt" ;; + -fnumeric-value-for-edited-item=*) warn "$opt" ;; + -fincorrect-conf-sec-order=*) warn "$opt" ;; + -fdefine-constant-directive=*) warn "$opt" ;; + -ffree-redefines-position=*) warn "$opt" ;; + -frecords-mismatch-record-clause=*) warn "$opt" ;; + -frecord-delimiter=*) warn "$opt" ;; + -fsequential-delimiters=*) warn "$opt" ;; + -frecord-delim-with-fixed-recs=*) warn "$opt" ;; + -frecord-sequential-advancing=*) warn "$opt" ;; + -fmissing-statement=*) warn "$opt" ;; + -fzero-length-literals=*) warn "$opt" ;; + -fxml-generate-extra-phrases=*) warn "$opt" ;; + -fcontinue-after=*) warn "$opt" ;; + -fgoto-entry=*) warn "$opt" ;; + -fdepending-on-not-fixed=*) warn "$opt" ;; + -fbinary-sync-clause=*) warn "$opt" ;; + -fnonnumeric-with-numeric-group-usage=*) warn "$opt" ;; + -fassign-variable=*) warn "$opt" ;; + -fassign-using-variable=*) warn "$opt" ;; + -fassign-ext-dyn=*) warn "$opt" ;; + -fassign-disk-from=*) warn "$opt" ;; + -fvsam-status=*) warn "$opt" ;; + -fself-call-recursive=*) warn "$opt" ;; + + # TODO: create a temporary COBOL file with COBOL-WORDS directives + # and force-include it + -fnot-reserved=*) warn "$opt" ;; + -freserved=*) warn "$opt" ;; + -fnot-register=*) warn "$opt" ;; + -fregister=*) warn "$opt" ;; + + -fformat=auto ) ;; # gcobol and gnucobol default + + -fixed | --fixed | -fformat=fixed | -fformat=variable | -fformat=xcard) + # note: variable + xcard are only _more similar_ to fixed than free, + # (with changing right-column to 250/255, which isn't supported in gcobol, yet) + opts="$opts -ffixed-form" + ;; + + -F | -free | --free | -fformat=free | -fformat=* ) + # note: "all other formats" are only _more similar_ to free than fixed + opts="$opts -ffree-form" + ;; + + -h | --help) opts="$opts --help" + ;; + + -HELP) help && exit + ;; + -i | --info) warn "$opt" + ;; + + # -I + -fimplicit-init) warn "$opt" + ;; + -j | -job) warn "$opt" + ;; + -K) ignore_arg "$opt" + ;; + -K*) warn "$opt" + ;; + # -l + # -L + --list*) warn "$opt" + ;; + -m) mode="-shared" + ;; + # -main + # -nomain + # -o + # -O0, -Ox + -O | -O2 | -Os) warn "$opt" + ;; + -S) mode="$opt" + ;; + -save-temps=*) opt="$(echo "$opt" | sed -E 's/^.+=//')" + export GCOBOL_TEMPDIR="$opt" + ;; + -save-temps) export GCOBOL_TEMPDIR="${PWD:-$(pwd)}" + ;; + # -shared is identical + + -std=mvs) opts="$opts -dialect ibm" + ;; + -std=mf) opts="$opts -dialect mf" + ;; + -t | -T | -tlines=* | -P | -P=* | -X | --Xref) + warn "$opt (no listing)" + ;; + -q | --brief) warn "$opt" + ;; + -v | --verbose) opts="$opts -V" + ;; + # note: we want -dumpversion to be passed to gcc + -V | --version | -version) opts="$opts --version" + ;; + + # pass through, strangely -Wall is not supported + -w | -W | -Wextra) opts="$opts $opt" + ;; + -Wno-*) no_warn "$opt" + ;; + + -W*) ignore_arg "$opt" + ;; + + -x) mode= + ;; + + *) opts="$opts $opt" # pass through + ;; + esac +done + +# cobc default: +if [ "$static_used" = "" ] +then + opts="$opts -fno-static-call"; +fi + +if [ "$exit_status" -gt 0 ] +then + exit $exit_status +fi + +# To override the default gcobol, set the "gcobol" environment variable. +gcobol="${gcobol:-${0%/*}/gcobol}" + +if [ "$echo" ] +then + echo $gcobol $mode $opts + exit +fi + +if [ "$gcobcx" ] +then + set -x +fi + +exec $gcobol $mode $opts diff --git a/gcc/cobol/gcobol.1 b/gcc/cobol/gcobol.1 new file mode 100644 index 0000000..64c017c --- /dev/null +++ b/gcc/cobol/gcobol.1 @@ -0,0 +1,1628 @@ +.ds lang COBOL +.ds gcobol GCC\ \*[lang]\ Front-end +.ds isostd ISO/IEC 1989:2023 +.Dd \& February 2025 +.Dt GCOBOL 1\& "GCC \*[lang] Compiler" +.Os Linux +.Sh NAME +.Nm gcobol +.Nd \*[gcobol] +.Sh SYNOPSIS +.Nm +.Op Fl D Ns Ar name Ns Oo Li = Ns Ar value Oc +.Op Fl E +.Op Fl fdefaultbyte Ns Li = Ns Ar value +.Op Fl fsyntax-only +.Op Fl I Ns Ar copybook-path +.Op Fl fmax-errors Ns Li = Ns Ar nerror +.Oo +.Fl nomain | +.Fl main Ar filename | +.Fl main Ns Li = Ns Ar filename +.Fl main Ns Li = Ns Ar filename:program-id +.Oc +.Op Fl fcobol-exceptions Ar exception Ns Op Ns \/, Ns Ar exception Ns ... +.Op Fl copyext Ar ext +.Op Fl ffixed-form | Fl ffree-form +.Op Fl findicator-column +.Op Fl finternal-ebcdic +.Op Fl dialect Ar dialect-name +.Op Fl include Ar filename +.Op Fl preprocess Ar preprocess-filter +.Op Fl fflex-debug +.Op Fl fyacc-debug +.Ar filename Op ... +. +.Sh DESCRIPTION +.Nm +compiles \*[lang] source code to object code, and optionally produces an +executable binary or shared object. As a GCC component, it accepts +all options that affect code-generation and linking. Options specific +to \*[lang] are listed below. +.Bl -tag -width \0\0debug +.It Fl main Ar filename +.Nm +will generate a +.Fn main +function as an entry point calling the first PROGRAM-ID in +.Ar filename . +.Pp +.Fl main +is the default. When none of +.Fl nomain , +.Fl c , +or +.Fl shared , +is present, an implicit +.Fl main +is inserted into the command line ahead of the first source file name. +.It Fl main Ns Li = Ns Ar filename +The .o object module for +.Ar filename +will include a +.Fn main +entry point calling the first PROGRAM-ID in +.Ar filename +.It Fl main Ns Li = Ns Ar filename:program-id +The .o object module for +.Ar filename +will include a +.Fn main +entry point that calls the +.Ar program-id +entry point +.It Fl nomain +No +.Fn main +entry point will be generated by this +compilation. The +.Fl nomain +option is incompatible with +.Fl main , +and is implied by +.Fl shared . +It is also implied by +.Fl c +when there is no +.Fl main +present. +.Pp +See below for examples showing the use of +.Fl main +and +.Fl nomain. +.It Fl D Ar name Ns Op Li = Ns Ar expr +Define a CDF name (for use with +.Sy >>IF ) +to have the value of +.Ar expr . +.It Fl E +Write the CDF-processed \*[lang] input to standard output in free-form +reference format. Certain non-\*[lang] markers are included in the +output to indicate where copybook files were included. For +line-number consistency with the input, blank lines are retained. +.Pp +Unlike the C compiler, This option does not prevent compilation. +To prevent compilation, use the option +.D1 Fl Sy fsyntax-only +also. +.It Fl fdefaultbyte Ns Li = Ns Ar value +Use +.Ar value , +a number between 0 and 255, as the default value for all +WORKING-STORAGE data items that have no VALUE clause. By default, +alphanumeric data items are initialized with blanks, and numeric data +items are initialized to zero. This option overrides the default with +.Ar value . +.It Fl fsyntax-only +Invoke only the parser. Check the code for syntax errors, but don't do +anything beyond that. +.It Fl copyext Ar ext +For the CDF directive +.D1 COPY Ar name +if +.Ar name +is unquoted, several varieties of +.Ar name +are tried, as described below under +.Xr Copybooks Ns . +The +.Fl copyext +option extends the names searched to include +.Ar ext . +If +.Ar ext +is all uppercase or all lowercase, both forms are tried, with preference given to the one supplied. If +.Ar ext +is mixed-case, only that version is tried. +For example, with +.D1 Fl copyext Ar .abc +given the CDF directive +.D1 COPY name +.Nm +will add to possible names searched +.Ql name.abc +and +.Ql name.ABC +in that order. +.It Fl ffixed-form +Use strict +.Em "Reference Format" +in reading the \*[lang] input: +72-character lines, with a 6-character sequence area, and an indicator +column. Data past column 72 are ignored. +.It Fl ffree-form +Force the \*[lang] input to be interpreted as +.Em "free format" . +Line breaks are insignificant, except that +.Ql * +at the start of a line acts as a comment marker. +Equivalent to +.Fl indicator-column Ar 0 Ns Li . +. +.It Fl findicator-column +describes the location of the Indicator Area in a \*[lang] file +in +.Em "Reference Format" , +where the first 6 columns \(em known as the +.Dq "Sequence Number Area" +\(em are ignored, and the 7th column \(em the Indicator +Area \(em may hold a character of significance to the compiler. +.Pp +Although +.Em "reference format" , +strictly speaking, ignores data after column 72, +with this option +.Nm +accepts long \*[lang] lines, sometimes known as +.Em "extended source format" . +Text past column 72 is treated as ordinary \*[lang] text. (Line +continuation remains in effect, however, +provided no text appears +.Em past +column 72.) +.Pp +There is no maximum line length. Regardless of source code format, +the entire program could appear on one line. +.Pp +By default, +.Nm +auto-detects the source code format by examining the +.Em "sequence number area" +of the first line of the first file: if those characters are all +digits or blanks, the file is assumed to be in +.Em "reference format" , +with the indicator area in column 7. +.Pp +. +.It Fl fcobol-exceptions Ar exception Op Ns , Ns Ar exception Ns ... +By default, no exception condition is enabled (including fatal ones), +and by the ISO standard exception conditions are enabled only via the +CDF +.Sy "TURN" +directive. This option enables one or more exception conditions by +default, as though +.Sy TURN +had appeared at the top of the first source code file. +This option may also appear more than once on the command line. +.Pp +The value of +.Ar exception +is a Level 1, 2, or 3 exception condition name, as described by +\*[isostd]. +.Ql EC-ALL +means enable all exceptions. +.Pp +The +.Fl fno-cobol-exceptions +form turns off +.Ar exception , +just as though +.D1 >>TURN Ar exception CHECKING OFF +had appeared. +.Pp +Not all exception conditions are implemented. Any that are not +produce a warning message. +. +.It Fl fmax-errors Ar nerror +.Ar nerror +represents the number of error messages produced. Without this option, +.Nm +attempts to recover from a syntax error by resuming compilation at the +next statement, continuing until end-of-file. With it, +.Nm +counts the messages as they're produced, and stops when +.Ar nerror +is reached. +.It Fl fstatic-call Ns , Fl fno-static-call +With +.Fl fno-static-call , +.Nm +never uses static linking for +.D1 Sy CALL Ar program +By default, or with +.Fl fstatic-call , +if +.Ar program +is an alphanumeric literal, +.Nm +uses static linkage, meaning the compiler produces an external symbol +.Ar program +for the linker to resolve. +(In the future, that will work with +.Sy CONSTANT +data items, too.) With static linkage, if +.Ar program +is not supplied by the source code module or another object file or library +at build time, the linker will produce an +.Dq "unresolved symbol" +error. With +.Fl fno-static-call , +.Nm +always uses dynamic linking. +.Pp +This option affects the +.Sy CALL +statement for literals only. If +.Ar program +is a non-constant data item, it is always resolved using dynamic +linking, with +.Xr dlsym 3 Ns Li , +because its value is determined at run time. +.It Fl dialect Ar dialect-name +By default, +.Nm +accepts \*[lang] syntax as defined by \*[isostd], with some +extensions for backward compatibility with COBOL-85. To make the +compiler more generally useful, some additional syntax is supported by +this option. +.Pp +The value of +.Ar dialect-name +may be +.Bl -tag -compact +.It ibm +to indicate IBM COBOL 6.3 syntax, specifically +.D1 STOP <number>. +.It gnu +to indicate GnuCOBOL syntax +.It mf +to indicate MicroFocus syntax, specifically +.Sy LEVEL 78 +constants. +.El +.Pp +Only a few such non-standard constructs are accepted, and +.Nm +makes no claim to emulate other compilers. But to the extent that a +feature is popular but nonstandard, this option provides a way to +support it, or add it. +. +.It Fl include Ar filename +Process +.Ar filename +as if +.D1 COPY Dq Ar filename +appeared as the first line of +the primary source file. If +.Ar filename +is not an absolute path, the directory searched is the current working +directory, not the directory containing the main source file. The +name is used verbatim. No permutations are applied, and no +directories searched. +.Pp +If multiple +.Fl include +options are given, the files are included in +the order they appear on the command line. +. +.It Fl preprocess Ar preprocess-filter +After all CDF text-manipulation has been applied, and before the +prepared \*[lang] is sent to the +.Sy cobol1 +compiler, the input may be +further altered by one or more filters. In the tradition of +.Xr sed 1 , +each +.Ar preprocess-filter +reads from standard input and writes to standard output. +.Pp +To supply options to +.Ar preprocess-filter , +use a comma-separated string, similar to how linker options are supplied to +.Fl Sy Wl . +(Do not put any spaces after the commas, because the shell will treat it as an option separator.) +.Nm +replaces each comma with a space when +.Ar preprocess-filter +is invoked. For example, +.D1 Fl preprocess Li tee,output.cbl +invokes +.Xr tee 1 +with the output filename argument +.Pa output.cbl , +causing a copy of the input to be written to the file. +.Pp +.Nm +searches the current working directory and the PATH environment +variable directories for an executable file whose name matches +.Ar preprocess-filter . +The first one found is used. If none is found, an error is reported +and the compiler is not invoked. +.Pp +The +.Fl preprocess +option may appear more than once on the command line. Each +.Ar preprocess-filter +is applied in turn, in order of appearance. +.Pp +The +.Ar preprocess-filter +should return a zero exit status, indicating success. If it returns a +nonzero exit status, an error is reported and the compiler is not +invoked. +. +.It Fl fflex-debug Ns Li , Fl fyacc-debug +produce messages useful for compiler development. The +.Fl fflex-debug +option prints the tokenized input stream. The +.Fl fyacc-debug +option shows the shift and reduce actions taken by the parser. +.El +. +.Sh COMPILATION SCENARIOS +.D1 gcobol Ar xyz.cob +.D1 gcobol -main Ar xyz.cob +.D1 gcobol -main= Ns Ar xyz.cob Ar xyz.cob +These are equivalent. The +.Ar xyz.cob +code is compiled and a +.Fn main +function is +inserted that calls the first PROGRAM-ID in the +.Ar xyz.cob +source file. +.Pp +.D1 gcobol -nomain Ar xyz.cob Ar elsewhere.o +The +.Fl nomain +option prevents a +.Fn main +function from being generated by the gcobol compiler. +A +.Fn main +entry point must be present in the +.Ar elsewhere.o +module; without it the +linker will report a +.Dq "missing main" +error. +.Pp +.D1 gcobol Ar aaa.cob Ar bbb.cob Ar ccc.cob +.D1 gcobol -main Ar aaa.cob Ar bbb.cob Ar ccc.cob +The two commands are equivalent. The three source code modules are compiled and +linked together along with a generated +.Fn main +function that calls the first +PROGRAM-ID in the +.Ar aaa.cob +module. +.Pp +.D1 gcobol Ar aaa.cob Ar bbb.cob Fl main Ar ccc.cob +.D1 gcobol -main Ns = Ns Ar ccc.cob Ar aaa.cob Ar bbb.cob Ar ccc.cob +These two commands have the same result: An +.Ar a.out +executable is created that +starts executing at the first PROGRAM-ID in +.Ar ccc.cob . +.Pp +.D1 gcobol -main Ns = Ns Ar bbb.cob:b-entry Ar aaa.cob Ar bbb.cob Ar ccc.cob +An +.Ar a.out +executable is created that starts executing at the PROGRAM-ID +.Ar "b-entry" . +.Pp +.D1 gcobol -c Ar aaa.cob +.D1 gcobol -c -main Ar bbb.cob +.D1 gcobol -c Ar ccc.cob +.D1 gcobol Ar aaa.o Ar bbb.o Ar ccc.o +The first three commands each create a .o file. The +.Ar bbb.o +file will contain a +.Fn main +entry point that calls the first PROGRAM-ID in +.Ar bbb . +The fourth links the three .o files into an +.Ar a.out . +. +.Sh EBCDIC +The +.Fl finternal-ebcdic +option is useful when working with mainframe \*[lang] programs intended +for EBCDIC-encoded files. With this option, while the \*[lang] text +remains in ASCII, the character literals and field initial values +produce EBCDIC strings in the compiled binary, and any character data +read from a file are interpreted as EBCDIC data. The file data are +not +.Em converted ; +rather, the file is assumed to use EBCDIC representation. String +literals in the \*[lang] text +.Em are +converted, so that they can be compared meaningfully with data in the file. +.Pp +Only file data and character literals are affected. Data read from +and written to the environment, or taken from the command line, are +interpreted according the +.Xr locale 7 +in force during execution. The same is true of +.Sy ACCEPT +and +.Sy DISPLAY . +Names known to the operating system, such as file names and the names +of environment variables, are processed verbatim. +.Pp +At the present time, this is an all-or-nothing setting. Support for +.Sy USAGE +and +.Sy CODESET , +which would allow conversion between encodings, remains a future goal. +.Pp +See also +.Sx "Feature-set Variables" , +below. +. +.Sh REDEFINES ... USAGE POINTER +Per ISO, an item that +.Sy REDEFINES +another may not be larger than the item it redefines, unless that item +has LEVEL 01 and is not EXTERNAL. In +.Nm , +using +.Fl dialect Ar ibm , +this rule is relaxed for +.Sy REDEFINES +with +.Sy USAGE POINTER +whose redefined member is a 4-byte +.Sy USAGE COMP-5 +(usually +.Sy PIC S9(8) Ns ), +or vice-versa. +In that case, the redefined member is re-sized to be 8 bytes, to +accommodate the pointer. This feature allows pointer arithmetic on a +64-bit system with source code targeted at a 32-bit system. +.Pp +See also +.Sx "Feature-set Variables" , +below. +. +.Sh IMPLEMENTATION NOTES +.Nm +is a gcc compiler, and follows gcc conventions where applicable. +Sometimes those conventions (and user expectations) conflict with +common Mainframe practice. Unless required of the compiler by the ISO +specification, any such conflicts are resolved in favor of gcc. +.Ss Linking +Unlike, C, the \*[lang] +.Sy CALL +statement implies dynamic linking, because for +.D1 Sy CALL Ar program +.Ar program +can be a variable whose value is determined at runtime. +However, the parameter may also be compile-time constant, either an +alphanumeric literal, or a +.Sy CONSTANT +data item. +.Pp +.Nm +supports static linking where possible, unless defeated by +.Fl fno-static-call . +If the parameter value is known at compile time, the compiler produces +an external reference to be resolved by the linker. The referenced +program is normally supplied via an object module, a static library, +or a shared object. If it is not supplied, the linker will report an +.Dq "unresolved symbol" +error, either at build time or, if using a shared object, when the +program is executed. This feature informs the programmer of the error +at the earliest opportunity. +.Pp +Programs that are expected to execute +correctly in the presence of an unresolved symbol (perhaps because the +program logic won't require that particular +.Sy CALL ) +can use the +.Fl no-static-call +option. That forces all +.Sy CALL +statements to be resolved dynamically, at runtime. +.ig +Programs that are expected to execute +correctly in the presence of an unresolved symbol (perhaps because the +program logic won't require that particular +.Sy CALL ) +can use linker options to produce an executable anyway. +.Pp +One corner case yet remains. The +.Sy CALL +statement includes an +.Sy "ON ERROR" +clause whose purpose is to handle errors arising when the called program is not found. +Control is transferred to the +.Sy "ON ERROR" +clause when the +.Sy EC-PROGRAM-NOT-FOUND +exception condition is raised. That exception condition is not raised in +.Nm +when: +.Bl -bullet -compact +.It +the +.Sy CALL +parameter +is known at compile time, i.e., is an alphanumeric literal or +.Sy CONSTANT +data item, and +.It +the executable was generated with the linker option to ignore unresolved symbols. +.El +In that case, the program is terminated with a signal. No recovery with +.Sy "ON ERROR" +is possible. +.Pp +Should your program meet those particular conditions, all is not lost. +There are workarounds, and an option could be added to use dynamic +linking for all +.Sy CALL +statement, regardless of compile-time constants. +.. +. +.Ss Implemented Exception Conditions +Not all Exception Conditions are implemented. Any attempt to enable +an EC that that is not implemented produces a warning message. +The following are implemented: +.Pp +.Bl -tag -offset 5n -compact +.It EC-FUNCTION-ARGUMENT +for the following functions: +.Bl -item -compact +.It +ACOS +.It +ANNUITY +.It +ASIN +.It +LOG +.It +LOG10 +.It +PRESENT-VALUE +.It +SQRT +.El +.It EC-SORT-MERGE-FILE-OPEN +.It EC-BOUND-SUBSCRIPT +subscript not an integer, less than 1, or greater than occurs +.It EC-BOUND-REF-MOD +refmod start not an integer, start less than 1, start greater than +variable size, length not an integer, length less than 1, and +start+length exceeds variable size +.It EC-BOUND-ODO +DEPENDING not an integer, greater than occurs upper limit, +less than occurs lower limit, and subscript greater than DEPENDING for sending item +.It EC-SIZE-ZERO-DIVIDE +for both fixed-point and floating-point division +.It EC-SIZE-TRUNCATION +.It EC-SIZE-EXPONENTIATION +.El +.Pp +As of this writing, no \*[lang] compiler documents a complete +implementation of \*[isostd] Exception Conditions. +.Nm +will give priority to those ECs that the user community deems most +valuable. +. +.Sh EXTENSIONS TO ISO \*[lang] +Standard \*[lang] has no provision for environment variables as defined +by Unix and Windows, or command-line arguments. +.Nm +supports them using syntax similar to that of GnuCOBOL. ISO and IBM +also define incompatible ways to return the program's exit status to +the operating system. +.Nm +supports IBM syntax. +. +.Ss Environment Variables +To read an environment variable: +.Pp +.D1 ACCEPT Ar target Li FROM ENVIRONMENT Ar envar +.Pp +where +.Ar target +is a data item defined in +.Sy "DATA DIVISION" , +and +.Ar envar +names an environment variable. +.Ar envar +may be a string literal or alphanumeric data item whose value is the +name of an environment variable. The value of the named environment +variable is moved to +.Ar target . +The rules are the same as for +.Sy MOVE . +.Pp +To write an environment variable: +.Pp +.D1 SET ENVIRONMENT Ar envar Li TO Ar source +.Pp +where +.Ar source +is a data item defined in +.Sy DATA DIVISION , +and +.Ar envar +names an environment variable. +.Ar envar +again may be a string literal or alphanumeric data item whose value is the +name of an environment variable. The value of the named environment +variable is set to the value of +.Ar source . +. +.Ss Command-line Arguments +To read command-line arguments, use the registers +.Sy COMMAND-LINE +and +.Sy COMMAND-LINE-COUNT +in an +.Sy ACCEPT +statement (only). +Used without a subscript, +.Sy COMMAND-LINE +returns the whole command line as a single string. With a subscript, +.Sy COMMAND-LINE +is a table of command-line arguments. For example, if the +program is invoked as +.sp +.D1 Sy ./program Fl i Ar input Ar output +.sp +then +.sp +.D1 ACCEPT target FROM COMMAND-LINE(3) +.sp +moves +.Ar input +into +.Ar target . +The program name is the first thing in the whole command line and is +found in COMMAND-LINE(1) +.Sy COMMAND-LINE +table. +.Pp +To discover how many arguments were provided on the command line, use +.sp +.D1 ACCEPT Ar target Li FROM COMMAND-LINE-COUNT +.sp +If +.Sy ACCEPT +refers to a nonexistent environment variable or command-line +argument, the target is set to +.Sy LOW-VALUES . +.Pp +The system command line parameters can also be accessed through the LINKAGE +SECTION in the program where execution starts. The data structure looks like +this: +.Bd -literal + linkage section. + 01 argc pic 999. + 01 argv. + 02 argv-table occurs 1 to 100 times depending on argc. + 03 argv-element pointer. + 01 argv-string pic x(100) . +.Ed +and the code to access the third parameter looks like this +.Bd -literal + procedure division using by value argc by reference argv. + set address of argv-string to argv-element(3) + display argv-string +.Ed +. +.Ss #line directive +The parser accepts lines in the form +.D1 #line Ar lineno Dq Ar filename Ns . +The effect is to set the current line number to +.Ar lineno +and the current input filename to +.Ar filename . +Preprocessors may use this directive to control the filename and line +numbers reported in error messages and in the debugger. +. +.Ss SELECT ... ASSIGN TO +In the phrase +.sp +.D1 ASSIGN TO Ar filename +.sp +.Ar filename +may appear in quotes or not. If quoted, it represents a filename as +known to the operating system. If unquoted, it names either a data +element or an environment variable containing the name of a file. +If +.Ar filename +matches the name of a data element, that element is used. If not, +resolution of +.Ar filename +is deferred until runtime, when the name must appear in the program's +environment. +. +.Sh ISO \*[lang] Implementation Status +.Ss USAGE Data Types +.Nm +supports the following +.Sy USAGE IS +clauses: +.Bl -tag -compact -width POINTER\0 +.It Sy INDEX +for use as an index in a table. +.It Sy POINTER +for variables whose value is the address of an external function, +.Sy PROGRAM-ID , +or data item. Assignment is via the +.Sy SET +statement. +.It Sy BINARY, Sy COMP , Sy COMPUTATIONAL, Sy COMP-4, Sy COMPUTATIONAL-4 +big-endian integer, 1 to 16 bytes, per PICTURE. +.It Sy COMP-1 , Sy COMPUTATIONAL-1 , Sy FLOAT-BINARY-32 +IEEE 754 single-precision (4-byte) floating point, as provided by the +hardware. +.It Sy COMP-2 , Sy COMPUTATIONAL-2 , Sy FLOAT-BINARY-64 +IEEE 754 double-precision (8-byte) floating point, as provided by the +hardware. +.It Sy COMP-3 , Sy COMPUTATIONAL-3, Sy PACKED-DECIMAL +currently unimplemented. +.It Sy COMP-5 , Sy COMPUTATIONAL-5 +little-endian integer, 1 to 16 bytes, per +.Sy PICTURE. +.It Sy FLOAT-BINARY-128 , FLOAT-EXTENDED +implements 128-bit floating point, per IEEE 754. +.El +.Pp +.Nm +supports ISO integer +.Sy BINARY-<type> +types, most of which alias +.Sy COMP-5. +. +.hw unsigned +.sp +.TS +LB LB LB LB +LB LB LB LB +L L L L . +COMP-5 Compatible +Picture BINARY Type Bytes Value + T{ +BINARY-CHAR [UNSIGNED] +T} 1 0 \(em 256 +S9(1...4) T{ +BINARY-CHAR SIGNED +T} 1 -128 \(em +127 +\09(1...4) T{ +BINARY-SHORT [UNSIGNED] +T} 2 0 \(em 65535 +S9(1...4) T{ +BINARY-SHORT SIGNED +T} 2 -32768 \(em +32767 +\09(5...9) T{ +BINARY-LONG [UNSIGNED] +T} 4 0 \(em 4,294,967,295 +S9(5...9) T{ +BINARY-LONG SIGNED +T} 4 T{ +-2,147,483,648 \(em +2,147,483,647 +T} +\09(10...18) T{ +BINARY-LONG-LONG [UNSIGNED] +T} 8 T{ +0 \(em 18,446,744,073,709,551,615 +T} +S9(10...18) T{ +BINARY-LONG-LONG SIGNED +T} 8 T{ +-9,223,372,036,854,775,808 \(em +9,223,372,036,854,775,807 +T} +.TE +.Pp +These define a size (in bytes) and cannot be +used with a +.Sy PICTURE +clause. +Per the ISO standard, +.Sy SIGNED +is the default for the +.Sy "BINARY-" Ns Ar type +aliases. +.Pp +All computation \(em both integer and floating point \(em is done +using 128-bit intermediate forms. +. +.Ss Environment Names +In +.Nm +.sp +.Dl DISPLAY UPON +.sp +maps +.Sy SYSOUT +and +.Sy STDOUT +to standard output, and +.Sy SYSPUNCH , +.Sy SYSPCH +and +.Sy STDERR +to standard error. +. +.Ss Exit Status +.Nm +supports the ISO syntax for returning an exit status to the operating system, +.Pp +.D1 STOP RUN Oo WITH Oc Bro NORMAL | ERROR Brc Oo STATUS Oc Ar status +.Pp +In addition, +.Nm +also supports the IBM syntax for returning an exit status to +the operating system. Use the +.Sy RETURN-CODE +register: +.Bd -literal -offset indent +MOVE ZERO TO RETURN-CODE. +GOBACK. +.Ed +.Pp +The +.Sy RETURN-CODE +register is defined as a 4-byte binary integer. +.ig +.Pp +The ISO standard supports an extended form of +.Sy GOBACK : +.Pp +.D1 GOBACK {ERROR | NORMAL} WITH Ar status +.Pp +where +.Ar status +is a numeric data item or literal. This syntax has the same effect as: +.Bd -literal -offset indent +MOVE status TO RETURN-CODE. +GOBACK. +.Ed +The use of +.Sy ERROR +or +.Sy NORMAL +has no effect; the two are interchangeable. +.. +. +.Ss Compiler-Directing Facility (CDF) +The CDF should be used with caution because no comprehensive test +suite has been identified. +. +.Ss Conditional Compilation +.Bl -tag -width >>DEFINE +.It >> Ns Sy DEFINE Ar name Sy AS Bro Ar expression Li | Sy PARAMETER Brc Op Sy OVERRIDE +Define +.Ar name +as a compilation variable to have the value +.Ar expression . +If +.Ar name +was previously defined, +.Sy OVERRIDE +is required, else the directive is invalid. +.Sy AS PARAMETER +is accepted, but has no effect in +.Nm . +. +.It >> Ns Sy DEFINE Ar name AS Sy OFF +releases the definition +.Ar name , +making it subsequently invalid for use. +.\" ISO requires AS; cdf.y does not. +. +.It >> Ns Sy IF Ar cce Ar text Oo >> Ns Sy ELSE Ar alt-text Oc Li >> Ns Sy END-IF +evaluates +.Ar cce , +a +.Em "constant conditional expression\/" , +for conditional compilation. +If a name, +.Ar cce +may be defined with the +.Fl D +command-line parameter. If true, the \*[lang] text +.Ar text +is compiled. If false, +.Ar else-text , +if present, is compiled. +.Bo Sy IS Bo Sy NOT Bc Bc Sy DEFINED +is supported. Boolean literals are not supported. +. +.It >> Ns Sy EVALUATE +Not implemented. +.El +. +.Ss Other CDF Directives +.Bl -tag -width >>PROPAGATE +.It >> Ns Sy CALL-CONVENTION Ar convention +.Ar convention +may be one of: +.Bl -tag -compact +.It Sy \*[lang] +Use standard \*[lang] case-insensitive symbol-name matching. For +.Sy CALL Dq Ar name , +.Ar name +is rendered by the compiler in lowercase. +.It Sy C +Use case-sensitive symbol-name matching. The +.Sy CALL +target is not changed in any way; it is used verbatim. +.It Sy VERBATIM +An alias for >>\c +.Sy "CALL-CONVENTION C" . +.El +.It >> Ns Sy COBOL-WORDS EQUATE Ar keyword Sy WITH Ar alias +makes +.Ar alias +a synonym for +.Ar keyword . +.It >> Ns Sy COBOL-WORDS UNDEFINE Ar keyword +.Ar keyword +is removed from the \*[lang] grammar. Use of it in a program will provoke +a syntax error from the compiler. +.It >> Ns Sy COBOL-WORDS SUBSTITUTE Ar keyword Sy BY Ar new-word +.Ar keyword +is deleted as a keyword from the grammar, replaced by +.Ar new-word . +.Ar keyword +may thereafter be used as a user-defined word. +.It >> Ns Sy COBOL-WORDS RESERVE Ar new-word +Treat +.Ar new-word +as a \*[lang] keyword. It cannot be used by the program, either as a +keyword or as a user-defined word. +. +.It >> Ns Sy DISPLAY Ar string ... +Write +.Ar string +to standard error as a warning message. +.It >> Ns Sy SOURCE Ar format +.Ar format +may be one of: +.Bl -tag -compact +.It Sy FIXED +Source conforms to \*[lang] Reference Format with unlimited line length. +.It Sy FREE +Line endings and indentation are ignored by the compiler, except that a +.Ql "*" +at the beginning of a line is recognized as a comment. +.El +.El +.Pp +.Bl -tag -width >>PROPAGATE -compact +.It >> Ns Sy FLAG-02 +Not implemented. +.It >> Ns Sy FLAG-85 +Not implemented. +.It >> Ns Sy FLAG-NATIVE-ARITHMETIC +Not implemented. +.It >> Ns Sy LEAP-SECOND +Not implemented. +.It >> Ns Sy LISTING +Not implemented. +.It >> Ns Sy PAGE +Not implemented. +.It >> Ns Sy PROPAGATE +Not implemented. +.It >> Ns Sy TURN Oo +.Ar ec Oo Ar file Li ... Oc ... +.Oc Sy CHECKING Bro Oo Sy ON Oc Oo Oo Sy WITH Oc Sy LOCATION Oc | Sy OFF Brc +Enable (or, with +.Sy OFF , +disable) exception condition +.Ar ec +optionally associated with the file connectors +.Ar file . +If +.Sy LOCATION +is specified, +.Nm +reports at runtime the source filename and line number of the +statement that triggered the exception condition. +.El +. +.Ss Feature-set Variables +Some command-line options affect CDF +.Em "feature-set" +variables that are special to +.Nm . +They can be set and tested using +.Sy >>DEFINE +and +.Sy >>IF , +and are distinguished by a leading +.Ql \&% +in the name, which is otherwise invalid in a \*[lang] identifier: +.Pp +.Bl -tag -compact +.It Sy %EBCDIC-MODE +is set by +.Fl finternal-ebcdic . +.It Sy %64-BIT-POINTER +is implied by +.Fl "dialect ibm" . +.El +.Pp +To set a feature-set variable, use +.Dl >>SET Ar feature Li [AS] {ON | OFF} +If +.Ar feature +is +.Sy %EBCDIC-MODE , +the directive must appear before +.Sy PROGRAM-ID . +.Pp +To test a feature-set variable, use +.Dl >>IF Ar feature Li DEFINED +.. +.Ss Copybooks +.Nm +supports the CDF +.Sy COPY +statement, with or without its +.Sy REPLACING +component. For any statement +.sp +.D1 COPY Ar copybook +.sp +.Nm +looks first for an environment variable named +.Va copybook +and, if found, uses the contents of that variable as the name of the +copybook file. If that file does not exist, it continues looking for +a file named one of: +.sp +.Bl -bullet -compact -offset 5n +.It +.Pa copybook +(literally) +.It +.Pa copybook.cpy +.It +.Pa copybook.CPY +.It +.Pa copybook.cbl +.It +.Pa copybook.CBL +.It +.Pa copybook.cob +.It +.Pa copybook.COB +.El +.sp +in that order. It looks first in the same directory as the source +code file, and then in any +.Ar copybook-path +named with the +.Fl I +option. +. +.\" FIXME: need escape mechanism for directories with ':' in the name. +.Ar copybook-path +may (like the shell's +.Ev PATH +variable) be a colon-separated list. +. +The +.Fl I +option may occur multiple times on the command line. Each successive +.Ar copybook-path +is concatenated to previous ones. +Relative paths (having no leading +.Ql / Ns +\&) +are searched relative to the compiler's current working directory. +.Pp +For example, +.D1 \& +.D1 Fl I Li /usr/local/include:include +.D1 \& +searches first the directory where the \*[lang] program is found, next in +.Pa /usr/local/include , +and finally in an +.Pa include +subdirectory of the directory from which +.Nm +was invoked. +. +.Ss Intrinsic functions +.Nm +implements all intrinsic functions defined by \*[isostd], plus a few +others. They are listed alphabetically below. +.Bl -item -compact +.It +ABS ACOS ANNUITY ASIN ATAN +.It +BASECONVERT BIT_OF BIT_TO_CHAR BOOLEAN_OF_INTEGER BYTE_LENGTH +.It +CHAR CHAR_NATIONAL COMBINED_DATETIME CONCAT CONVERT COS CURRENT_DATE +.It +DATE_OF_INTEGER DATE_TO_YYYYMMDD DAY_OF_INTEGER DAY_TO_YYYYDDD DISPLAY_OF +.It +E EXCEPTION_FILE +EXCEPTION_FILE_N EXCEPTION_LOCATION EXCEPTION_LOCATION_N +EXCEPTION_STATEMENT EXCEPTION_STATUS EXP EXP10 +.It +FACTORIAL FIND_STRING +FORMATTED_CURRENT_DATE FORMATTED_DATE FORMATTED_DATETIME +FORMATTED_TIME FRACTION_PART +.It +HEX_OF HEX_TO_CHAR HIGHEST_ALGEBRAIC +.It +INTEGER INTEGER_OF_BOOLEAN INTEGER_OF_DATE INTEGER_OF_DAY +INTEGER_OF_FORMATTED_DATE INTEGER_PART +.It +LENGTH LOCALE_COMPARE +LOCALE_DATE LOCALE_TIME LOCALE_TIME_FROM_SECONDS LOG LOG10 LOWER_CASE +LOWEST_ALGEBRAIC +.It +MAX MEAN MEDIAN MIDRANGE MIN MOD MODULE_NAME +.It +NATIONAL_OF NUMVAL NUMVAL_C NUMVAL_F ORD +.It +ORD_MAX ORD_MIN +.It +PI PRESENT_VALUE +.It +RANDOM RANGE REM REVERSE +.It +SECONDS_FROM_FORMATTED_TIME +SECONDS_PAST_MIDNIGHT SIGN SIN SMALLEST_ALGEBRAIC SQRT +STANDARD_COMPARE STANDARD_DEVIATION SUBSTITUTE SUM +.It +TAN TEST_DATE_YYYYMMDD TEST_DAY_YYYYDDD TEST_FORMATTED_DATETIME +TEST_NUMVAL TEST_NUMVAL_C TEST_NUMVAL_F TRIM +.It +ULENGTH UPOS UPPER_CASE +USUBSTR USUPPLEMENTARY UUID4 UVALID UWIDTH +.It +VARIANCE +.It +WHEN_COMPILED +.It +YEAR_TO_YYYY +.El +. +.Ss Binary floating point DISPLAY +How the DISPLAY presents binary floating point numbers depends on the value. +.Pp +When a value has six or fewer decimal digits to the left of the +decimal point, it is expressed as +.Em 123456.789... . +.Pp +When a value is less than 1 and has no more than three zeroes to the +right of the decimal point, it is expressed as +.Em 0.0001234... . +.Pp +Otherwise, exponential notation is used: +.Em 1.23456E+7 . +.Pp +In all cases, trailing zeroes on the right of the number are removed +from the displayed value. +.Pp +.Bl -tag -compact -width FLOAT-EXTENDED +.It COMP-1 +displayed with 9 decimal digits. +.It COMP-2 +displayed with 17 decimal digits. +.It FLOAT-EXTENDED +displayed with 36 decimal digits. +.El +.Pp +Those digit counts are consistent with the IEEE 754 requirements for +information interchange. As one example, the description for COMP-2 +binary64 values (per Wikipedia). +.Pp +If an IEEE 754 double-precision number is converted to a decimal +string with at least 17 significant digits, and then converted back to +double-precision representation, the final result must match the +original number. +.Pp +17 digits was chosen so that the +.Sy DISPLAY +statement shows the contents +of a COMP-2 variable without hiding any information. +. +.Ss Binary floating point MOVE +During a +.Sy MOVE +statement, a floating-point value may be truncated. It will not be +unusual for Numeric Display values to be altered when moved through a +floating-point value. +.Pp +This program: +.Bd -literal + 01 PICV999 PIC 9999V999. + 01 COMP2 COMP-2. + PROCEDURE DIVISION. + MOVE 1.001 to PICV999 + MOVE PICV999 TO COMP2 + DISPLAY "The result of MOVE " PICV999 " TO COMP2 is " COMP2 + MOVE COMP2 to PICV999 + DISPLAY "The result of MOVE COMP2 TO PICV999 is " PICV999 +.Ed +.Pp +generates this result: +.Bd -literal + The result of MOVE 0001.001 TO COMP2 is 1.00099999999999989 + The result of MOVE COMP2 TO PICV999 is 0001.000 +.Ed +.Pp +However, the internal implementation can produce results that might be seem surprising: +.Bd -literal + The result of MOVE 0055.110 TO COMP2 is 55.1099999999999994 + The result of MOVE COMP2 TO PICV999 is 0055.110 +.Ed +.Pp +The source of this inconsistency is the way +.Nm +stores and converts +numbers. Converting the floating-point value to the numeric display +value 0055110 is done by multiplying 55.109999...\& by 1,000 and then +truncating the result to an integer. And it turns out that even +though 55.11 can’t be represented in floating-point as an exact value, +the product of the multiplication, 55110, is an exact value. +.Pp +In cases where it is important for conversions to have predictable +results, we need to be able to apply rounding, which can be done with +an arithmetic statement: +.Bd -literal + MOVE 1.001 to PICV999 + MOVE PICV999 TO COMP2 + DISPLAY "The result of MOVE " PICV999 " TO COMP2 is " COMP2 + MOVE COMP2 to PICV999 + DISPLAY "The result of MOVE COMP2 TO PICV999 is " PICV999 + ADD COMP2 to ZERO GIVING PICV999 ROUNDED + DISPLAY "The result of ADD COMP2 to ZERO GIVING PICV999 ROUNDED is " PICV999 +.sp + The result of MOVE 0001.001 TO COMP2 is 1.00099999999999989 + The result of MOVE COMP2 TO PICV999 is 0001.000 + The result of ADD COMP2 to ZERO GIVING PICV999 ROUNDED is 0001.001 +.Ed +.Ss Binary floating point computation +.Nm +attempts to do internal computations using binary integers when +possible. Thus, simple arithmetic between binary values and numeric +display values conclude with binary intermediate results. +.Pp +If a floating-point value gets included in the mix of variables +specified for a calculation, then the intermediate result becomes a +128-bit floating-point value. +. +.Ss A warning about binary floating point comparison +The cardinal rule when doing comparisons involving floating-point +values: Never, ever, test for equality. It’s just not worth the hassle. +.Pp +For example: +.Bd -literal + WORKING-STORAGE SECTION. + 01 COMP1 COMP-1 VALUE 555.11. + 01 COMP2 COMP-2 VALUE 555.11. + PROCEDURE DIVISION. + DISPLAY "COMPARE " COMP1 " with " COMP2 + IF COMP1 EQUAL COMP2 DISPLAY "Equal" ELSE DISPLAY "Not equal" END-IF +.sp + MOVE COMP1 to COMP2 + DISPLAY "COMPARE " COMP1 " with " COMP2 + IF COMP1 EQUAL COMP2 DISPLAY "Equal" ELSE DISPLAY "Not equal" END-IF +.Ed +.Pp +the results: +.Bd -literal + COMPARE 555.1099854 with 555.110000000000014 + Not equal + COMPARE 555.1099854 with 555.1099853515625 + Equal +.Ed +.Pp +Why? Again, it has to do with the internals of +.Nm . +When differently sized floating-point values need to be compared, they +are first converted to 128-bit floats. And it turns out that when a +COMP1 is moved to a COMP2, and they are both converted to +FLOAT-EXTENDED, the two resulting values are (probably) equal. +.Pp +Avoid testing for equality unless you really know what you are doing +and you really test the code. And then avoid it anyway. +.Pp +Finally, it is observably the case that the +.Nm +implementations of floating-point conversions and comparisons don’t +precisely match the behavior of other \*[lang] compilers. +.Pp +You have been warned. +. +.Sh ENVIRONMENT +.Bl -tag -width COBPATH +.It Ev COBPATH +If defined, specifies the directory paths to be used by the +.Nm +runtime library, +.Pa libgcobol.so , +to locate shared objects. +Like +.Ev LD_LIBRARY_PATH , +it may contain several directory names separated by a colon +.Pq Ql \&: . +.Ev COBPATH +is searched first, followed by +.Ev LD_LIBRARY_PATH . +.Pp +Each directory is searched for files whose name ends in +.Ql ".so" . +For each such file, +.Xr dlopen 3 +is attempted, and, if successful +.Xr dlsym 3 . +No relationship is defined between the symbol's name and the filename. +.Pp +Without +.Ev COBPATH , +binaries produced by +.Nm +behave as one might expect of any program compiled with gcc. Any +shared objects needed by the program are mentioned on the command line +with a +.Fl l Ns Ar library +option, and are found by following the executable's +.Pa RPATH +or otherwise per the configuration of the runtime linker, +.Xr ld.so 8 . +. +.It Ev UPSI +\*[lang] defines a User Programmable Status Indicator (UPSI) switch. In +.Nm , +the settings are denoted +.Sy UPSI-0 +through +.Sy UPSI-7 , +where 0-7 indicates a bit position. The value of the UPSI switches is +taken from the +.Ev UPSI +environment variable, whose value is a string of up to eight 1's and +0's. The first character represents the value of +.Sy UPSI-0 , +and missing values are assigned 0. For example, +.Sy UPSI=1000011 +in the environment sets bits 0, 5, and 6 on, which means that +.Sy UPSI-0 , +.Sy UPSI-5 , +and +.Sy UPSI-6 +are on. +.It Ev GCOBOL_TEMPDIR +causes any temporary files created during CDF processing to be written +to a file whose name is specified in the value of +.Ev GCOBOL_TEMPDIR . +If the value is just +.Dq / , +the effect is different: each copybook read is reported on standard +error. This feature is meant to help diagnose mysterious copybook +errors. +.El +. +.Sh FILES +Executables produced by +.Nm +require the runtime support library +.Pa libgcobol , +which is provided both as a static library and as a shared object. +. +.\" .Sh DIAGNOSTICS +. +.Sh COMPATIBILITY +The ISO standard leaves the default file organization up to the implementation; in +.Nm , +the default is +.Sy "SEQUENTIAL" . +. +.Ss On-Disk Format +Any ability to use files produced by other \*[lang] compilers, or for +those compilers to use files produced by +.Nm , +is the product of luck and intuition. Various compilers interpret the +ISO standard differently, and the standard's text is +not always definitive. +.Pp +For +.Sy "ORGANIZATION IS LINE SEQUENTIAL" +files (explicitly or by default), +.Nm , +absent specific direction, produces an ordinary Linux text file: for +each WRITE, the data are written, followed by an ASCII NL (hex 0A) +character. On READ, the record is read up to the size of the +specified record or NL, whichever comes first. The NL is not included +in the data brought into the record buffer; it serves only as an +on-disk record-termination marker. Consequently, +.Sy SEQUENTIAL +and +.Sy "LINE SEQUENTIAL" +files work the same way: the \*[lang] program never sees the record +terminator. +.Pp +When +.Sy READ +and +.Sy WRITE +are used with +.Sy ADVANCING , +however, the game changes. If +.Sy ADVANCING +is used with +.Sy "LINE SEQUENTIAL" +files, +it is honored by +.Nm . +.Pp +Other compilers may not do likewise. +According to ISO, in +.Sy WRITE +(14.9.47.3 General rules) +.Sy ADVANCING +is +.Em ignored +for files for which +.Dq "the physical file does not support vertical positioning" . +It further states that, in the absence of +.Sy ADVANCING , +.Sy WRITE +proceeds as if +.Dq "as if the user has specified AFTER ADVANCING 1 LINE" . +Some other implementations interpret that to mean that the first +.Sy WRITE +to a +.Sy "LINE SEQUENTIAL" +file results in a leading NL on the first line, and no trailing NL on +the last line. Some furthermore +.Em prohibit +the use of +.Sy ADVANCING +with +.Sy "LINE SEQUENTIAL" +files. +. +.\" .Sh SEE ALSO +. +.Sh STANDARDS +The reference standard for +.Nm +is \*[isostd]. +.Bl -bullet -compact +.It +If +.Nm +compiles code consistent with that standard, the resulting program +should execute correctly; any other result is a bug. +.It +If +.Nm +compiles code that does not comply with that standard, but runs correctly according to some other specification, that represents a non-standard extension. One day, the +.Fl pedantic +option will produce diagnostic messages for such code. +.It +If +.Nm +rejects code consistent with that standard, that represents an aspect +of \*[lang] that is (or is not) on the To Do list. If you would like +to see it compile, please get in touch with the developers. +.El +. +.Ss Status of NIST \*[lang] Compiler Verification Suite +.Bl -tag -compact -width "\0\0100% NC" +.It NC 100% +Nucleus +.It SQ 100% +Sequential I/O +.It RL 100% +Relative I/O +.It IX 100% +Indexed I/O +.It IC 100% +Inter-Program Communication +.It ST 100% +Sort-Merge +.It SM 100% +Source Text Manipulation RW \en Report Writer +.It CM +Communication +.It DB to do? +Debug +.It SG +Segmentation +.It IF 100% +Intrinsic Function +.El +.Pp +Where +.Nm +passes 100% of the tests in a module, we exclude the (few) tests for +obsolete features. The authors regard features that were obsolete in +1985 to be well and truly obsolete today, and did not implement them. +. +.Ss Notable deferred features +CCVS-85 modules not marked with above with any status (CM, and SG) are on the +.Dq "hard maybe" +list, meaning they await an interested party with real code using the feature. +.Pp +.Nm +does not implement Report Writer or Screen Section. +. +.Ss Beyond COBOL/85 +.Nm +increasingly implements \*[isostd]. For example, +.Sy DECLARATIVES +is not tested by CCVS-85, but are implemented by +.Nm Ns . +Similarly, Exception Conditions were not defined in 1985, and +.Nm +contains a growing number of them. +.Pp +The authors are well aware that a complete, pure \*[lang]-85 compiler +won't compile most existing \*[lang] code. Every vendor offered (and +offers) extensions, and most environments rely on a variety of +preprocessors and ancillary systems defined outside the standard. The +express goal of adding an ISO \*[lang] front-end to GCC is to establish a +foundation on which any needed extensions can be built. +. +.Sh HISTORY +\*[lang], the language, may well be older than the reader. To the +author's knowledge, free \*[lang] compilers first began to appear in 2000. +Around that time an earlier \*[lang] for GCC project +.br +.Lk https://cobolforgcc.sourceforge.net/ cobolforgcc +met with some success, but was never officially merged into GCC. +.Pp +This compiler, +.Nm , +was begun by +.Lk https://www.cobolworx.com/ COBOLworx +in the fall of 2021. The +project announced a complete implementation of the core language +features in December 2022. +. +.Sh AUTHORS +.Bl -tag -compact +.It "James K. Lowden" +(jklowden@cobolworx.com) is responsible for the parser. +.It "Robert Dubner" +(rdubner@cobolworx.com) is responsible for producing the GIMPLE tree, +which is input to the GCC back-end. +.El +. +.Sh CAVEATS +.Bl -bullet -compact +.It +.Nm +has been tested only on x64 and Apple M1 processors running Linux in +64-bit mode. +.It +The I/O support has not been extensively tested, and does not +implement or emulate many features related to VSAM and other mainframe +subsystems. While LINE-SEQUENTIAL files are ordinary text files that +can be manipulated with standard utilities, INDEXED and RELATIVE files +produced by +.Nm +are not compatible with that of any other \*[lang] compiler. Enhancements +to the I/O support will be readily available to the paying customer. +.El +. +.\" .Sh BUGS diff --git a/gcc/cobol/gcobol.3 b/gcc/cobol/gcobol.3 new file mode 100644 index 0000000..adc141a --- /dev/null +++ b/gcc/cobol/gcobol.3 @@ -0,0 +1,328 @@ +.ds lang COBOL +.ds gcobol GCC\ \*[lang]\ Front-end +.Dd \& March 2024 +.Dt GCOBOL 3\& "GCC \*[lang] Compiler" +.Os Linux +.Sh NAME +.Nm gcobol +.Nd \*[gcobol] I/O function API +.Sh LIBRARY +.Pa libgcobol +. +.Sh SYNOPSIS +.In symbols.h +.In io.h +.In gcobolio.h +. +.Ft gcobol_io_t Fn gcobol_fileops +.Bd -literal +class gcobol_io_t { +public: + static const char constexpr marquee[64]; + typedef void (open_t)( cblc_file_t *file, + char *filename, + int mode_char, + int is_quoted ); + typedef void (close_t)( cblc_file_t *file, + int how ); + typedef void (start_t)( cblc_file_t *file, + int relop, // needs enum + int first_last_key, + size_t length ); + typedef void (read_t)( cblc_file_t *file, + int where ); + typedef void (write_t)( cblc_file_t *file, + unsigned char *data, + size_t length, + int after, + int lines, + int is_random ); + typedef void (rewrite_t)( cblc_file_t *file, + size_t length, bool is_random ); + typedef void (delete_t)( cblc_file_t *file, + bool is_random ); + open_t *Open; + close_t *Close; + start_t *Start; + read_t *Read; + write_t *Write; + rewrite_t *Rewrite; + delete_t *Delete; +\0\0... +}; +.Ed +. +.Sh DESCRIPTION +.Nm +supplies replaceable I/O functionality via +.Fn gcobol_fileops . +It returns a pointer to a structure of C function pointers that +implement sequential, relative, and indexed file operations over files +whose On Disk Format (ODF) is defined by +.Nm . +A user wishing to use another library that implements the same +functionality over a different ODF must supply a different implementation of +.Fn gcobol_fileops , +plus 7 functions, as described in this document. The pointers to +those user-implemented functions are placed in a C++ object of type +.Vt gcobol_io_t +and an instantiation of that type is returned by +.Fn gcobol_fileops . +The compiled program initializes I/O operations by calling that +function the first time any file is opened. +.Pp +Each function takes as its first argument a pointer to a +.Vt cblc_file_t +object, which is analogous to a +.Vt FILE +object used in the C +.Sy stdio +functions. The +.Vt cblc_file_t +structure acts as a communication area between the compiled program +and the I/O library. Any information needed about the file is kept +there. Notably, the outcome of any operation is stored in that +structure in the +.Va file_status +member, not as a return code. Information about the +.Em operation +(as opposed to the +.Em file ) +appear as parameters to the function. +.Pp +.Vt cblc_file_t +has one member, not used by +.Nm , +that is reserved for the user: +.Dl Vt "void *" Pa implementation . +.Pp +User-supplied I/O functions may assign and dereference +.Pa implementation . +.Nm +will preserve the value, but never references it. +.Pp +The 7 function pointers in +.Vt gcobol_io_t +are +.Bl -hang -width Rewrite +.It Open +.Ft void +.Fn open_t "cblc_file_t *file" "char *filename" "int mode_char" "int is_quoted" +.br +parameters: +.Bl -tag -width mode_char -compact +.It Ar filename +is the filename, as known to the OS +.It Ar mode_char +is one of +.Bl -hang -width MM -compact +.It Sq r +OPEN INPUT: read-only mode +.It Sq w +OPEN OUTPUT: create a new file or overwrite an existing one +.It Sq a +EXTEND: append to sequential file +.It Sq + +modify existing file +.El +.It Ar is_quoted +If +.Sy true , +.Ar filename +is taken literally. If +.Sy false , +.Ar filename +is interpreted as the name of an environment variable, the contents of +which, if extant, are taken as the name of the file to be opened. If +no such variable exists, then +.Ar filename +is used verbatim. +.El +.It Close +.Ft void +.Fn close_t "cblc_file_t *file" "int how" +.br +parameters: +.Bl -hang -width how -compact +.It Ar how +A value of 0x08 closes a +.Dq REEL\ unit . +Because no such thing is supported, the function sets the file status to +.Dq 07 , +meaning +.Em "not a tape" . +.El +.It Start +.Ft void +.Fn start_t "cblc_file_t *file" "int relop" "int first_last_key" "size_t length" +.br +parameters: +.Bl -tag -width length -compact +.It Ar relop +is one of +.Bl -hang -width LT -compact +.It Li 0 +means +.Sq < +.It Li 1 +means +.Sq <= +.It Li 2 +means +.Sq = +.It Li 3 +means +.Sq != +.It Li 4 +means +.Sq >= +.It Li 5 +means +.Sq > +.El +.It Ar first_last_key +is the key number (starting at 1) of the key within the +.Vt cblc_file_t +structure. +.It Ar length +is the size of the key (TODO: per the START statement?) +.El +.It Read +.Ft void +.Fn read_t "cblc_file_t *file" "int where" +parameters: +.Bl -tag -width where -compact +.It Ar where +.Bl -hang -width 000 -compact +.It Li -2 +PREVIOUS +.It Li -1 +NEXT +.It Ar \0N +represents a key number, starting with 1, in the +.Vt cblc_file_t +structure. The value of that key is used to find the record, and read it. +.El +.El +.It Write +.Ft void +.Fn write_t "cblc_file_t *file" "unsigned char *data" \ +"size_t length" "int after" "int lines" "int is_random" +.br +parameters: +.Bl -hang -width is_random -compact +.It Ar data +address of in-memory buffer to write +.It Ar length +length of in-memory buffer to write +.It Ar after +has the value 1 if the +.D1 "AFTER ADVANCING n LINES" +phrase was present in the +.Sy WRITE +statement, else 0 +.It Ar lines +may be one of +.Bl -hang -width 00000 -compact +.It Li -666 +ADVANCING PAGE +.It Li \0\0-1 +no +.Sy ADVANCING +phrase appeared +.It \0\0\00 +ADVANCING 0 LINES +is valid +.It \0\0>0 +the value of +.Ar n +in +ADVANCING +.Ar n +LINES +.El +.It Ar is_random +is +.Sy true +if the +.Em "access mode" +is RANDOM +.El +.It Rewrite +.Ft void +.Fn rewrite_t "cblc_file_t *file" "size_t length" "bool is_random" +parameters: +.Bl -hang -width is_random -compact +.It Ar length +number of bytes to write +.It Ar is_random +.Sy true +if +.Em "access mode" +is RANDOM +.El +.It Delete +.Ft void +.Fn delete_t "cblc_file_t *file" "bool is_random" +parameters: +.Bl -hang -width is_random -compact +.It Ar is_random +.Sy true +if +.Em "access mode" +is RANDOM +.El +.El +. +.Pp +The library implements one function that the +.Nm Ns +-produced binary calls directly: +.Bl -item +.It +.Ft gcobol_io_t * +.Fn gcobol_fileops +.br +This function populates a +.Vt gcobol_io_t +object with the above function pointers. The compiled binary begins +by calling +.Fn gcobol_fileops Ns , +and then uses the supplied pointers to effect I/O. +.El +. +.\" The following commands should be uncommented and +.\" used where appropriate. +.\" .Sh IMPLEMENTATION NOTES +.\" This next command is for sections 2, 3, and 9 only +.\" (function return values). +.Sh RETURN VALUES +I/O functions return +.Sy void . +.Fn gcobol_fileops +returns +.Vt gcobol_io_t* . +.\" .Sh FILES +.\" .Sh COMPATIBILITY +.\" This next command is for sections 2, 3, 4, and 9 only +.\" (settings of the errno variable). +.\" .Sh ERRORS +.\" .Sh SEE ALSO +.Sh STANDARDS +The I/O library supplied by +.Nm , +.Sy libgcobolio.so , +supports the I/O semantics defined by ISO \*[lang]. +It is not intended to be compatible with any other ODF. That is, +.Sy libgcobolio.so +cannot be used to exchange data with any other \*[lang] implementation. +.Pp +The purpose of the +.Vt gcobol_io_t +structure is to allow the use of other I/O implementations with other ODF representations. +.\" .Sh HISTORY +.\" .Sh AUTHORS +.Sh CAVEATS +The library is not well tested, not least because it is not implemented. +.Sh BUGS +The future is yet to come. diff --git a/gcc/cobol/gcobolspec.cc b/gcc/cobol/gcobolspec.cc new file mode 100644 index 0000000..20ca757 --- /dev/null +++ b/gcc/cobol/gcobolspec.cc @@ -0,0 +1,694 @@ +/* Specific flags and argument handling of the Cobol front-end. + Copyright (C) 2021-2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This file implements gcobol's language-specific option handling for the COBOL front + end. It is based on a similar file for the Fortran front end, which + itself was derived from the C front end. Specifically, it defines + + lang_specific_driver(cl_decoded_option**, unsigned int*, int*) + + for gcobol. + + For GNU COBOL, we do the following to the argument list + before passing it to `gcc': + + 1. Make sure `-lgcobol -lm' is at the end of the list. + + 2. Make sure each time `-lgcobol' or `-lm' is seen, it forms + part of the series `-lgcobol -lm'. + + #1 and #2 are not done if `-nostdlib' or any option that disables + the linking phase is present, or if `-xfoo' is in effect. Note that + a lack of source files or -l options disables linking. + + The way this file builds the new argument list was rewritten to be easier to + maintain, and improve the way it decides to add or not add extra arguments, + etc. Several improvements were made in the handling of arguments, primarily + to make it more consistent with `gcc' itself. */ + +/* + * Number of extra output files that lang_specific_pre_link may generate. + * Unused. + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "opt-suggestions.h" +#include "gcc.h" +#include "opts.h" +#include "tm.h" +#include "intl.h" + +int lang_specific_extra_outfiles = 0; + +#ifndef MATH_LIBRARY +#define MATH_LIBRARY "m" +#endif + +#ifndef DL_LIBRARY +#define DL_LIBRARY "dl" +#endif + +#ifndef STDCPP_LIBRARY +#define STDCPP_LIBRARY "stdc++" +#endif + +#ifndef COBOL_LIBRARY +#define COBOL_LIBRARY "gcobol" +#endif + +/* The original argument list and related info is copied here. */ +static const struct cl_decoded_option *original_options; + +/* The new argument list will be built here. */ +static std::vector<cl_decoded_option>new_opt; + +// #define NOISY 1 + +static void +append_arg(const struct cl_decoded_option arg) + { +#ifdef NOISY + static int counter = 1; + fprintf( stderr, + ">>>>>> #%2d Appending %4ld %s\n", + counter++, + arg.opt_index, + arg.orig_option_with_args_text); +#endif + + new_opt.push_back(arg); + } + +static void +append_option (size_t opt_index, const char *arg, int value) + { + /* Append an option described by OPT_INDEX, ARG and VALUE to the list + being built. */ + struct cl_decoded_option decoded; + generate_option(opt_index, arg, value, CL_DRIVER, &decoded); + append_arg(decoded); + } + +static void +add_arg_lib(const char *library, bool force_static ATTRIBUTE_UNUSED) + { + /* Append a libgcobol argument to the list being built. If + FORCE_STATIC, ensure the library is linked statically. */ +#ifdef HAVE_LD_STATIC_DYNAMIC + if( force_static ) + { + append_option (OPT_Wl_, LD_STATIC_OPTION, 1); + } + append_option (OPT_l, library, 1); +#endif +#ifdef HAVE_LD_STATIC_DYNAMIC + if( force_static ) + { + append_option (OPT_Wl_, LD_DYNAMIC_OPTION, 1); + } +#endif + } + +static void +append_rdynamic() + { + // This is a bit ham-handed, but I was in a hurry. + struct cl_decoded_option decoded = {}; + decoded.opt_index = OPT_rdynamic; + decoded.orig_option_with_args_text = "-rdynamic"; + decoded.canonical_option[0] = "-rdynamic"; + decoded.canonical_option_num_elements = 1; + decoded.value = 1; + append_arg(decoded); + return; + } + +static void +append_rpath() + { +#ifdef EXEC_LIB + // Handing append_option() something on the stack Just Doesn't Work + if( strlen(EXEC_LIB) ) + { + static char ach[256]; + snprintf(ach, sizeof(ach), "-rpath=%s", EXEC_LIB); + append_option (OPT_Wl_, ach, 1); + } +#endif + return; + } + +static void +append_allow_multiple_definition() + { + append_option (OPT_Wl_, "--allow-multiple-definition", 1); + return; + } + +static void +append_fpic() + { + // This is a bit ham-handed, but I was in a hurry. + struct cl_decoded_option decoded = {}; + decoded.opt_index = OPT_rdynamic; + decoded.orig_option_with_args_text = "-fPIC"; + decoded.canonical_option[0] = "-fPIC"; + decoded.canonical_option_num_elements = 1; + decoded.value = 1; + append_arg(decoded); + return; + } + +void +lang_specific_driver (struct cl_decoded_option **in_decoded_options, + unsigned int *in_decoded_options_count, + int *in_added_libraries ATTRIBUTE_UNUSED) + { + int argc = (int)*in_decoded_options_count; + struct cl_decoded_option *decoded_options = *in_decoded_options; + + // This is the language in effect; it is changed by the OPT_x option. + // Start it out with the default of "none", which is the same as "cobol". + const char *language = "none"; + + /* The number of input and output files in the incoming arg list. */ + int n_infiles = 0; + int n_outfiles = 0; + + // The number of input files when the language is "none" or "cobol" + int n_cobol_files = 0; + + // saw_OPT_no_main means "don't expect -main" + bool saw_OPT_no_main = false; + + // The number of incoming OPT_main and OPT_main_ options seen + int n_mains = 0; + + bool saw_OPT_c = false; + bool saw_OPT_shared = false; + bool saw_OPT_pic = false; + bool saw_OPT_PIC = false; + + bool verbose = false; + + // These flags indicate whether we need various libraries + + bool need_libgcobol = true; + bool need_libmath = (MATH_LIBRARY[0] != '\0'); + bool need_libdl = (DL_LIBRARY[0] != '\0'); + bool need_libstdc = (STDCPP_LIBRARY[0] != '\0'); + // bool need_libquadmath = (QUADMATH_LIBRARY[0] != '\0'); + bool need_rdynamic = true; + bool need_allow_multiple_definition = true; + + // Separate flags for a couple of static libraries + bool static_libgcobol = false; + bool static_in_general = false; + + /* WEIRDNESS ALERT: + + Sometime around August of 2024, changes were made to the GCC source code + that resulted in an "memory released twice" run-time error when a + std::unordered_map was destructed twice, which usually can't happen. But + it was happening in a gcobol-generated executable. Investigation revealed + that + + gocobol ... libgcobol.a -lgcobol + + resulted in __gg__alphabet_states being destructed twice. + + This should not happen! In normal -shared code, including both libxxx.a + and -lxxx is perfectly legitimate and causes no problem, because the first + one to be encountered provides the globals. But something about the + extremely complex makefile for libgcobol was resulting in the double + destructor problem. + + A couple of days of looking for a fix were unsuccessful. + + So, I have added logic to this module to prevent the otherwise automatic + insertion of "-lgcobol" when there is an explicit "libgcobol.a" in the + parameters. + + */ + + int index_libgcobol_a = 0; + + // This is for the -Wl,-rpath=<EXEC_LIB> + bool need_rpath = true; + + bool no_files_error = true; + +#ifdef NOISY + int counter=1; + for(int i = 0; i < argc; i++) + { + fprintf( stderr, + ">>>>>> #%2d Incoming: %4ld %s\n", + counter++, + decoded_options[i].opt_index, + decoded_options[i].orig_option_with_args_text); + } + fprintf (stderr, "\n"); +#endif + + // There is always the possibility that no changes to the options + // will be needed: + + /* First pass through arglist. + + If -nostdlib or a "turn-off-linking" option is anywhere in the + command line, don't do any library-option processing (except + relating to -x). */ + + for(int i = 1; i < argc; ++i) + { + if (decoded_options[i].errors & CL_ERR_MISSING_ARG) + { + continue; + } + + if( strcmp( decoded_options[i].orig_option_with_args_text, "-###") == 0 ) + { + no_files_error = false; + } + + switch(decoded_options[i].opt_index) + { + case OPT_SPECIAL_input_file: + no_files_error = false; + n_infiles += 1; + if( strcmp(language, "none") == 0 + || strcmp(language, "cobol") == 0 ) + { + n_cobol_files += 1; + } + if( strstr(decoded_options[i].orig_option_with_args_text, "libgcobol.a") ) + { + // We have been given an explicit libgcobol.a. We need to note that. + index_libgcobol_a = i; + } + continue; + + case OPT_shared: + saw_OPT_shared = true; + break; + + case OPT_fpic: + saw_OPT_pic = true; + break; + + case OPT_fPIC: + saw_OPT_PIC = true; + break; + + case OPT_c: + // With this option, no libraries need be loaded + saw_OPT_c = true; + need_libgcobol = false; + need_libmath = false; + need_libdl = false; + need_libstdc = false; + // need_libquadmath = false; + need_rdynamic = false; + break; + + case OPT_rdynamic: + need_rdynamic = false; + break; + + case OPT_Wl_: + if( strstr(decoded_options[i].orig_option_with_args_text, + "--allow-multiple-definitions") ) + { + need_allow_multiple_definition = false; + } + if( strstr(decoded_options[i].orig_option_with_args_text, "-rpath") ) + { + // The caller is doing something with -rpath. Assume they know what + // they are doing + + // On second thought, always install our rpath. It goes at the end, + // so if the user specifies and rpath that they prefer, it'll get + // taken first. + need_rpath = true; + } + break; + + case OPT_nostdlib: + case OPT_nodefaultlibs: + case OPT_r: + case OPT_S: + case OPT_fsyntax_only: + case OPT_E: + // With these options, no libraries need be loaded + need_libgcobol = false; + need_libmath = false; + need_libdl = false; + need_libstdc = false; + // need_libquadmath = false; + need_rdynamic = false; + break; + + case OPT_static_libgcobol: +#ifdef HAVE_LD_STATIC_DYNAMIC + static_libgcobol = true; + need_libgcobol = true; +#endif + break; + + case OPT_l: + n_infiles += 1; + if(strcmp(decoded_options[i].arg, MATH_LIBRARY) == 0) + { + need_libmath = false; + } + else if(strcmp(decoded_options[i].arg, DL_LIBRARY) == 0) + { + need_libdl = false; + } + else if(strcmp(decoded_options[i].arg, COBOL_LIBRARY) == 0) + { + need_libgcobol = false; + } + else if(strcmp(decoded_options[i].arg, STDCPP_LIBRARY) == 0) + { + need_libstdc = false; + } + break; + + case OPT_o: + n_outfiles += 1; + break; + + case OPT_nomain: + saw_OPT_no_main = true; + break; + + case OPT_main: + case OPT_main_: + n_mains += 1; + break; + + case OPT_v: + no_files_error = false; + verbose = true; + break; + + case OPT_x: + language = decoded_options[i].arg; + break; + + case OPT__version: + no_files_error = false; + break; + + case OPT__help: + /* + * $ man ./gcobol.1 | ./help.gen + */ + puts( "Options specific to gcobol: " ); + puts( + " -main option uses the first PROGRAM of filename as the entry point for\n" + " the main() procedure. \n" + " -no_main \n" + " means that there is no -main, and the main() entry point is\n" + " provided by some other compilation or .o file\n" + " -findicator-column\n" + " describes the location of the Indicator Area in a COBOL file with\n" + " standard 80-column lines. \n" + " -ffixed-form\n" + " Use strict Reference Format in reading the COBOL input: 72-char‐\n" + " acter lines, with a 6-character sequence area, and an indicator\n" + " column. \n" + " -ffree-form\n" + " Force the COBOL input to be interpreted as free format. \n" + " -fmax-errors nerror\n" + " nerror represents the number of error messages produced. \n" + " -fflex-debug, -fyacc-debug\n" + " produce messages useful for compiler development. \n" ); + + + /* Let gcc.cc handle this, as it has a really + cool facility for handling --help and --verbose --help. */ + return; + + default: + break; + } + } + + if( saw_OPT_no_main && n_mains ) + { + char ach[] = "\"-no-main\" and \"-main\" are incompatible"; + fatal_error(input_location,"%s", ach); + } + + bool suppress_main = saw_OPT_no_main + || (saw_OPT_c && n_mains==0) + || saw_OPT_shared; + + if( no_files_error || ((n_outfiles != 0) && (n_infiles == 0)) ) + { + fatal_error(input_location, "no input files"); + } + + /* If there are no input files, there is no need for any libraries. */ + if( n_infiles == 0 ) + { + need_libgcobol = false; + need_libmath = false; + need_libdl = false; + need_libstdc = false; + // need_libquadmath = false; + } + + /* Second pass through arglist, transforming arguments as appropriate. */ + + append_arg(decoded_options[0]); /* Start with command name, of course. */ + + bool first_COBOL_file = true; + bool prior_main = false; + const char *entry_point = NULL; + + // Reset the current language, in case it was changed during the first pass + language = "none"; + + for(int i = 1; i < argc; ++i) + { + if (decoded_options[i].errors & CL_ERR_MISSING_ARG) + { + append_arg(decoded_options[i]); + continue; + } + + switch (decoded_options[i].opt_index) + { + case OPT_SPECIAL_input_file: + if( strcmp(language, "none") == 0 + || strcmp(language, "cobol") == 0 ) + { + // This is a COBOL source code file + if( !suppress_main && n_mains==0 && first_COBOL_file ) + { + // This is a case where the -c option is not present, and there + // were no -main switches. So, we are going to insert a -main switch + // in front of this, the first COBOL file + first_COBOL_file = false; + prior_main = true; + } + + if( prior_main ) + { + char ach[128]; + if( entry_point ) + { + strcpy(ach, entry_point); + } + else + { + strcpy(ach, decoded_options[i].arg); + } + append_option(OPT_main_, ach, 1); + prior_main = false; + entry_point = NULL; + } + } + append_arg(decoded_options[i]); + break; + + case OPT_main: + if( prior_main ) + { + char ach[] = "Multiple \"-main\" without a source file"; + fatal_error(input_location, "%s", ach); + } + // This is a simple -main that needs to be followed by a COBOL file + prior_main = true; + break; + + case OPT_main_: // Note the trailing underscore + if( prior_main ) + { + char ach[] = "Multiple \"-main\" without a source file"; + fatal_error(input_location, "%s", ach); + } + // This is -main=<arg> that needs to be followed by a COBOL file + entry_point = decoded_options[i].arg; + prior_main = true; + break; + + case OPT_nomain: + append_arg(decoded_options[i]); + break; + + case OPT_x: + language = decoded_options[i].arg; + append_arg(decoded_options[i]); + break; + + case OPT_static_libgcobol: + // Don't pass this one on to cobol1 + break; + +////#ifdef __x86_64__ +//// case OPT_m32: +//// error ( "unrecognized command-line option %<-%s%>; " +//// "(32-bit executables cannot be generated)", "m32"); +//// break; +////#endif + case OPT_static: + static_in_general = true; + break; + + default: + append_arg(decoded_options[i]); + break; + } + } + + /* As described above, we have empirically noticed that when the command line + explicitly specifies libgcobol.a as an input, a following -lgcobol causes + the "on exit" functions of the library to be executed twice. This can + cause trouble for c++ class destructors that expect to be run only once. + + So, we rather hamhandedly prevent the inclusion of the default -lgcobol + parameter when a libgcobol.a was found to be present. + + Note that if the user *explicitly* specifies both libgcobol.a and + -lgocobol, then he gets what he asked for, and the problem then belongs to + them. + + */ + + if( index_libgcobol_a ) + { + need_libgcobol = false; + } + + if( need_libgcobol ) + { + if( 0 != strcmp(EXEC_LIB, "/usr/lib") ) + { + append_option(OPT_L, EXEC_LIB, 1); + } + add_arg_lib(COBOL_LIBRARY, static_libgcobol); + } + if( need_libmath ) + { + add_arg_lib(MATH_LIBRARY, static_in_general); + } + if( need_libdl ) + { + add_arg_lib(DL_LIBRARY, static_in_general); + } + if( need_libstdc && static_in_general ) + { + add_arg_lib(STDCPP_LIBRARY, static_in_general); + } + + if( saw_OPT_shared && !saw_OPT_pic && !saw_OPT_PIC ) + { + append_fpic(); + } + + if( need_rdynamic ) + { + append_rdynamic(); + } + + if( need_allow_multiple_definition && (n_infiles || n_outfiles) ) + { + append_allow_multiple_definition(); + } + + if( need_rpath && (n_infiles || n_outfiles) ) + { + append_rpath(); + } + + if( prior_main ) + { + char ach[] = "\"-main\" without a source file"; + fatal_error(input_location, "%s", ach); + } + + // We now take the new_opt vector, and turn it into an array of + // cl_decoded_option + + size_t new_option_count = new_opt.size(); + struct cl_decoded_option *new_options = XNEWVEC (struct cl_decoded_option, new_option_count); + + for(size_t i=0; i<new_option_count; i++) + { + new_options[i] = new_opt[i]; + } + +#ifdef NOISY + verbose = true; +#endif + if( verbose && new_options != original_options ) + { + fprintf(stderr, _("Driving: (%ld)\n"), new_option_count); + for(size_t i=0; i<new_option_count; i++) + { + fprintf(stderr, + " [%2ld] %4ld %s\n", + i, + new_options[i].opt_index, + new_options[i].orig_option_with_args_text); + } + fprintf (stderr, "\n"); + } + + *in_decoded_options_count = new_option_count; + *in_decoded_options = new_options; + } + +/* + * Called before linking. + * Returns 0 on success and -1 on failure. + * Unused. + */ +int +lang_specific_pre_link( void ) + { + return 0; + } + diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc new file mode 100644 index 0000000..eac1e26 --- /dev/null +++ b/gcc/cobol/genapi.cc @@ -0,0 +1,16926 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "cobol-system.h" + +#include "coretypes.h" +#include "tree.h" +#include "tree-iterator.h" +#include "stringpool.h" +#include "diagnostic-core.h" + +#define HOWEVER_GCC_DEFINES_TREE 1 + +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "gengen.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "genutil.h" +#include "genmath.h" +#include "structs.h" +#include "gcobolio.h" +#include "libgcobol.h" +#include "charmaps.h" +#include "valconv.h" +#include "show_parse.h" + +extern int yylineno; + +#define TSI_BACK (tsi_last(current_function->statement_list_stack.back())) + +extern char *cobol_name_mangler(const char *cobol_name); +static tree gg_attribute_bit_get(struct cbl_field_t *var, cbl_field_attr_t bits); + +static tree label_list_out_goto; +static tree label_list_out_label; +static tree label_list_back_goto; +static tree label_list_back_label; + +static void hijack_for_development(const char *funcname); + +static size_t sv_data_name_counter = 1; +static int call_counter = 1; +static int pseudo_label = 1; + +static bool suppress_cobol_entry_point = false; +static char ach_cobol_entry_point[256] = ""; + +bool bSHOW_PARSE = getenv("SHOW_PARSE"); +bool show_parse_sol = true; +int show_parse_indent = 0; + +#define DEFAULT_LINE_NUMBER 2 + +#ifdef LINE_TICK +/* This code is used from time to time when sorting out why compilation + takes more time than expected */ +static void +line_tick() + { + using namespace std::chrono; + static high_resolution_clock::time_point t1 = high_resolution_clock::now(); + static high_resolution_clock::time_point t2; + int line_now = CURRENT_LINE_NUMBER; + static int line = 0; + if( (line_now / 10000) != (line / 10000) ) + { + line = line_now; + t2 = high_resolution_clock::now(); + duration<double> time_span = duration_cast<duration<double>>(t2 - t1); + fprintf(stderr, "%6d %6.1lf\n", line, time_span.count()); + } + } +#else +#define line_tick() +#endif + +typedef struct TREEPLET + { + tree pfield; + tree offset; + tree length; + } TREEPLET; + +static +void +treeplet_fill_source(TREEPLET &treeplet, cbl_refer_t &refer) + { + treeplet.pfield = gg_get_address_of(refer.field->var_decl_node); + treeplet.offset = refer_offset_source(refer); + treeplet.length = refer_size_source(refer); + } + +tree file_static_variable(tree type, const char *v) + { + // This routine returns a reference to an already-defined file_static variable + // You need to know the type that was used for the definition. + return gg_declare_variable(type, v, NULL, vs_file_static); + } + +static void move_helper(tree size_error, // INT + cbl_refer_t destref, + cbl_refer_t sourceref, + TREEPLET &tsource, + cbl_round_t rounded, + bool check_for_error, + bool restore_on_error = false + ); + +// set using -f-trace-debug, defined in lang.opt +int f_trace_debug; + +// When doing WRITE statements, the IBM Language Reference and the ISO/IEC_2014 +// standard specify that when the ADVANCING clause is omitted, the default is +// AFTER ADVANCING 1 LINE. +// +// MicroFocus and GnuCOBOL state that the default is BEFORE ADVANCING 1 LINE +// +// During initial compiler development, we used Michael Coughlin's "Beginning +// COBOL For Programmers" textbook for source code examples, and it was clear +// from at least one sample program that his compiler used the Microfocus +// convention. For ease of development, we took on that same convention, but +// we provide here for a switch that changes that behavior: + +static bool auto_advance_is_AFTER_advancing = 0; + +/* This is a little complicated. In order to keep things general, we are + assuming that any function we call will be returning a 64-bit value. In + places where we know that not to be true, we'll have to do appropriate + casts. For example, main() returns an INT, as do functions that + return the default RETURN-CODE will have */ + +#define COBOL_FUNCTION_RETURN_TYPE SSIZE_T + +#define MAX_AFTERS 8 + +// These variables contol a little state machine. When a simple -main is in +// effect, the first program in the module becomes the target of a main() +// that we synthesize function. When -main=module:progid is in effect, we +// create a main() that calls progid. When active, progid is kept in +// the map main_strings. +static std::unordered_map<std::string, std::string> main_strings; +static bool this_module_has_main = false; // sticky switch for the module +static bool next_program_is_main = false; // transient switch for the module +static char *main_entry_point = NULL; + +static bool static_call = true; +bool use_static_call( bool yn ) { return static_call = yn; } +static bool use_static_call() { return static_call; } + +// This global variable can be set upstream, like from a compiler +// command line switch. "1" for stdout, "2" for stderr, or "filename" + +const char *gv_trace_switch = NULL; + +// The environment variable wins over the command line +char const *bTRACE1 = NULL; +tree trace_handle; +tree trace_indent; +bool cursor_at_sol = true; + +static void +trace1_init() + { + static bool first_time = true; + if( first_time ) + { + first_time = false; + trace_handle = gg_define_variable(INT, "trace_handle", vs_static); + trace_indent = gg_define_variable(INT, "trace_indent", vs_static); + + bTRACE1 = getenv("TRACE1") ? getenv("TRACE1") : gv_trace_switch; + + if( bTRACE1 && strcmp(bTRACE1, "0") != 0 ) + { + if( strcmp(bTRACE1, "1") == 0 ) + { + gg_assign(trace_handle , integer_one_node); + } + else if( strcmp(bTRACE1, "2") == 0 ) + { + gg_assign(trace_handle , integer_two_node); + } + else + { + gg_assign(trace_handle , + gg_open(gg_string_literal(bTRACE1), + build_int_cst_type(INT, O_CREAT|O_WRONLY|O_TRUNC))); + } + } + else + { + // In case bTRACE1 pointed to an empty string + bTRACE1 = NULL; + } + } + } + +static void +create_cblc_string_variable(const char *var_name, const char *var_contents) + { + // This is a way of having the compiler communicate with GDB. I create a + // global const char[] string with a known name so that GDB can look for that + // variable and pick up its contents. + + // This probably should be in the .debug_info section, but for the moment I + // don't know how to do that, but I do know how to do this: + + tree array_of_characters = build_array_type_nelts(CHAR, strlen(var_contents)+1); + TYPE_NAME(array_of_characters) = get_identifier("cblc_string"); + tree constr = build_string(strlen(var_contents)+1, var_contents); + TREE_TYPE(constr) = array_of_characters; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + tree entry_point = gg_declare_variable(array_of_characters, + var_name, + constr, + vs_external); + gg_define_from_declaration(entry_point); + } + +static void +build_main_that_calls_something(const char *something) + { + // This routine generates main(), which has as its body a call to "something". + // which is a call to a simple `extern int something(void)` routine. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" main will call ") + SHOW_PARSE_TEXT(something) + SHOW_PARSE_END + } + + gg_set_current_line_number(DEFAULT_LINE_NUMBER); + + gg_define_function( INT, + "main", + INT, "argc", + build_pointer_type(CHAR_P), "argv", + NULL_TREE); + + // Pick up pointers to the input parameters: + // First is the INT which is the number of argv[] entries + tree argc = DECL_ARGUMENTS(current_function->function_decl); + // Second is the char **argv + tree argv = TREE_CHAIN(argc); // overall source length + + gg_call( VOID, + "__gg__stash_argc_argv", + argc, + argv, + NULL_TREE); + + // Call the top-level COBOL function. We know it has to return an INT, + // so we need to cast it from the SIZE_T that all COBOL are assumed + // to return: + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("main calls \"", something, "\"") + TRACE1_END + } + + // Let MODULE-NAME know that we were launched by a generated -main program + gg_call(VOID, + "__gg__module_name_push", + gg_string_literal("Mmain"), + NULL_TREE); + + char *psz = cobol_name_mangler(something); + gg_assign(var_decl_main_called, integer_one_node); + gg_return(gg_cast(INT, gg_call_expr( COBOL_FUNCTION_RETURN_TYPE, + psz, + argc, + argv, + NULL_TREE))); + strncpy(ach_cobol_entry_point, psz, sizeof(ach_cobol_entry_point)-1); + free(psz); + gg_finalize_function(); + } + +static std::unordered_map<std::string, size_t>gotos_labels; +#define LABEL_COUNT_OFFSET 100 + +static +tree +get_field_p(size_t index) + { + if(index) + { + cbl_field_t *field = cbl_field_of(symbol_at(index)); + + if( !field->var_decl_node ) + { + dbgmsg("%s (type: %s) improperly has a NULL var_decl_node", + field->name, + cbl_field_type_str(field->type)); + cbl_internal_error( + "Probable cause: it was referenced without being defined."); + } + + return gg_get_address_of(field->var_decl_node); + } + else + { + return gg_cast(cblc_field_p_type_node, null_pointer_node); + } + } + +static +char * +level_88_helper(size_t parent_capacity, + const cbl_domain_elem_t &elem, + size_t &returned_size) + { + // We return a MALLOCed return value, which the caller must free. + char *retval = (char *)xmalloc(parent_capacity + 64); + char *builder = (char *)xmalloc(parent_capacity + 64); + size_t nbuild = 0; + + cbl_figconst_t figconst = cbl_figconst_of( elem.name()); + if( figconst ) + { + nbuild = 1; + strcpy(retval, "1Fx"); + switch(figconst) + { + case normal_value_e : + // This really should never happend + abort(); + break; + case low_value_e : + retval[2] = 'L'; + break; + case zero_value_e : + retval[2] = 'Z'; + break; + case space_value_e : + retval[2] = 'S'; + break; + case quote_value_e : + retval[2] = 'Q'; + break; + case high_value_e : + retval[2] = 'H'; + break; + case null_value_e: + retval[2] = '\0'; + break; + } + returned_size = 3; + } + else + { + // We are working with an ordinary string. + + // Pick up the string + size_t first_name_length = elem.size(); + char *first_name = (char *)xmalloc(first_name_length + 1); + memcpy(first_name, elem.name(), first_name_length); + first_name[first_name_length] = '\0'; + + // Convert it to EBCDIC, when necessary; leave it alone when not necessary. + for(size_t i=0; i<first_name_length; i++) + { + first_name[i] = ascii_to_internal(first_name[i]); + } + + if( parent_capacity == 0 ) + { + // Special case: parent_capacity is zero when this routine has been + // called as part of a debugging trace. + if( elem.all ) + { + strcpy(builder+nbuild, "ALL "); + nbuild += 4; + } + memcpy(builder+nbuild, first_name, first_name_length); + nbuild += first_name_length; + } + else + { + if( elem.all ) + { + while(nbuild < parent_capacity ) + { + builder[nbuild] = first_name[nbuild % first_name_length]; + nbuild += 1; + } + } + else + { + memcpy(builder+nbuild, first_name, first_name_length); + nbuild += first_name_length; + } + } + returned_size = sprintf(retval, "%zdA", nbuild); + memcpy(retval + returned_size, builder, nbuild); + returned_size += nbuild; + free(first_name); + free(builder); + } + return retval; + } + +static char * +get_level_88_domain(size_t parent_capacity, cbl_field_t *var, size_t &returned_size) + { + if( var->type != FldClass || var->level != 88 ) + { + returned_size = 0; + return NULL; + } + + // Entering here means we know that this is FldClass of level 88 + + // We convert the incoming information at var->data.domains to a single + // stream of bytes. We return a malloced pointer to that stream; returned + // size is the size of the stream. + + // The nature of an 88 is that each element is a pair + + // The following pairs are zero-terminated strings. It thus + // follows that the strings cannot contain '\0' characters. + + // Each element of the pair is converted to a stream: + // For strings of bytes: + // ddd A <ddd bytes> + // For figurative constants: + // 1Fx, where x is in [LZSQH], for LOW-VALUE ZERO SPACE QUOTE HIGH-VALUE + + // Numerics are converted to strings, and handled as above + + size_t retval_capacity = 64; + char *retval = (char *)xmalloc(retval_capacity); + size_t output_index = 0; + + // Loop through the provided domains: + returned_size = 0; + const struct cbl_domain_t *domain = var->data.domain; + while( domain->first.name() ) + { + // We have another pair to process + size_t stream_len; + char *stream; + + // Do the first element of the domain + stream = level_88_helper(parent_capacity, domain->first, stream_len); + if( output_index + stream_len > retval_capacity ) + { + retval_capacity *= 2; + retval = (char *)xrealloc(retval, retval_capacity); + } + memcpy(retval + output_index, stream, stream_len); + output_index += stream_len; + returned_size += stream_len; + free(stream); + + // Do the second element of the domain + stream = level_88_helper(parent_capacity, domain->last, stream_len); + if( output_index + stream_len > retval_capacity ) + { + retval_capacity *= 2; + retval = (char *)xrealloc(retval, retval_capacity); + } + memcpy(retval + output_index, stream, stream_len); + output_index += stream_len; + returned_size += stream_len; + free(stream); + domain += 1; + } + retval[returned_size++] = '\0'; + return retval; + } + +static +char * +get_class_condition_string(cbl_field_t *var) + { + // We know at this point that var is FldClass + // The LEVEL is not 88, so this is a CLASS SPECIAL-NAME + + const struct cbl_domain_t *domain = var->data.domain; + + /* There are five possibilities we need to deal with. + + 66 + 66 THROUGH 91 + 91 THROUGH 66 // This is the same as 66 THROUGH 91 + "A" + "A" THROUGH "Z + "Z" THROUGH "A" // This is the same as "A" THROUGH "Z" + "ABCJ12" // This is the same as "A" "B" "C" ... + + Expressly presented numbers are the ordinal positions in the run-time + character set. So, an ASCII "A" would be given as 66, which is one + greater than 65, which is the ASCII codepoint for "A". An EBCDIC "A" + would be presented as 194, which is one greater than 193, which is the + decimal representation of an EBCDIC "A", whose hex code is 0xC2. + + We need to account for EBCDIC as well as ASCII. In EBCDIC, + "A" THROUGH "Z" doesn't mean what it looks like it means, because EBCIDC + encoding has gaps between I and J, and between R and S. That isn't true + in ASCII. We don't want to deal with these issues at compile time, so we + are encoding numeric ordinals with their negated values, while other + characters are given as the numeric forms of their ASCII encoding. + Conversion to EBCDIC occurs at runtime. + + In support of this strategy, character strings like "ABCD" are broken up + into "A" "B" "C" "D" and converted to their hexadecimal representations. + */ + + char ach[8192]; + memset(ach, 0, sizeof(ach)); + char *p = ach; + + while( domain->first.is_numeric || domain->first.name() ) + { + // *What* were they smoking back then? + + uint8_t value1; + uint8_t value2; + + char achFirstName[256]; + char achLastName[256]; + + size_t first_name_length = domain->first.size() + ? domain->first.size() + : strlen(domain->first.name()); + size_t last_name_length = domain->last.size() + ? domain->last.size() + : strlen(domain->last.name()); + + if( domain->first.is_numeric ) + { + if( strlen(ach) > sizeof(ach) - 1000 ) + { + cbl_internal_error("Nice try, but you can't fire me. I quit!"); + } + + // We are working with unquoted strings that contain the values 1 through + // 256: + value1 = (uint8_t)atoi(domain->first.name()); + value2 = (uint8_t)atoi(domain->last.name()); + if( value2 < value1 ) + { + std::swap(value1, value2); + } + if( value1 != value2 ) + { + p += sprintf(p, "-%2.2X/-%2.2X ", value1-1, value2-1); + } + else + { + p += sprintf(p, "-%2.2X ", value1-1); + } + } + else if( first_name_length == 1 ) + { + // Since the first.name is a single character, we can do this as + // a single-character pair. + + // Keep in mind that the single character might be a two-byte UTF-8 + // codepoint + uint8_t ch1 = domain->first.name()[0]; + uint8_t ch2 = domain->last.name()[0]; + + gcc_assert(first_name_length <= 2); + gcc_assert(last_name_length <= 2); + + char *p2; + size_t one; + p2 = achFirstName; + one = 8; + raw_to_internal(&p2, &one, domain->last.name(), last_name_length); + ch2 = achFirstName[0]; + + p2 = achLastName; + one = 8; + raw_to_internal(&p2, &one, domain->first.name(), first_name_length); + ch1 = achLastName[0]; + + if( ch1 < ch2 ) + { + value1 = ch1; + value2 = ch2; + } + else + { + value2 = ch1; + value1 = ch2; + } + if( value1 != value2 ) + { + p += sprintf(p, "%2.2X/%2.2X ", value1, value2); + } + else + { + p += sprintf(p, "%2.2X ", value1); + } + } + else + { + gcc_assert( first_name_length > 1 ); + + // We are working with a string larger than 1 character. The COBOL + // spec says there can't be a THROUGH, so we ignore the last.name: + char *p2; + size_t one; + p2 = achFirstName; + one = 8; + raw_to_internal(&p2, &one, domain->last.name(), last_name_length); + + for(size_t i=0; i<first_name_length; i++) + { + p += sprintf(p, "%2.2X ", (unsigned char)achFirstName[i]); + } + } + domain += 1; + } + + // Wipe out the trailing space + ach[strlen(ach)-1] = '\0'; + char *retval = xstrdup(ach); + + return retval; + } + +struct program_reference_t { + size_t caller; + const char *called; + + program_reference_t( size_t caller, const char called[] ) + : caller(caller), called(xstrdup(called)) + {} + bool operator==( const program_reference_t& that ) const { + return caller == that.caller && 0 == strcasecmp(called, that.called); + } + bool operator<( const program_reference_t& that ) const { + if( caller == that.caller ) return 0 < strcasecmp(called, that.called); + return caller < that.caller; + } +}; + +struct called_tree_t { + tree node; + cbl_call_convention_t convention; + + called_tree_t( tree node, + cbl_call_convention_t convention ) + : node(node), convention(convention) + {} + bool operator==( const called_tree_t& that ) const { + return node == that.node && convention == that.convention; + } + + class match_tree { // match node regardless of convention + tree node; + + public: + match_tree( tree node ) : node(node) {} + bool operator()( const called_tree_t& that ) const { + return this->node == that.node; + } + }; +}; + +static std::map<program_reference_t, std::list<called_tree_t> > call_targets; +static std::map<tree, cbl_call_convention_t> called_targets; + +static void +parser_call_target( tree func ) + { + cbl_call_convention_t convention = current_call_convention(); + const char *name = IDENTIFIER_POINTER( DECL_NAME(func) ); + program_reference_t key(current_program_index(), name); + + // Each func is unique and inserted only once. + assert( called_targets.find(func) == called_targets.end() ); + called_targets[func] = convention; + + called_tree_t value(func, convention); + auto& p = call_targets[key]; + p.push_back(value); + } + +/* + * Is the node a recorded call target? The language-dependent + * function cobol_set_decl_assembler_name will lower-case the name + * unless, for a specific call, this function returns + * cbl_call_verbatim_e. + */ +cbl_call_convention_t +parser_call_target_convention( tree func ) + { + auto p = called_targets.find(func); + if( p != called_targets.end() ) return p->second; + + return cbl_call_cobol_e; + } + +void +parser_call_targets_dump() + { + dbgmsg( "call targets for #%zu", current_program_index() ); + for( const auto& elem : call_targets ) { + const auto& k = elem.first; + const auto& v = elem.second; + fprintf(stderr, "\t#%-3zu %s calls %s ", + k.caller, cbl_label_of(symbol_at(k.caller))->name, k.called); + char ch = '['; + for( auto func : v ) { + fprintf( stderr, "%c %s", ch, IDENTIFIER_POINTER(DECL_NAME(func.node)) ); + ch = ','; + } + fprintf(stderr, " ]\n"); + } + } + +size_t +parser_call_target_update( size_t caller, + const char plain_name[], + const char mangled_name[] ) + { + auto key = program_reference_t(caller, plain_name); + auto p = call_targets.find(key); + if( p == call_targets.end() ) return 0; + + for( auto func : p->second ) + { + func.convention = cbl_call_verbatim_e; + DECL_NAME(func.node) = get_identifier(mangled_name); + } + return p->second.size(); + } + +static tree +function_handle_from_name(cbl_refer_t &name, + tree function_return_type) + { + Analyze(); + + tree function_type = build_varargs_function_type_array( + function_return_type, + 0, + NULL); + tree function_pointer = build_pointer_type(function_type); + tree function_handle = gg_define_variable(function_pointer, "..function_handle.1", vs_stack); + + if( name.field->type == FldPointer ) + { + // If the parameter is a pointer, just pick up the value and head for the + // exit + if( refer_is_clean(name) ) + { + gg_memcpy(gg_get_address_of(function_handle), + member(name.field->var_decl_node, "data"), + build_int_cst_type(SIZE_T, sizeof(void *))); + } + else + { + gg_memcpy(gg_get_address_of(function_handle), + qualified_data_source(name), + build_int_cst_type(SIZE_T, sizeof(void *))); + } + return function_handle; + } + else if( use_static_call() && is_literal(name.field) ) + { + // It's a literal, and we are using static calls. Generate the CALL, and + // pass the address expression to parser_call_target(). That will cause + // parser_call_target_update() to replace any nested CALL "foo" with the + // local "foo.60" name. + + // We create a reference to it, which is later resolved by the linker. + tree addr_expr = gg_get_function_address( function_return_type, + name.field->data.initial); + gg_assign(function_handle, addr_expr); + + tree func = TREE_OPERAND(addr_expr, 0); + parser_call_target(func); // add function to list of call targets + } + else + { + // This is not a literal or static + if( name.field->type == FldLiteralA ) + { + gg_assign(function_handle, + gg_cast(build_pointer_type(function_type), + gg_call_expr(VOID_P, + "__gg__function_handle_from_literal", + build_int_cst_type(INT, current_function->our_symbol_table_index), + gg_string_literal(name.field->data.initial), + NULL_TREE))); + } + else + { + gg_assign(function_handle, + gg_cast(build_pointer_type(function_type), + gg_call_expr( VOID_P, + "__gg__function_handle_from_name", + build_int_cst_type(INT, current_function->our_symbol_table_index), + gg_get_address_of(name.field->var_decl_node), + refer_offset_source(name), + refer_size_source( name), + NULL_TREE))); + } + } + + return function_handle; + } + +void +parser_initialize_programs(size_t nprogs, struct cbl_refer_t *progs) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + for( size_t i=0; i<nprogs; i++) + { + if( i > 0 ) + { + SHOW_PARSE_INDENT + } + if( progs[i].field->type == FldLiteralA ) + { + SHOW_PARSE_TEXT("\"") + SHOW_PARSE_TEXT(progs[i].field->data.initial) + SHOW_PARSE_TEXT("\"") + } + else + { + SHOW_PARSE_TEXT("") + SHOW_PARSE_TEXT(progs[i].field->name) + } + } + SHOW_PARSE_END + } + + for( size_t i=0; i<nprogs; i++ ) + { + tree function_handle = function_handle_from_name( progs[i], + COBOL_FUNCTION_RETURN_TYPE); + gg_call(VOID, + "__gg__to_be_canceled", + gg_cast(SIZE_T, function_handle), + NULL_TREE); + } + } + +void parser_statement_begin() + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[64]; + snprintf (ach, sizeof(ach), + " yylineno %d first/last %d/%d", + yylineno, + cobol_location().first_line, + cobol_location().last_line ); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + + if( gg_get_current_line_number() == DEFAULT_LINE_NUMBER ) + { + // This code is prevents anomolies when the first line of a program is + // a PERFORM <proc> ... TEST AFTER ... UNTIL ... + gg_set_current_line_number(CURRENT_LINE_NUMBER-1); + gg_assign(var_decl_nop, build_int_cst_type(INT, 106)); + } + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + } + +static void +initialize_variable_internal( cbl_refer_t refer, + bool explicitly=false, + bool just_once=false) + { + // fprintf(stderr, "initialize_variable_internal for %s\n", refer.field->name); + // gg_printf("initialize_variable_internal for %s\n", + // gg_string_literal(refer.field->name), + // NULL_TREE); + cbl_field_t *parsed_var = refer.field; + + if( parsed_var->type == FldLiteralA ) + { + return; + } + + if( parsed_var->is_key_name() ) + { + // This field is actually a placeholder for a RECORD KEY alias. It didn't + // go through parser_symbol_add(), and so any attempt to initialize it + // results in an error because there is no var_decl_node. + return; + } + + if( is_register_field( parsed_var) ) + { + return; + } + + if( parsed_var && parsed_var->type == FldBlob ) + { + return; + } + + Analyze(); + SHOW_PARSE + { + do + { + fprintf( stderr, + "( %d ) %s():", + CURRENT_LINE_NUMBER, + __func__); + } + while(0); + SHOW_PARSE_REF(" ", refer); + if( parsed_var->data.initial ) + { + SHOW_PARSE_TEXT(" >>") + if( parsed_var->level == 88) + { + size_t returned_size = 0; + char *string88 = get_level_88_domain(0, parsed_var, returned_size); + + char *p = string88; + bool first = true; + while(*p) + { + char *pend; + size_t length1 = strtoull(p, &pend, 10); + char *string1 = pend + 1; + char flag = *pend; + p = string1 + length1; + if(flag == 'A' ) + { + char ach2[] = "x"; + SHOW_PARSE_TEXT("\"") + for(size_t i=0; i<length1; i++) + { + ach2[0] = string1[i]; + SHOW_PARSE_TEXT(ach2) + } + SHOW_PARSE_TEXT("\"") + } + else + { + switch(string1[0]) + { + case 'L': + SHOW_PARSE_TEXT("LOW-VALUE") + break; + case 'Z': + SHOW_PARSE_TEXT("ZERO") + break; + case 'S': + SHOW_PARSE_TEXT("SPACE") + break; + case 'Q': + SHOW_PARSE_TEXT("QUOTE") + break; + case 'H': + SHOW_PARSE_TEXT("HIGH-VALUE") + break; + default: + SHOW_PARSE_TEXT("???") + break; + } + } + if( first ) + { + SHOW_PARSE_TEXT("/") + } + else + { + if(*p) + { + SHOW_PARSE_TEXT(" ") + } + } + first = !first; + } + free(string88); + } + else if( parsed_var->type == FldClass ) + { + char *p = get_class_condition_string(parsed_var); + SHOW_PARSE_TEXT(p); + free(p); + } + else + { + switch(parsed_var->type) + { + case FldGroup: + case FldAlphanumeric: + case FldNumericEdited: + case FldAlphaEdited: + case FldLiteralA: + SHOW_PARSE_TEXT(parsed_var->data.initial); + break; + default: + { + char ach[128]; + strfromf128(ach, sizeof(ach), "%.16E", parsed_var->data.value); + SHOW_PARSE_TEXT(ach); + break; + } + } + + } + SHOW_PARSE_TEXT("<<") + } + SHOW_PARSE_END + } + + CHECK_FIELD(parsed_var); + + // When initializing a variable, we have to ignore any DEPENDING ON clause + // that might otherwise apply + suppress_dest_depends = true; + + bool is_redefined = false; + + cbl_field_t *family_tree = parsed_var; + while(family_tree) + { + if( symbol_redefines(family_tree) ) + { + is_redefined = true; + break; + } + + family_tree = parent_of(family_tree); + } + + if( parsed_var->level == 66 ) + { + // Treat RENAMES as if they are redefines: + is_redefined = true; + } + + if( parsed_var->data.initial ) + { + bool a_parent_initialized = false; + cbl_field_t *parent = parent_of(parsed_var); + while( parent ) + { + if( parent->attr & has_value_e ) + { + a_parent_initialized = true; + break; + } + parent = parent_of(parent); + } + if( !a_parent_initialized ) + { + parsed_var->attr |= has_value_e; + } + } + + static const int DEFAULT_BYTE_MASK = 0x00000000FF; + static const int NSUBSCRIPT_MASK = 0x0000000F00; + static const int NSUBSCRIPT_SHIFT = 8; + static const int DEFAULTBYTE_BIT = 0x0000001000; + static const int EXPLICIT_BIT = 0x0000002000; + static const int REDEFINED_BIT = 0x0000004000; + static const int JUST_ONCE_BIT = 0x0000008000; + + int flag_bits = 0; + flag_bits |= explicitly ? EXPLICIT_BIT : 0; + flag_bits |= is_redefined && !explicitly ? REDEFINED_BIT : 0 ; + flag_bits |= wsclear() + ? DEFAULTBYTE_BIT + (*wsclear() & DEFAULT_BYTE_MASK) + : 0; + flag_bits |= (refer.nsubscript << NSUBSCRIPT_SHIFT) & NSUBSCRIPT_MASK; + flag_bits |= just_once ? JUST_ONCE_BIT : 0 ; + + suppress_dest_depends = false; // Set this to false so that refer_is_clean is valid + //fprintf(stderr, "refer_is_clean %2.2d %s %d 0x%lx\n", refer.field->level, refer.field->name, refer_is_clean(refer), refer.field->attr); + + if( !refer_is_clean(refer) ) + { + gg_call(VOID, + "__gg__initialize_variable", + gg_get_address_of(refer.field->var_decl_node), + refer_offset_dest(refer), + build_int_cst_type(INT, flag_bits), + NULL_TREE); + } + else + { + // We have a clean refer with no mods, so we can send just the pointer to + // the field + gg_call(VOID, + "__gg__initialize_variable_clean", + gg_get_address_of(refer.field->var_decl_node), + build_int_cst_type(INT, flag_bits) , + NULL_TREE); + } + + suppress_dest_depends = true; + + TRACE1 + { + TRACE1_HEADER + if( refer.field->level ) + { + gg_fprintf( trace_handle, + 1, "%2.2d ", + build_int_cst_type(INT, refer.field->level)); + } + TRACE1_REFER_INFO("", refer) + if( refer.field->level == 88 ) + { + TRACE1_TEXT(" ["); + + size_t returned_size = 0; + char *string88 = get_level_88_domain(0, parsed_var, returned_size); + + char *p = string88; + bool first = true; + while(*p) + { + char *pend; + size_t length1 = strtoull(p, &pend, 10); + char *string1 = pend + 1; + char flag = *pend; + p = string1 + length1; + if( flag == 'A' ) + { + char ach2[] = "x"; + TRACE1_TEXT("\"") + for(size_t i=0; i<length1; i++) + { + ach2[0] = string1[i]; + TRACE1_TEXT(ach2) + } + TRACE1_TEXT("\"") + } + else + { + switch(string1[0]) + { + case 'L': + TRACE1_TEXT("LOW-VALUE") + break; + case 'Z': + TRACE1_TEXT("ZERO") + break; + case 'S': + TRACE1_TEXT("SPACE") + break; + case 'Q': + TRACE1_TEXT("QUOTE") + break; + case 'H': + TRACE1_TEXT("HIGH-VALUE") + break; + default: + TRACE1_TEXT("???") + break; + } + } + if( first ) + { + TRACE1_TEXT("/") + } + else + { + if(*p) + { + TRACE1_TEXT(" ") + } + } + first = !first; + } + free(string88); + TRACE1_TEXT("] "); + } + else if( parsed_var->type == FldClass ) + { + char *p = get_class_condition_string(parsed_var); + TRACE1_TEXT(p); + free(p); + } + else + { + TRACE1_FIELD_VALUE("", parsed_var, "") + } + TRACE1_END + } + suppress_dest_depends = false; + } + +//static void +//initialize_variable_internal( cbl_field_t *field, +// bool explicitly=false, +// bool just_once=false) +// { +// cbl_refer_t wrapper(field); +// initialize_variable_internal( wrapper, +// explicitly, +// just_once); +// } + +void +parser_initialize(cbl_refer_t refer, bool like_parser_symbol_add) + { + //gg_printf("parser_initialize %s\n", gg_string_literal(refer.field->name), NULL_TREE); + if( like_parser_symbol_add ) + { + initialize_variable_internal(refer); + } + else + { + gcc_assert(refer.field->data.initial); + static const bool explicitly = true; + initialize_variable_internal(refer, explicitly); + } + } + +static void +get_binary_value_from_float(tree value, + cbl_refer_t &dest, + cbl_field_t *source, + tree source_offset + ) + { + // The destination is something with rdigits; the source is FldFloat + tree ftype; + switch( source->data.capacity ) + { + case 4: + ftype = FLOAT; + break; + case 8: + ftype = DOUBLE; + break; + case 16: + ftype = FLOAT128; + break; + default: + gcc_unreachable(); + break; + } + tree fvalue = gg_define_variable(ftype); + gg_assign(fvalue, + gg_indirect(gg_cast(build_pointer_type(ftype), + gg_add( member(source->var_decl_node,"data"), + source_offset)))); + + // We need to convert the floating point value to an integer value with the + // rdigits lined up properly. + + int rdigits = get_scaled_rdigits( dest.field ); + gg_assign(fvalue, + gg_multiply(fvalue, + gg_float(ftype, + build_int_cst_type(INT, + get_power_of_ten(rdigits))))); + + // And we need to throw away any digits to the left of the leftmost digits: + // At least, we need to do so in principl. I am deferring this problem until + // I understand it better. + + // We now have a floating point value that has been multiplied by 10**rdigits + gg_assign(value, gg_trunc(TREE_TYPE(value), fvalue)); + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +static void +gg_attribute_bit_clear(struct cbl_field_t *var, cbl_field_attr_t bits) + { + gg_assign( member(var, "attr"), + gg_bitwise_and( member(var, "attr"), + gg_bitwise_not( build_int_cst_type(SIZE_T, bits) ))); + } + +static +tree +gg_attribute_bit_get(struct cbl_field_t *var, cbl_field_attr_t bits) + { + tree retval = gg_bitwise_and( member(var, "attr"), + build_int_cst_type(SIZE_T, bits) ); + return retval; + } + +static void +gg_attribute_bit_set(struct cbl_field_t *var, cbl_field_attr_t bits) + { + gg_assign( member(var, "attr"), + gg_bitwise_or(member(var, "attr"), + build_int_cst_type(SIZE_T, bits))); + } +#pragma GCC diagnostic pop + +static void +gg_default_qualification(struct cbl_field_t * /*var*/) + { +// gg_attribute_bit_clear(var, refmod_e); + } + +static void +gg_get_depending_on_value(tree depending_on, cbl_field_t *current_sizer) + { + // We have to deal with the possibility of a DEPENDING_ON variable, + // and we have to apply array bounds whether or not there is a DEPENDING_ON + // variable: + + tree occurs_lower = gg_define_variable(LONG, "_lower"); + tree occurs_upper = gg_define_variable(LONG, "_upper"); + + gg_assign(occurs_lower, build_int_cst_type(LONG, current_sizer->occurs.bounds.lower)); + gg_assign(occurs_upper, build_int_cst_type(LONG, current_sizer->occurs.bounds.upper)); + + if( current_sizer->occurs.depending_on ) + { + // Get the current value of the depending_on data-item: + tree value = gg_define_int128(); + get_binary_value( value, + NULL, + cbl_field_of(symbol_at(current_sizer->occurs.depending_on)), + size_t_zero_node); + gg_assign(depending_on, gg_cast(LONG, value)); + IF( depending_on, lt_op, occurs_lower ) + // depending_is can be no less than occurs_lower: + gg_assign(depending_on, occurs_lower ); + ELSE + ENDIF + IF( depending_on, gt_op, occurs_upper ) + // depending_is can be no greater than occurs_upper: + gg_assign(depending_on, occurs_upper ); + ELSE + ENDIF + } + else + { + gg_assign(depending_on, occurs_upper); + } + } + +static int +digits_to_bytes(int digits) + { + int retval; + if( digits <= 2 ) + { + retval = 1; + } + else if( digits <= 4 ) + { + retval = 2; + } + else if( digits <= 9 ) + { + retval = 4; + } + else if( digits <= 18 ) + { + retval = 8; + } + else + { + retval = 16; + } + return retval; + } + +static size_t +get_bytes_needed(cbl_field_t *field) + { + size_t retval = 0; + switch(field->type) + { + case FldIndex: + case FldPointer: + case FldFloat: + case FldLiteralN: + retval = field->data.capacity; + break; + + case FldNumericDisplay: + { + int digits; + if( field->attr & scaled_e && field->data.rdigits<0) + { + digits = field->data.digits + -field->data.rdigits; + } + else + { + digits = field->data.digits; + } + retval = digits_to_bytes(digits); + break; + } + + case FldPacked: + { + int digits; + if( field->attr & scaled_e && field->data.rdigits<0) + { + digits = field->data.digits + -field->data.rdigits; + } + else + { + digits = field->data.digits; + } + if( !(field->attr & separate_e) ) + { + // This is COMP-3, so there is a sign nybble. + digits += 1; + } + retval = (digits+1)/2; + break; + } + + case FldNumericBinary: + case FldNumericBin5: + { + if( field->data.digits ) + { + int digits; + if( field->attr & scaled_e && field->data.rdigits<0) + { + digits = field->data.digits + -field->data.rdigits; + } + else + { + digits = field->data.digits; + } + retval = digits_to_bytes(digits); + } + else + { + retval = field->data.capacity; + } + break; + } + + default: + cbl_internal_error("%s(): Knows not the variable type %s for %s", + __func__, + cbl_field_type_str(field->type), + field->name ); + break; + } + return retval; + } + +static void +normal_normal_compare(bool debugging, + tree return_int, + cbl_refer_t *left_side_ref, + cbl_refer_t *right_side_ref, + tree left_side, + tree right_side ) + { + Analyze(); + + // If a value is intermediate_e, then the rdigits can vary at run-time, so + // we can't rely on the compile-time rdigits. + + bool left_intermediate = (left_side_ref->field->attr & intermediate_e); + bool right_intermediate = (right_side_ref->field->attr & intermediate_e); + + if( debugging ) + { + gg_printf("normal_normal_compare(): left_intermediate/right_intermediate %d/%d\n", + left_intermediate ? integer_one_node : integer_zero_node , + right_intermediate ? integer_one_node : integer_zero_node , + NULL_TREE); + } + + bool needs_adjusting; + if( !left_intermediate && !right_intermediate ) + { + // Yay! Both sides have fixed rdigit values. + + // Flag needs_adjusting as false, because we are going to do it here: + needs_adjusting = false; + int adjust = get_scaled_rdigits(left_side_ref->field) + - get_scaled_rdigits(right_side_ref->field); + + if( adjust > 0 ) + { + // We need to make right_side bigger to match the scale of left_side + scale_by_power_of_ten_N(right_side, adjust); + } + else if( adjust < 0 ) + { + // We need to make left_side bigger to match the scale of right_side + scale_by_power_of_ten_N(left_side, -adjust); + } + } + else + { + // At least one side is right_intermediate + + tree adjust; + if( !left_intermediate && right_intermediate ) + { + // left is fixed, right is intermediate + adjust = gg_define_int(); + gg_assign(adjust, + build_int_cst_type( INT, + get_scaled_rdigits(left_side_ref->field))); + + gg_assign(adjust, + gg_subtract(adjust, + gg_cast(INT, + member(right_side_ref->field->var_decl_node, + "rdigits")))); + needs_adjusting = true; + } + else if( left_intermediate && !right_intermediate ) + { + // left is intermediate, right is fixed + adjust = gg_define_int(); + gg_assign(adjust, gg_cast(INT, member(left_side_ref->field, "rdigits"))); + gg_assign(adjust, + gg_subtract(adjust, + build_int_cst_type( INT, + get_scaled_rdigits(right_side_ref->field)))); + needs_adjusting = true; + } + else // if( left_intermediate && right_intermediate ) + { + // Both sides are intermediate_e + adjust = gg_define_int(); + gg_assign(adjust, gg_cast(INT, member(left_side_ref->field, "rdigits"))); + gg_assign(adjust, + gg_subtract(adjust, + gg_cast(INT, + member(right_side_ref->field, "rdigits")))); + needs_adjusting = true; + } + + if( needs_adjusting ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): The value of adjust is %d\n", + adjust, + NULL_TREE); + } + IF( adjust, gt_op, integer_zero_node ) + { + // The right side needs to be scaled up + scale_by_power_of_ten(right_side, adjust); + } + ELSE + { + IF( adjust, lt_op, integer_zero_node ) + { + // The left side needs to be scaled up + scale_by_power_of_ten(left_side, gg_negate(adjust)); + } + ELSE + ENDIF + } + ENDIF + } + } + + if( TREE_TYPE(left_side) != TREE_TYPE(right_side) ) + { + // One is signed, the other isn't: + if( left_side_ref->field->attr & signable_e ) + { + // The left side can be negative. If it is, the return value has to be + // -1 for left < right + IF( left_side, lt_op, gg_cast(TREE_TYPE(left_side), integer_zero_node) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): different types returning -1\n", + NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + // Both sides are positive, allowing a direct comparison. + IF( gg_cast(TREE_TYPE(right_side), left_side), lt_op, right_side ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning -1\n", NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + IF( gg_cast(TREE_TYPE(right_side), left_side), gt_op, right_side) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning zero\n", NULL_TREE); + } + gg_assign( return_int, integer_zero_node); + } + ENDIF + } + ENDIF + } + ENDIF + } + else + { + // The right side can be negative. If it is, the return value has to be + // +1 for left > right + IF( right_side, lt_op, gg_cast(TREE_TYPE(right_side), integer_zero_node) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): different types returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + // Both sides are positive, allowing a direct comparison. + IF( left_side, lt_op, gg_cast(TREE_TYPE(left_side), right_side) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning -1\n", NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + IF( left_side, gt_op, gg_cast(TREE_TYPE(left_side), right_side) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning zero\n", NULL_TREE); + } + gg_assign( return_int, integer_zero_node); + } + ENDIF + } + ENDIF + } + ENDIF + } + } + else + { + // Both sides are the same type, allowing a direct comparison. + IF( left_side, lt_op, right_side ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning -1\n", NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + IF( left_side, gt_op, right_side ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning zero\n", NULL_TREE); + } + gg_assign( return_int, integer_zero_node); + } + ENDIF + } + ENDIF + } + } + +static void +compare_binary_binary(tree return_int, + cbl_refer_t *left_side_ref, + cbl_refer_t *right_side_ref ) + { + Analyze(); + static const bool debugging = false; + + // We know the two sides have binary values that can be extracted. + tree left_side; + tree right_side; + + // Use SIZE128 when we need two 64-bit registers to hold the value. All + // others fit into 64-bit LONG with pretty much the same efficiency. + + size_t left_bytes_needed = get_bytes_needed(left_side_ref->field); + size_t right_bytes_needed = get_bytes_needed(right_side_ref->field); + + if( left_bytes_needed >= SIZE128 + || right_bytes_needed >= SIZE128 ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): using int128\n", NULL_TREE); + } + + left_side = gg_define_int128(); + right_side = gg_define_int128(); + } + else + { + if( debugging ) + { + gg_printf("compare_binary_binary(): using int64\n", NULL_TREE); + } + left_side = gg_define_variable( left_side_ref->field->attr & signable_e ? LONG : ULONG ); + right_side = gg_define_variable(right_side_ref->field->attr & signable_e ? LONG : ULONG ); + } + + //tree dummy = gg_define_int(); + static tree hilo_left = gg_define_variable(INT, "..cbb_hilo_left", vs_file_static); + static tree hilo_right = gg_define_variable(INT, "..cbb_hilo_right", vs_file_static); + + get_binary_value(left_side, + NULL, + left_side_ref->field, + refer_offset_source(*left_side_ref), + hilo_left); + get_binary_value(right_side, + NULL, + right_side_ref->field, + refer_offset_source(*right_side_ref), + hilo_right); + IF( hilo_left, eq_op, integer_one_node ) + { + // left side is hi-value + IF( hilo_right, eq_op, integer_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left and right are HIGH-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_zero_node); + } + ELSE + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left is HIGH-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_one_node); + } + ENDIF + } + ELSE + { + // left is not HIGH-VALUE: + IF( hilo_left, eq_op, integer_minus_one_node ) + { + // left side is LOW-VALUE + IF( hilo_right, eq_op, integer_minus_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left and right are LOW-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_zero_node); + } + ELSE + { + // Right side is not low-value + if( debugging ) + { + gg_printf("compare_binary_binary(): left is LOW-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_one_node); + } + ENDIF + } + ELSE + { + // Left side is normal + IF( hilo_right, eq_op, integer_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): right is HIGH-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_minus_one_node); + } + ELSE + { + IF( hilo_right, eq_op, integer_minus_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): right is LOW-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left and right are normal\n", NULL_TREE); + } + normal_normal_compare(debugging, + return_int, + left_side_ref, + right_side_ref, + left_side, + right_side + ); + } + ENDIF + } + ENDIF + } + ENDIF + } + ENDIF + } + +#define DEBUG_COMPARE + +static void +cobol_compare( tree return_int, + cbl_refer_t &left_side_ref, + cbl_refer_t &right_side_ref ) + { + Analyze(); +// gg_printf("cobol_compare %s %s \"%s\" \"%s\"\n", + // gg_string_literal(left_side_ref.field->name), + // gg_string_literal(right_side_ref.field->name), + // member(left_side_ref.field, "data"), + // gg_string_literal(right_side_ref.field->data.initial), + // NULL_TREE); + + CHECK_FIELD(left_side_ref.field); + CHECK_FIELD(right_side_ref.field); + // This routine is in support of conditionals in the COBOL program. + // It takes two arbitrary COBOL variables from the parser and compares them + // according to a nightmarish set of rules. + + // See ISO/IEC 1989:2014(E) section 8.8.4.1.1 (page 153) + + // The return_int value is -1 when left_side < right_side + // 0 left_side == right_side + // 1 left_side > right_side + + bool compared = false; + + // In the effort to convert to in-line GIMPLE comparisons, I became flummoxed + // by comparisons involving REFMODs. This will have to be revisited, but for + // now I decided to keep using the libgcobol code, which according to NIST + // works properly. + + if( !left_side_ref.refmod.from + && !left_side_ref.refmod.len + && !right_side_ref.refmod.from + && !right_side_ref.refmod.len ) + { + cbl_refer_t *lefty = &left_side_ref; + cbl_refer_t *righty = &right_side_ref; + + int ntries = 1; + while( ntries <= 2 ) + { + switch( lefty->field->type ) + { + case FldLiteralN: + { + switch( righty->field->type ) + { + case FldLiteralN: + case FldNumericBinary: + case FldNumericBin5: + case FldPacked: + case FldNumericDisplay: + case FldIndex: + compare_binary_binary(return_int, lefty, righty); + compared = true; + break; + + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + { + // Comparing a FldLiteralN to an alphanumeric + // It is the case that data.initial is in the original form seen + // in the source code, which means that even in EBCDIC mode the + // characters are in the "ASCII" state. + + static size_t buffer_size = 0; + static char *buffer = NULL; + raw_to_internal(&buffer, + &buffer_size, + lefty->field->data.initial, + strlen(lefty->field->data.initial)); + + gg_assign( return_int, gg_call_expr( + INT, + "__gg__literaln_alpha_compare", + gg_string_literal(buffer), + gg_get_address_of(righty->field->var_decl_node), + refer_offset_source(*righty), + refer_size_source( *righty), + build_int_cst_type(INT, + (righty->all ? REFER_T_MOVE_ALL : 0)), + NULL_TREE)); + compared = true; + break; + } + + default: + break; + } + break; + } + + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldNumericDisplay: + { + switch( righty->field->type ) + { + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldNumericDisplay: + { + compare_binary_binary(return_int, lefty, righty); + compared = true; + break; + } + + default: + break; + } + break; + } + + default: + break; + } + if( compared ) + { + break; + } + // We weren't able to compare left/right. Let's see if we understand + // right/left + std::swap(lefty, righty); + ntries += 1; + } + + if( compared && ntries == 2 ) + { + // We have a successful comparision, but we managed it on the second try, + // which means our result has the wrong sign. Fix it: + gg_assign(return_int, gg_negate(return_int)); + } + } + + if( !compared ) + { + // None of our explicit comparisons up above worked, so we revert to the + // general case: + int leftflags = (left_side_ref.all ? REFER_T_MOVE_ALL : 0) + + (left_side_ref.addr_of ? REFER_T_ADDRESS_OF : 0); + int rightflags = (right_side_ref.all ? REFER_T_MOVE_ALL : 0) + + (right_side_ref.addr_of ? REFER_T_ADDRESS_OF : 0); + gg_assign( return_int, gg_call_expr( + INT, + "__gg__compare", + gg_get_address_of(left_side_ref.field->var_decl_node), + refer_offset_source(left_side_ref), + refer_size_source( left_side_ref), + build_int_cst_type(INT, leftflags), + gg_get_address_of(right_side_ref.field->var_decl_node), + refer_offset_source(right_side_ref), + refer_size_source( right_side_ref), + build_int_cst_type(INT, rightflags), + integer_zero_node, + NULL_TREE)); + } + +// gg_printf(" result is %d\n", return_int, NULL_TREE); + } + +static void +move_tree( cbl_field_t *dest, + tree offset, + tree psz_source, + tree length_bump=integer_zero_node) // psz_source is a null-terminated string + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", dest); + SHOW_PARSE_END + } + + bool moved = true; + + tree source_length = gg_define_size_t(); + gg_assign(source_length, gg_strlen(psz_source)); + gg_assign(source_length, gg_add(source_length, gg_cast(SIZE_T, length_bump))); + + tree min_length = gg_define_size_t(); + + tree location = gg_define_uchar_star(); + tree length = gg_define_size_t(); + + gg_assign(location, + gg_add(member(dest->var_decl_node, "data"), + offset)); + gg_assign(length, + member(dest->var_decl_node, "capacity")); + + IF(source_length, lt_op, length) + { + gg_assign(min_length, source_length); + } + ELSE + { + gg_assign(min_length, length); + } + ENDIF + + tree value; + tree rdigits; + + switch( dest->type ) + { + case FldGroup: + case FldAlphanumeric: + // Space out the alphanumeric destination: + gg_memset( location, + build_int_cst_type(INT, internal_space), + length ); + // Copy the alphanumeric result over. + gg_memcpy( location, + psz_source, + min_length ); + break; + + case FldNumericDisplay: + case FldNumericEdited: + case FldNumericBinary: + case FldNumericBin5: + case FldPacked: + case FldIndex: + { + value = gg_define_int128(); + rdigits = gg_define_int(); + + gg_assign(value, + gg_call_expr( INT128, + "__gg__dirty_to_binary_internal", + psz_source, + source_length, + gg_get_address_of(rdigits), + NULL_TREE)); + + gg_call(VOID, + "__gg__int128_to_qualified_field", + gg_get_address_of(dest->var_decl_node), + offset, + build_int_cst_type(SIZE_T, dest->data.capacity), + value, + rdigits, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE); + } + break; + + case FldAlphaEdited: + { + gg_call(VOID, + "__gg__string_to_alpha_edited_ascii", + location, + psz_source, + min_length, + member(dest->var_decl_node, "picture"), + NULL); + break; + } + + default: + moved = false; + break; + } + + TRACE1 + { + TRACE1_HEADER + gg_fprintf(trace_handle, 1, "source: \"%s\"", psz_source); + TRACE1_END + TRACE1_INDENT + TRACE1_FIELD( "dest : ", dest, "") + TRACE1_END + } + + if( !moved ) + { + dbgmsg("###### %10s in %s:%d\n", __func__, __FILE__, __LINE__ ); + cbl_internal_error( "I don't know how to MOVE an alphabetical string to %s(%s) \n", + cbl_field_type_str(dest->type), + dest->name + ); + return; + } + } + +static void +move_tree_to_field(cbl_field_t *field, tree psz) + { + move_tree(field, integer_zero_node, psz); + } + +static tree +get_string_from(cbl_field_t *field) + { + // This returns a malloced copy of either a literal string or a + // an alphanumeric field. The idea is that eventually free() will be + // called in the runtime space: + + tree psz = gg_define_char_star(); + + if( field ) + { + switch( field->type ) + { + case FldLiteralA: + { + gg_assign(psz, + gg_cast(CHAR_P, + gg_malloc(build_int_cst_type(SIZE_T, + field->data.capacity+1)))); + char *litstring = get_literal_string(field); + gg_memcpy(psz, + gg_string_literal(litstring), + build_int_cst_type(SIZE_T, field->data.capacity+1)); + break; + } + + case FldGroup: + case FldAlphanumeric: + // make a copy of .data: + gg_assign(psz, + gg_cast(CHAR_P, + gg_malloc(build_int_cst_type(SIZE_T, + field->data.capacity+1)))); + gg_memcpy( psz, + member(field, "data"), + member(field, "capacity")); + // null-terminate it: + gg_assign( gg_array_value(psz, member(field, "capacity")), + char_nodes[0]); + break; + + case FldForward: + { + // At the present time, we are assuming this happens when somebody + // specifies an unquoted file name in an ASSIGN statement: + // SELECT file3 ASSIGN DISK. + // + // In that case, we just return DISK, which is field->name: + psz = gg_strdup(gg_string_literal(field->name)); + break; + } + + default: + cbl_internal_error( + "%s(): field->type %s must be literal or alphanumeric", + __func__, cbl_field_type_str(field->type)); + break; + } + } + else + { + gg_assign(psz, gg_cast(CHAR_P, null_pointer_node)); + } + return psz; + } + +static char * +combined_name(cbl_label_t *label) + { + // This routine returns a pointer to a static, so make sure you use the result + // before calling the routine again + char *para_name = nullptr; + char *sect_name = nullptr; + const char *program_name = current_function->our_unmangled_name; + + if( label->type == LblParagraph ) + { + para_name = label->name; + + if( label->parent ) + { + // It's possible for implicit + cbl_label_t *section_label = cbl_label_of(symbol_at(label->parent)); + sect_name = section_label->name; + } + } + else + { + sect_name = label->name; + } + + static size_t retval_size = 256; + static char *retval= (char *)xmalloc(retval_size); + + char *paragraph = cobol_name_mangler(para_name); + char *section = cobol_name_mangler(sect_name); + char *mangled_program_name = cobol_name_mangler(program_name); + + while( retval_size < (paragraph ? strlen(paragraph) : 0 ) + + (section ? strlen(section) : 0 ) + + (mangled_program_name ? strlen(mangled_program_name) : 0 ) + + 24 ) + { + retval_size *= 2; + retval = (char *)xrealloc(retval, retval_size); + } + + *retval = '\0'; + char ach[24]; + if( paragraph ) + { + strcat(retval, paragraph); + } + strcat(retval, "."); + if( section ) + { + strcat(retval, section); + } + strcat(retval, "."); + if( mangled_program_name ) + { + strcat(retval, mangled_program_name); + } + sprintf(ach, ".%ld", current_function->program_id_number); + strcat(retval, ach); + sprintf(ach, ".%ld", symbol_label_id(label)); + strcat(retval, ach); + free(mangled_program_name); + free(section); + free(paragraph); + + return retval; + } + +// We implement SECTION and PARAGRAPH stuff before the rest of program +// structure, because we have some static routines in here that are called +// by enter_ and leave_ program, and so on. + +static void +assembler_label(const char *label) + { + // label has to be a valid label for the assembler + static size_t length = 0; + static char *build = nullptr; + + const char local_text[] = ":"; + if( length < strlen(label) + strlen(local_text) + 1 ) + { + length = strlen(label) + strlen(local_text) + 1; + free(build); + build = (char *)xmalloc(length); + } + + strcpy(build, label); + strcat(build, local_text); + + gg_insert_into_assembler(build); + } + +static void +section_label(struct cbl_proc_t *procedure) + { + // With nested programs, you can have multiple program/section pairs with the + // the same names; we use a deconflictor to avoid collisions + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + size_t deconflictor = symbol_label_id(procedure->label); + + cbl_label_t *label = procedure->label; + // The _initialize_program section isn't relevant. + static size_t psz_length = 256; + static char *psz = (char *)xmalloc(psz_length); + sprintf(psz, + "# SECTION %s in %s (%ld)", + label->name, + current_function->our_unmangled_name, + deconflictor); + gg_insert_into_assembler(psz); + + // The label has to start with an underscore. I tried a period, but those + // don't seem to show up in GDB's internal symbol tables. + char *combined = combined_name(procedure->label); + if( psz_length < strlen(combined) + 36 + 1 ) + { + free(psz); + psz_length = strlen(combined) + 36 + 1; + psz = (char *)xmalloc(psz_length); + } + sprintf(psz, + "_sect.%s", + combined_name(procedure->label)); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(psz); + SHOW_PARSE_END + } + assembler_label(psz); + gg_assign(var_decl_nop, build_int_cst_type(INT, 108)); + } + +static void +paragraph_label(struct cbl_proc_t *procedure) + { + // We need to give each paragraph a unique and assembler-compatible name + // that can be found and used by GDB. + // Complications: + // 1) paragraph names can be reused in the same program, provided they + // are in different sections. + // 2) paragraph names can be duplicated in a section, provided that they + // are not referenced by the program. We provide a deconflictor to + // separate such labels. + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + cbl_label_t *paragraph = procedure->label; + cbl_label_t *section = nullptr; + + if( procedure->label->parent ) + { + section = cbl_label_of(symbol_at(procedure->label->parent)); + } + + char *para_name = paragraph->name; + char *section_name = section ? section->name : nullptr; + + static size_t psz_length = 256; + static char *psz = (char *)xmalloc(psz_length); + + static size_t deconflictor = symbol_label_id(procedure->label); + + sprintf(psz, + "# PARAGRAPH %s of %s in %s (%ld)", + para_name, + section_name, + current_function->our_unmangled_name, + deconflictor); + gg_insert_into_assembler(psz); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(psz); + SHOW_PARSE_END + } + + // The label has to start with an underscore. I tried a period, but those + // don't seem to show up in GDB's internal symbol tables. + char *combined = combined_name(procedure->label); + if( psz_length < strlen(combined) + 36 + 1 ) + { + free(psz); + psz_length = strlen(combined) + 36 + 1; + psz = (char *)xmalloc(psz_length); + } + + sprintf(psz, + "_para.%s", + combined_name(procedure->label)); + assembler_label(psz); + gg_assign(var_decl_nop, build_int_cst_type(INT, 109)); + } + +static void +pseudo_return_push(cbl_proc_t *procedure, tree return_addr) + { + // Put the return address onto the stack: + //gg_suppress_location(true); + + TRACE1 + { + TRACE1_HEADER + gg_printf("%s %p %p", + gg_string_literal(procedure->label->name), + gg_cast(SIZE_T, procedure->exit.addr), + return_addr, + NULL_TREE); + TRACE1_END + } + + gg_call(VOID, + "__gg__pseudo_return_push", + procedure->exit.addr, + return_addr, + NULL_TREE); + + //gg_suppress_location(false); + } + +static void +pseudo_return_pop(cbl_proc_t *procedure) + { + //gg_suppress_location(true); + + TRACE1 + { + TRACE1_HEADER + gg_printf("%s comparing proc_exit %p to global_exit %p -- ", + gg_string_literal(procedure->label->name), + gg_cast(SIZE_T, procedure->exit.addr), + var_decl_exit_address, + NULL_TREE); + } + + IF( var_decl_exit_address, eq_op, procedure->exit.addr ) + { + TRACE1 + { + TRACE1_TEXT("Returning") + } + // The top of the stack is us! + + // Pick up the return address from the pseudo_return stack: + gg_assign(current_function->void_star_temp, + gg_call_expr( VOID_P, + "__gg__pseudo_return_pop", + NULL_TREE)); + // And do the return: + gg_goto(current_function->void_star_temp); + } + ELSE + { + TRACE1 + { + TRACE1_TEXT("No match") + } + ENDIF + } + TRACE1 + { + TRACE1_END + } + //gg_suppress_location(false); + } + +static void +leave_procedure(struct cbl_proc_t *procedure, bool /*section*/) + { + if(procedure) + { + // fprintf(stderr, "LeavingProcedure: (%p) %s %p %p %p %p %p %p\n", + // procedure, + // procedure->name, + // procedure->top.go_to, + // procedure->top.label, + // procedure->exit.go_to, + // procedure->exit.label, + // procedure->bottom.go_to, + // procedure->bottom.label); + // Procedure can be null, for example at the beginning of a + // new program, or after somebody else has cleared it out. + gg_append_statement(procedure->exit.label); + + char ach[256]; + sprintf(ach, + "_procret.%ld:", + symbol_label_id(procedure->label)); + gg_insert_into_assembler(ach); + pseudo_return_pop(procedure); + gg_append_statement(procedure->bottom.label); + } + } + +static void +leave_section_internal() + { + Analyze(); + SHOW_PARSE + { + if(gg_trans_unit.function_stack.size() && current_function && current_function->current_section) + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(current_function->current_section->label->name) + SHOW_PARSE_END + } + } + + if( current_function->current_section ) + { + // gg_printf( "Leaving section %s\n", + // build_string_literal( strlen(current_function->current_section->label->name)+1, current_function->current_section->label->name), + // NULL_TREE); + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_section->label->name, "\""); + TRACE1_END + } + leave_procedure(current_function->current_section, true); + + current_function->current_section = NULL; + } + else + { + //gg_printf("Somebody is leaving a section twice\n", NULL_TREE); + } + } + +void +parser_leave_section( struct cbl_label_t */*label*/ ) {} + +static void +leave_paragraph_impl() + { + Analyze(); + SHOW_PARSE + { + if(gg_trans_unit.function_stack.size() && current_function && current_function->current_paragraph) + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(current_function->current_paragraph->label->name) + SHOW_PARSE_END + } + } + + if( current_function->current_paragraph ) + { + // gg_printf( "Leaving paragraph %s\n", + // build_string_literal( strlen(current_function->current_paragraph->label->name)+1, current_function->current_paragraph->label->name), + // NULL_TREE); + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_paragraph->label->name, "\""); + TRACE1_END + } + leave_procedure(current_function->current_paragraph, false); + current_function->current_paragraph = NULL; + } + else + { + //gg_printf("Somebody is leaving a paragraph twice\n", NULL_TREE); + } + } + +void parser_leave_paragraph( cbl_label_t * ) {} +static inline void leave_paragraph_internal() { leave_paragraph_impl(); } + +static struct cbl_proc_t * +find_procedure(cbl_label_t *label) + { +// SHOW_PARSE +// { +// SHOW_PARSE_HEADER +// SHOW_PARSE_LABEL(" ", label) +// SHOW_PARSE_TEXT("\n"); +// } + + cbl_proc_t *retval = label->structs.proc; + + // We have to cope with an oddball circumstance. When label->entered is + // greater than zero, it means that a paragraph with this label has been + // entered and left already. This means that a paragraph name has been + // defined more than once. Had it been referenced with a GOTO or PERFORM, + // that would have been a syntax error. + // + // + // In this case, we need to replace the existing cbl_proc_t structure. We + // will be laying down labels for this second (or more) instance of + // parser_enter_paragraph, and we must create different labels. + + if( !retval ) + { + static int counter=1; + char ach[2*sizeof(cbl_name_t)]; + + // This is a new section or paragraph; we need to create its values: + retval = (struct cbl_proc_t *)xmalloc(sizeof(struct cbl_proc_t)); + retval->label = label; + + gg_create_goto_pair(&retval->top.go_to, + &retval->top.label, + &retval->top.addr, + &retval->top.decl); + gg_create_goto_pair(&retval->exit.go_to, + &retval->exit.label, + &retval->exit.addr + ); + gg_create_goto_pair(&retval->bottom.go_to, + &retval->bottom.label, + &retval->bottom.addr + ); + + // fprintf(stderr, "NewProcedure: (%p) %s %p %p %p %p %p %p\n", + // retval, + // retval->name, + // retval->top.go_to, + // retval->top.label, + // retval->exit.go_to, + // retval->exit.label, + // retval->bottom.go_to, + // retval->bottom.label); + + // If this procedure is a paragraph, and it becomes the target of + // an ALTER statement, alter_location will be used to make that change + sprintf(ach, "_%s_alter_loc_%d", label->name, counter); + retval->alter_location = gg_define_void_star(ach, vs_static); + DECL_INITIAL(retval->alter_location) = null_pointer_node; + + counter +=1 ; + + label->structs.proc = retval; + } + + return retval; + } + +void +parser_enter_section(cbl_label_t *label) + { + Analyze(); + // Do the leaving before the SHOW_PARSE; it makes the output more sensible + // A new section ends the current paragraph: + leave_paragraph_internal(); + + // And the current section: + leave_section_internal(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + // This NOP is needed to give GDB a line number for the entry point of + // paragraphs + gg_set_current_line_number(CURRENT_LINE_NUMBER); + gg_assign(var_decl_nop, build_int_cst_type(INT, 101)); + + struct cbl_proc_t *procedure = find_procedure(label); + gg_append_statement(procedure->top.label); + section_label(procedure); + current_function->current_section = procedure; + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("\"", label, "\"") + TRACE1_END + } + } + +void +parser_enter_paragraph(cbl_label_t *label) + { + Analyze(); + // Do the leaving before the SHOW_PARSE; the output makes more sense that way + // A new paragraph ends the current paragraph: + leave_paragraph_internal(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + struct cbl_proc_t *procedure = find_procedure(label); + gg_append_statement(procedure->top.label); + paragraph_label(procedure); + current_function->current_paragraph = procedure; + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("\"", label, "\"") + TRACE1_END + } + } + +void +parser_exit_section(void) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_section->label->name, "\"") + TRACE1_END + } + gg_append_statement(current_function->current_section->exit.go_to); + } + +void +parser_exit_paragraph(void) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_paragraph->label->name, "\"") + TRACE1_END + } + gg_append_statement(current_function->current_paragraph->exit.go_to); + } + +void +parser_exit_perform(struct cbl_perform_tgt_t *tgt, bool cycle) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + if(cycle) + { + gg_append_statement(tgt->addresses.testA.go_to); + } + else + { + gg_append_statement(tgt->addresses.exit.go_to); + } + } + +void +parser_alter( cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + cbl_label_t *altered = tgt->from(); + cbl_label_t *proceed_to = tgt->to(); + + struct cbl_proc_t *altered_proc = find_procedure(altered); + struct cbl_proc_t *proceed_to_proc = find_procedure(proceed_to); + + gg_assign( altered_proc->alter_location, + proceed_to_proc->top.addr); + } + +void +parser_goto( cbl_refer_t value_ref, size_t narg, cbl_label_t * const labels[] ) + { + // This is part of the Terrible Trio of parser_perform, parser_goto and + // parser_enter_[procedure]. parser_goto has an easier time of it than + // the other two, because it just has to jump from here to the entry point + // of the paragraph [or section] + Analyze(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + for(size_t i=0; i<narg; i++) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(labels[i]->name); + } + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + for(size_t i=0; i<narg; i++) + { + TRACE1_TEXT(labels[i]->name); + TRACE1_TEXT(" "); + } + TRACE1_END + } + + gcc_assert(narg >= 1); + + // This is a computed GOTO. It might have only one element, which is + // an ordinary GOTO without a DEPENDING ON clause. We create that table + // anyway, because in the case of an ALTER statement, we will be replacing + // that sole element with the PROCEED TO element. + + // We need to create a static array of pointers to locations: + static int comp_gotos = 1; + char ach[32]; + sprintf(ach, "_comp_goto_%d", comp_gotos++); + tree array_of_pointers_type = build_array_type_nelts(VOID_P, narg); + tree array_of_pointers = gg_define_variable(array_of_pointers_type, ach, vs_static); + + // We have the array. Now we need to build the constructor for it + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = array_of_pointers_type; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + for(size_t i=0; i<narg; i++) + { + CHECK_LABEL(labels[i]); + struct cbl_proc_t *procedure = find_procedure(labels[i]); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + build_int_cst_type(SIZE_T, i), + procedure->top.addr ); + } + DECL_INITIAL(array_of_pointers) = constr; + + // We need to pick up the value argument as an INT: + tree value = gg_define_int(); + + if( value_ref.field ) + { + get_binary_value( value, + NULL, + value_ref.field, + refer_offset_source(value_ref)); + // Convert it from one-based to zero-based: + gg_decrement(value); + // Check to see if the value is in the range 0...narg-1: + IF( value, ge_op, integer_zero_node) + { + IF( value, lt_op, build_int_cst_type(INT, narg) ) + { + // It is in the valid range, so we can do the goto: + Analyzer.ExitMessage(); + gg_goto(gg_array_value(array_of_pointers, value)); + } + ELSE + { + // Otherwise, just fall through + } + ENDIF + } + ELSE + ENDIF + } + else + { + // This is a simple GOTO. Because it is a simple GO TO, there is the + // possibility that this paragraph was the target of an ALTER statement. + IF( current_function->current_paragraph->alter_location, ne_op, null_pointer_node ) + { + // Somebody did an ALTER statement before we got here + gg_assign(current_function->void_star_temp, current_function->current_paragraph->alter_location); + } + ELSE + { + // This paragraph wasn't the target of an ALTER: + gg_assign(current_function->void_star_temp, gg_array_value(array_of_pointers, 0)); + } + ENDIF + Analyzer.ExitMessage(); + gg_goto(current_function->void_star_temp); + } + return; + } + +void +parser_perform(cbl_label_t *label, bool suppress_nexting) + { + label->used = yylineno; + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + char ach[32]; + sprintf(ach, " label is at %p", label); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " label->proc is %p", label->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("", label, "") + TRACE1_END + } + + CHECK_LABEL(label); + + struct cbl_proc_t *procedure = find_procedure(label); + + // We need to create the unnamed return address that we + // will instantiate right after the goto: + tree return_address_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(return_address_decl) = current_function->function_decl; + TREE_USED(return_address_decl) = 1; + + tree return_label_expr = build1(LABEL_EXPR, + void_type_node, + return_address_decl); + tree return_addr = gg_get_address_of(return_address_decl); + +// cbl_parser_mod *parser_mod = new cbl_parser_mod; + + // Put the return address onto the pseudo-return stack + pseudo_return_push(procedure, return_addr); + + // Create the code that will launch the paragraph + // The following comment is, believe it or not, necessary. The insertion + // includes a line number insertion that's needed because when the goto/label + // pairs were created, the locations of the goto instruction and the label + // were not known. + + char *para_name = nullptr; + char *sect_name = nullptr; + const char *program_name = current_function->our_unmangled_name; + size_t deconflictor = symbol_label_id(label); + + char ach[256]; + if( label->type == LblParagraph ) + { + cbl_label_t *section_label = cbl_label_of(symbol_at(label->parent)); + para_name = label->name; + sect_name = section_label->name; + sprintf(ach, + "# PERFORM %s of %s of %s (%ld)", + para_name, + sect_name, + program_name, + deconflictor); + + gg_insert_into_assembler(ach); + } + else + { + sect_name = label->name; + sprintf(ach, + "# PERFORM %s of %s (%ld)", + sect_name, + program_name, + deconflictor); + gg_insert_into_assembler(ach); + } + + if( !suppress_nexting ) + { + sprintf(ach, + "_proccall.%ld.%d:", + symbol_label_id(label), + call_counter++); + gg_insert_into_assembler( ach ); + } + + // We do the indirect jump in order to prevent the compiler from complaining + // in the case where we are performing a USE GLOBAL DECLARATIVE. Without the + // indirection, the compiler isn't able to handle the case where we are + // jumping to a location in our parent program-id; it can't find a matching + // local symbol, and crashes. + gg_goto(procedure->top.addr); + + // And create the return address label: + gg_append_statement(return_label_expr); + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("back_from_performing ", label, "") + TRACE1_END + } + } + +void +parser_perform_times( cbl_label_t *proc_1, cbl_refer_t count ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", proc_1) + SHOW_PARSE_REF(" ", count) + SHOW_PARSE_TEXT(" TIMES") + char ach[32]; + sprintf(ach, " proc_1 is at %p", proc_1); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_1->proc is %p", proc_1->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + tree counter = gg_define_variable(LONG); + + // Get the count: + get_binary_value( counter, + NULL, + count.field, + refer_offset_source(count)); + + // Make sure the initial count is valid: + WHILE( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + { + static const bool suppress_nexting = true; + parser_perform(proc_1, suppress_nexting); + gg_decrement(counter); + } + WEND + + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler(ach); + } + +static void +internal_perform_through( cbl_label_t *proc_1, + cbl_label_t *proc_2, + bool suppress_nexting ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", proc_1); + char ach[32]; + sprintf(ach, " proc_1 is at %p", proc_1); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_1->proc is %p", proc_1->structs.proc); + SHOW_PARSE_TEXT(ach) + if( proc_2 ) + { + SHOW_PARSE_INDENT + SHOW_PARSE_LABEL("", proc_2); + sprintf(ach, " proc_2 is at %p", proc_2); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_2->proc is %p", proc_2->structs.proc); + SHOW_PARSE_TEXT(ach) + } + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + CHECK_LABEL(proc_1); + + if(!proc_2) + { + parser_perform(proc_1, suppress_nexting); + return; + } + + CHECK_LABEL(proc_2); + + struct cbl_proc_t *proc1 = find_procedure(proc_1); + struct cbl_proc_t *proc2 = find_procedure(proc_2); + + // We need to create the unnamed return address that we + // will instantiate right after the goto: + tree return_address_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(return_address_decl) = current_function->function_decl; + TREE_USED(return_address_decl) = 1; + + tree return_label_expr = build1(LABEL_EXPR, + void_type_node, + return_address_decl); + tree return_addr = gg_get_address_of(return_address_decl); + + //cbl_parser_mod *parser_mod_proc1 = new cbl_parser_mod; + //cbl_parser_mod *parser_mod_proc2 = new cbl_parser_mod; + + // Put the return address of the second procedure onto the stack: + pseudo_return_push(proc2, return_addr); + + // Create the code that will launch the first procedure + gg_insert_into_assembler("# PERFORM %s THROUGH %s", + proc_1->name, proc_2->name); + + if( !suppress_nexting ) + { + char ach[256]; + sprintf(ach, + "_proccall.%ld.%d:", + symbol_label_id(proc_2), + call_counter++); + gg_insert_into_assembler(ach); + } + + gg_append_statement(proc1->top.go_to); + + // And create the return address label: + gg_append_statement(return_label_expr); + } + +static void +internal_perform_through_times( cbl_label_t *proc_1, + cbl_label_t *proc_2, + cbl_refer_t &count) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", proc_1); + char ach[32]; + sprintf(ach, " proc_1 is at %p", proc_1); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_1->proc is %p", proc_1->structs.proc); + SHOW_PARSE_TEXT(ach) + if( proc_2 ) + { + SHOW_PARSE_INDENT + SHOW_PARSE_LABEL("", proc_2); + sprintf(ach, " proc_2 is at %p", proc_2); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_2->proc is %p", proc_2->structs.proc); + SHOW_PARSE_TEXT(ach) + } + SHOW_PARSE_REF(" ", count); + SHOW_PARSE_TEXT(" TIMES"); + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + size_t our_pseudo_label = pseudo_label++; + + char ach[256]; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + tree counter = gg_define_variable(LONG); + get_binary_value( counter, + NULL, + count.field, + refer_offset_source(count)); + WHILE( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + { + internal_perform_through(proc_1, proc_2, true); // true means suppress_nexting + gg_decrement(counter); + } + WEND + + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +void +register_main_switch(const char *main_string) + { + char *mstr = xstrdup(main_string); + char *p = strchr(mstr, ':'); + if( p ) + { + *p = '\0'; + main_string = p+1; + main_strings[mstr] = main_string; + } + else + { + main_strings[mstr] = ""; + } + free(mstr); + } + +static int file_level = 0; + +void +parser_first_statement( int lineno ) + { + // In the event that this routine is the one that main() calls to get the + // execution ball rolling, we want the GDB "start" function to be able + // to set a temporary breakpoint at this location. We get that rolling + // here. + + char ach[256]; + + SHOW_PARSE + { + SHOW_PARSE_HEADER + sprintf(ach, " lineno is %d, suppression is %d", lineno, suppress_cobol_entry_point); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + if( strcmp(current_function->our_name, ach_cobol_entry_point) == 0 + && !suppress_cobol_entry_point ) + { + sprintf(ach, + "%s:%d", + current_filename.back().c_str(), + lineno); + *ach_cobol_entry_point = '\0'; + create_cblc_string_variable("_cobol_entry_point", ach); + + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach2[512]; + sprintf(ach2, "setting _cobol_entry_point to \"%s\"", ach); + SHOW_PARSE_TEXT(ach2) + SHOW_PARSE_END + } + } + + if( !suppress_cobol_entry_point ) + { + char achentry[128]; + sprintf(ach, + "%s:%d", + current_filename.back().c_str(), + lineno); + + sprintf(achentry, "_prog_entry_point_%s", current_function->our_name); + create_cblc_string_variable(achentry, ach); + } + } + +#define linemap_add(...) + +void +parser_enter_file(const char *filename) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[32]; + sprintf(ach, " entering level:%d %s", file_level+1, filename); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + current_filename.push_back(filename); + + std::unordered_map<std::string, std::string>::const_iterator it + = main_strings.find(filename); + + if( it != main_strings.end() ) + { + // There was a -main switch for this file. + this_module_has_main = true; + next_program_is_main = true; + + const char *pname = it->second.c_str(); + if( pname && strlen(pname) ) + { + main_entry_point = xstrdup(pname); + } + } + + // Let the linemap routine know we are working on a new file: + linemap_add(line_table, LC_ENTER, 0, filename, 1); + + if( file_level == 0 ) + { + // Build a translation_unit_decl: + gg_build_translation_unit(filename); + create_our_type_nodes(); + } + + file_level += 1; + + if( file_level == 1 ) + { + // This table is used for "creating" the file-static named variables used in + // the GENERIC we generate. + + // Establish our variable declarations for global variables in libgcobol: + +#define SET_VAR_DECL(A, B, C) \ + A = gg_declare_variable(B, C, NULL_TREE, vs_external_reference) + + SET_VAR_DECL(var_decl_exception_code , INT , "__gg__exception_code"); + SET_VAR_DECL(var_decl_exception_handled , INT , "__gg__exception_handled"); + SET_VAR_DECL(var_decl_exception_file_number , INT , "__gg__exception_file_number"); + SET_VAR_DECL(var_decl_exception_file_status , INT , "__gg__exception_file_status"); + SET_VAR_DECL(var_decl_exception_file_name , CHAR_P , "__gg__exception_file_name"); + SET_VAR_DECL(var_decl_exception_statement , CHAR_P , "__gg__exception_statement"); + SET_VAR_DECL(var_decl_exception_source_file , CHAR_P , "__gg__exception_source_file"); + SET_VAR_DECL(var_decl_exception_line_number , INT , "__gg__exception_line_number"); + SET_VAR_DECL(var_decl_exception_program_id , CHAR_P , "__gg__exception_program_id"); + SET_VAR_DECL(var_decl_exception_section , CHAR_P , "__gg__exception_section"); + SET_VAR_DECL(var_decl_exception_paragraph , CHAR_P , "__gg__exception_paragraph"); + + SET_VAR_DECL(var_decl_default_compute_error , INT , "__gg__default_compute_error"); + SET_VAR_DECL(var_decl_rdigits , INT , "__gg__rdigits"); + SET_VAR_DECL(var_decl_odo_violation , INT , "__gg__odo_violation"); + SET_VAR_DECL(var_decl_unique_prog_id , SIZE_T , "__gg__unique_prog_id"); + + SET_VAR_DECL(var_decl_entry_location , VOID_P , "__gg__entry_pointer"); + SET_VAR_DECL(var_decl_exit_address , VOID_P , "__gg__exit_address"); + + SET_VAR_DECL(var_decl_call_parameter_signature , CHAR_P , "__gg__call_parameter_signature"); + SET_VAR_DECL(var_decl_call_parameter_count , INT , "__gg__call_parameter_count"); + SET_VAR_DECL(var_decl_call_parameter_lengths , build_array_type(SIZE_T, NULL), + "__gg__call_parameter_lengths"); + SET_VAR_DECL(var_decl_return_code , SHORT , "__gg__data_return_code"); + + SET_VAR_DECL(var_decl_arithmetic_rounds_size , SIZE_T , "__gg__arithmetic_rounds_size"); + SET_VAR_DECL(var_decl_arithmetic_rounds , INT_P , "__gg__arithmetic_rounds"); + SET_VAR_DECL(var_decl_fourplet_flags_size , SIZE_T , "__gg__fourplet_flags_size"); + SET_VAR_DECL(var_decl_fourplet_flags , INT_P , "__gg__fourplet_flags"); + + SET_VAR_DECL(var_decl_treeplet_1f , cblc_field_pp_type_node , "__gg__treeplet_1f" ); + SET_VAR_DECL(var_decl_treeplet_1o , SIZE_T_P , "__gg__treeplet_1o" ); + SET_VAR_DECL(var_decl_treeplet_1s , SIZE_T_P , "__gg__treeplet_1s" ); + SET_VAR_DECL(var_decl_treeplet_2f , cblc_field_pp_type_node , "__gg__treeplet_2f" ); + SET_VAR_DECL(var_decl_treeplet_2o , SIZE_T_P , "__gg__treeplet_2o" ); + SET_VAR_DECL(var_decl_treeplet_2s , SIZE_T_P , "__gg__treeplet_2s" ); + SET_VAR_DECL(var_decl_treeplet_3f , cblc_field_pp_type_node , "__gg__treeplet_3f" ); + SET_VAR_DECL(var_decl_treeplet_3o , SIZE_T_P , "__gg__treeplet_3o" ); + SET_VAR_DECL(var_decl_treeplet_3s , SIZE_T_P , "__gg__treeplet_3s" ); + SET_VAR_DECL(var_decl_treeplet_4f , cblc_field_pp_type_node , "__gg__treeplet_4f" ); + SET_VAR_DECL(var_decl_treeplet_4o , SIZE_T_P , "__gg__treeplet_4o" ); + SET_VAR_DECL(var_decl_treeplet_4s , SIZE_T_P , "__gg__treeplet_4s" ); + SET_VAR_DECL(var_decl_nop , INT , "__gg__nop" ); + SET_VAR_DECL(var_decl_main_called , INT , "__gg__main_called" ); + } + } + +void +parser_leave_file() + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[256]; + sprintf(ach, "leaving level:%d %s", file_level, current_filename.back().c_str()); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + if( file_level > 0) + { + linemap_add(line_table, LC_LEAVE, false, NULL, 0); + } + file_level -= 1; + current_filename.pop_back(); + } + +void +enter_program_common(const char *funcname, const char *funcname_) + { + // We arrive here when processing a PROGRAM-ID. + + // At this point, we don't know how many formal parameters there are going + // to be. + + // We are going to create a function returning a 64-bit value, but it'll + // have no parameters. We'll chain the parameters on in parser_division(), + // when we process PROCEDURE DIVISION USING... + + gg_define_function_with_no_parameters( COBOL_FUNCTION_RETURN_TYPE, + funcname, + funcname_); + + current_function->first_time_through = + gg_define_variable(INT, + "_first_time_through", + vs_static, + integer_one_node); + + gg_create_goto_pair(¤t_function->skip_init_goto, + ¤t_function->skip_init_label); + + IF( current_function->first_time_through, eq_op, integer_zero_node ) + gg_append_statement(current_function->skip_init_goto); + ELSE + ENDIF + + gg_assign(current_function->first_time_through, integer_zero_node); + + // Establish variables that are function-wide in scope: + current_function->void_star_temp = gg_define_void_star("_void_star_temp"); + + current_function->perform_exit_address + = gg_define_void_star("_perform_exit_address"); + + // Make sure the following are null, because when we create the unnamed + // default section, parser_enter_section will attempt to close them out. And + // it's possible on the first go-through that they have garbage values. + + current_function->current_section = NULL; + current_function->current_paragraph = NULL; + + current_function->is_truly_nested = false; + + // Text conversion must be initialized before the code generated by + // parser_symbol_add runs. + + // The text_conversion_override exists both in the library and in the compiler + + __gg__set_internal_codeset(internal_codeset_is_ebcdic()); + gg_call(VOID, + "__gg__set_internal_codeset", + internal_codeset_is_ebcdic() + ? integer_one_node : integer_zero_node, + NULL_TREE); + + __gg__text_conversion_override(td_default_e, cs_default_e); + gg_call(VOID, + "__gg__text_conversion_override", + build_int_cst_type(INT, td_default_e), + build_int_cst_type(INT, cs_default_e), + NULL_TREE); + + gg_call(VOID, + "__gg__codeset_figurative_constants", + NULL_TREE); + + static int counter=1; + char ach[32]; + + sprintf(ach, "_cf_fds_%d", counter); + current_function->first_declarative_section + = gg_define_variable(CHAR_P, + ach, + vs_static, + null_pointer_node); + sprintf(ach, "_cf_cbmc_%d", counter); + current_function->called_by_main_counter = gg_define_variable(INT, + ach, + vs_static, + integer_zero_node); + counter += 1; + + // Initialize the TRACE logic, which has to be done before the first TRACE1 + // invocation, but after there is a function to lay down GIMPLE code in. + + // That is to say: Here. Multiple invocations of trace1_init are harmless. + trace1_init(); + } + +/* Creates a function for program-id 'funcname_'. Returns 1 when funcname_ + is "main" and the -main compiler switch is active for this moudle */ + +void +parser_enter_program( const char *funcname_, + bool is_function, // True for user-defined-function + int *pretval) + { + *pretval = 0; + + // The first thing we have to do is mangle this name. This is safe even + // though the end result will be mangled again, because the mangler doesn't + // change a mangled name. + char *mangled_name = cobol_name_mangler(funcname_); + + size_t parent_index = current_program_index(); + char funcname[128]; + if( parent_index ) + { + // This is a nested function. Tack on the parent_index to the end of it. + sprintf(funcname, "%s.%ld", mangled_name, parent_index); + } + else + { + // This is a top-level function; just use the straight mangled name + strcpy(funcname, mangled_name); + } + free(mangled_name); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(funcname) + SHOW_PARSE_END + } + + if( !is_function && !parent_index ) + { + // This is a top_level program, and not a function + if( next_program_is_main ) + { + next_program_is_main = false; + if(main_entry_point) + { + build_main_that_calls_something(main_entry_point); + free(main_entry_point); + main_entry_point = NULL; + } + else + { + build_main_that_calls_something(funcname); + } + } + } + + // Call this after build_main_that_calls_something, because it manipulates + // the current line number to DEFAULT_LINE_NUMBER. We have to manipulate it + // back afterward. + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + if( strcmp(funcname_, "main") == 0 && this_module_has_main ) + { + // setting 'retval' to 1 let's the caller know that we are being told + // both to synthesize a main() entry point to duplicate GCC's default + // behavior, and to create an explicit entry point named "main". This will + // eventually result in a link error (because of the duplicated entry + // points. The return value serves as an alert; it's up to the caller to + // decide what to do. + *pretval = 1; + } + + if( strcmp(funcname, "dubner") == 0) + { + // This should be enabled by an environment variable. + // But for now I am being cutesy + hijack_for_development(funcname); + return; + } + + enter_program_common(funcname, funcname_); + current_function->is_function = is_function; + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("entered program \"") + TRACE1_TEXT(funcname) + TRACE1_TEXT("\"") + TRACE1_END + } + } + +void +parser_end_program(const char *prog_name ) + { + if( gg_trans_unit.function_stack.size() ) + { + // The body has been created by various parser calls. It's time + // to wrap this sucker up! + + // Ending the program ends the current paragraph and section: + leave_paragraph_internal(); + leave_section_internal(); + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + TRACE1_TEXT_ABC("\"", prog_name, "\"") + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", prog_name, "\"") + TRACE1_END + } + + if( gg_trans_unit.function_stack.size() ) + { + // The body has been created by various parser calls. It's time + // to wrap this sucker up! + + // Put in a harmless return in case there was no EXIT PROGRAM statement. + // It's harmless because if it isn't needed, a return was already + // executed, and this generated code will never be executed + parser_exit( cbl_refer_t() ); + + // Tell the GCC compiler to do the GIMPLIFY thing. + gg_finalize_function(); + } + } + +static void +remove_p_from_picture(char *picture) + { + // At this point, attr has the scaled_e flag, and rdigits tells us + // which way to scale. So, the P characters in picture are now + // a liability. + + char *rabbit = picture; + char *fox = picture; + + for(;;) + { + char ch = *rabbit++; + if( ch == '\0' ) + { + break; + } + if( ch == 'P' || ch == 'p' ) + { + if( *rabbit == '(' ) + { + while( *rabbit != ')' ) + { + rabbit += 1; + } + rabbit += 1; + // rabbit now points to one past the closing parenthesis + } + size_t to_move = strlen(rabbit); + memmove(fox, rabbit, to_move+1); // +1 snags the '\0' + rabbit = fox; + } + else + { + fox += 1; + } + } + } + +static tree vti_array; +static tree vti_constructor; +static int vti_list_size; +static int vti_next_variable; + +void +parser_init_list_size(int count_of_variables) + { + if( mode_syntax_only() ) return; + + vti_list_size = count_of_variables; + char ach[48]; + sprintf(ach, + "..variables_to_init_%ld", + current_function->our_symbol_table_index); + tree array_of_variables_type = build_array_type_nelts(VOID_P, + count_of_variables+1); + vti_array = gg_define_variable( array_of_variables_type, + ach, + vs_file_static); + vti_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(vti_constructor) = array_of_variables_type; + TREE_STATIC(vti_constructor) = 1; + TREE_CONSTANT(vti_constructor) = 1; + vti_next_variable = 0; + } + +void +parser_init_list_element(cbl_field_t *field) + { + if( mode_syntax_only() ) return; + + gcc_assert(vti_next_variable < vti_list_size); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(vti_constructor), + build_int_cst_type(SIZE_T, vti_next_variable++), + gg_get_address_of(field->var_decl_node) ); + if( vti_next_variable == vti_list_size) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(vti_constructor), + build_int_cst_type(SIZE_T, vti_next_variable++), + null_pointer_node ); + DECL_INITIAL(vti_array) = vti_constructor; + } + } + +void +parser_init_list() + { + if( mode_syntax_only() ) return; + + char ach[48]; + sprintf(ach, + "..variables_to_init_%ld", + current_function->our_symbol_table_index); + tree array = gg_trans_unit_var_decl(ach); + gg_call(VOID, + "__gg__variables_to_init", + gg_get_address_of(array), + wsclear() ? gg_string_literal(wsclear()) : null_pointer_node, + NULL_TREE); + } + +static void +psa_FldLiteralN(struct cbl_field_t *field ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", field) + SHOW_PARSE_END + } + // We are constructing a completely static constant structure, based on the + // text string in .initial + + __int128 value = 0; + + do + { + // This is a false do{}while, to isolate the variables: + + // We need to convert data.initial to an __int128 value + char *p = const_cast<char *>(field->data.initial); + int sign = 1; + if( *p == '-' ) + { + field->attr |= signable_e; + sign = -1; + p += 1; + } + else if( *p == '+' ) + { + // We set it signable so that the instruction DISPLAY +1 + // actually outputs "+1" + field->attr |= signable_e; + p += 1; + } + + // We need to be able to handle + // 123 + // 123.456 + // 123E<exp> + // 123.456E<exp> + // where <exp> can be N, +N and -N + // + // Oh, yeah, and we're talking handling up to 32 digits, or more, so using + // library routines is off the table. + + int digits = 0; + int rdigits = 0; + int rdigit_delta = 0; + int exponent = 0; + + char *exp = strchr(p, 'E'); + if( !exp ) + { + exp = strchr(p, 'e'); + } + if(exp) + { + exponent = atoi(exp+1); + } + + // We can now calculate the value, and the number of digits and rdigits. + + // We count up leading zeroes as part of the attr->digits calculation. + // It turns out that certain comparisons need to know the number of digits, + // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true. So, + // we need to count up leading zeroes. + + for(;;) + { + char ch = *p++; + if( ch == symbol_decimal_point() ) + { + rdigit_delta = 1; + continue; + } + if( ch < '0' || ch > '9' ) + { + break; + } + digits += 1; + rdigits += rdigit_delta; + value *= 10; + value += ch - '0'; + } + + if( exponent < 0 ) + { + rdigits += -exponent; + } + else + { + while(exponent--) + { + if(rdigits) + { + rdigits -= 1; + } + else + { + digits += 1; + value *= 10; + } + } + } + + if(digits < rdigits) + { + digits = rdigits; + } + field->data.digits = digits; + field->data.rdigits = rdigits; + + // We now need to calculate the capacity. + + unsigned char *pvalue = (unsigned char *)&value; + int capacity; + if( *(uint64_t*)(pvalue + 8) ) + { + // Bytes 15 through 8 are non-zero + capacity = 16; + } + else if( *(uint32_t*)(pvalue + 4) ) + { + // Bytes 7 through 4 are non-zero + capacity = 8; + } + else if( *(uint16_t*)(pvalue + 2) ) + { + // Bytes 3 and 2 + capacity = 4; + } + else if( pvalue[1] ) + { + // Byte 1 is non-zero + capacity = 2; + } + else + { + // The value is zero through 0xFF + capacity = 1; + } + + value *= sign; + + // One last adjustment. The number is signable, so the binary value + // is going to be treated as twos complement. That means that the highest + // bit has to be 1 for negative signable numbers, and 0 for positive. If + // necessary, adjust capacity up by one byte so that the variable fits: + + if( capacity < 16 && (field->attr & signable_e) ) + { + if( value < 0 && (((pvalue[capacity-1] & 0x80) == 0 ))) + { + capacity *= 2; + } + else if( value >= 0 && (((pvalue[capacity-1] & 0x80) == 0x80 ))) + { + capacity *= 2; + } + } + field->data.capacity = capacity; + + }while(0); + + char base_name[257]; + char id_string[32] = ""; + + static size_t our_index = 0; + + sprintf(id_string, ".%ld", ++our_index); + strcpy(base_name, field->name); + strcat(base_name, id_string); + + tree var_type; + + if( field->data.capacity == 16 ) + { + /* GCC-13 has no provision for an int128 constructor. So, we use a + union for our necessary __int128. + + typedef union cblc_int128_t + { + unsigned char array16[16]; + __uint128 uval128; + __int128 sval128; + } cblc_int128_t; + + We build a constructor for the array16[], and then we use that + constructor in the constructor for the union. + */ + + // Build the constructor for array16 + tree array16_type = build_array_type_nelts(UCHAR, 16); + tree array_16_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(array_16_constructor) = array16_type; + TREE_STATIC(array_16_constructor) = 1; + TREE_CONSTANT(array_16_constructor) = 1; + + for(int i=0; i<16; i++) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(array_16_constructor), + build_int_cst_type(INT, i), + build_int_cst_type(UCHAR, + ((unsigned char *)&value)[i])); + } + + // The array16 constructor is ready to be used + + // So, we need a constructor for the union: + // Now we create the union: + var_type = cblc_int128_type_node; + + tree union_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(union_constructor) = var_type; + TREE_STATIC(union_constructor) = 1; + TREE_CONSTANT(union_constructor) = 1; + + // point next_field to the first field of the union, and + // set the value to be the table constructor + tree next_field = TYPE_FIELDS(var_type); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(union_constructor), + next_field, + array_16_constructor ); + + tree new_var_decl = gg_define_variable( var_type, + base_name, + vs_static); + DECL_INITIAL(new_var_decl) = union_constructor; + + field->data_decl_node = member(new_var_decl, "sval128"); + TREE_READONLY(field->data_decl_node) = 1; + TREE_CONSTANT(field->data_decl_node) = 1; + + // Convert the compile-time data.value to a run-time variable decl node: + sprintf(id_string, ".%ld", ++our_index); + strcpy(base_name, field->name); + strcat(base_name, id_string); + field->literal_decl_node = gg_define_variable(DOUBLE, id_string, vs_static); + TREE_READONLY(field->literal_decl_node) = 1; + TREE_CONSTANT(field->literal_decl_node) = 1; + char ach[128]; + strfromf128(ach, sizeof(ach), "%.36E", field->data.value); + REAL_VALUE_TYPE real; + real_from_string(&real, ach); + tree initer = build_real (DOUBLE, real); + DECL_INITIAL(field->literal_decl_node) = initer; + + } + else + { + // The value is 1, 2, 4, or 8 bytes, so an ordinary constructor can be used. + var_type = tree_type_from_size( field->data.capacity, + field->attr & signable_e); + tree new_var_decl = gg_define_variable( var_type, + base_name, + vs_static); + DECL_INITIAL(new_var_decl) = build_int_cst_type(var_type, value); + field->data_decl_node = new_var_decl; + } + } + +static void +psa_FldBlob(struct cbl_field_t *var ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", var) + SHOW_PARSE_END + } + + // We are constructing a completely static constant structure. We know the + // capacity. We'll create it from the data.initial. The var_decl_node will + // be a pointer to the data + + char base_name[257]; + char id_string[32] = ""; + + static size_t our_index = 0; + + sprintf(id_string, ".%ld", ++our_index); + strcpy(base_name, var->name); + strcat(base_name, id_string); + + // Build the constructor for the array of bytes + + tree array_type = build_array_type_nelts(UCHAR, var->data.capacity); + tree array_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(array_constructor) = array_type; + TREE_STATIC(array_constructor) = 1; + TREE_CONSTANT(array_constructor) = 1; + + for(size_t i=0; i<var->data.capacity; i++) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(array_constructor), + build_int_cst_type(INT, i), + build_int_cst_type(UCHAR, var->data.initial[i])); + } + + // The array constructor is ready to be used + tree var_decl_node = gg_define_variable( array_type, + base_name, + vs_static); + DECL_INITIAL(var_decl_node) = array_constructor; + var->var_decl_node = gg_get_address_of(var_decl_node); + } + +void +parser_accept( struct cbl_refer_t refer, + enum special_name_t special_e ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_REF(" ", refer); + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + /* + enum special_name_t + { + SYSIN_e, + SYSIPT_e, + SYSOUT_e, + SYSLIST_e, + SYSLST_e, + SYSPUNCH_e, + SYSPCH_e, + CONSOLE_e, + C01_e, C02_e, C03_e, C04_e, C05_e, C06_e, + C07_e, C08_e, C09_e, C10_e, C11_e, C12_e, + CSP_e, + S01_e, S02_e, S03_e, S04_e, S05_e, + AFP_5A_e, + }; + */ + + // The ISO spec describes the valid special names for ACCEPT as implementation + // dependent. We are following IBM's lead. + + tree environment = build_int_cst_type(INT, special_e); + + switch( special_e ) + { + case CONSOLE_e: + case SYSIPT_e: + case SYSIN_e: + break; + default: + dbgmsg("%s(): We don't know what to do with special_name_t %d,", __func__, special_e); + dbgmsg("%s(): so we are ignoring it.", __func__); + yywarn("unrecognized SPECIAL NAME ignored"); + return; + break; + } + + gg_call(VOID, + "__gg__accept", + environment, + gg_get_address_of(refer.field->var_decl_node), + refer_offset_dest(refer), + refer_size_dest(refer), + NULL_TREE); + } + +// TODO: update documentation. +void +parser_accept_exception( cbl_label_t *accept_label ) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We are entering either SIZE ERROR or NOT SIZE ERROR code + RETURN_IF_PARSE_ONLY; + set_up_on_exception_label(accept_label); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO OVER") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL INTO:") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_END + } + + // Jump over the [NOT] ON EXCEPTION code that is about to be laid down + gg_append_statement( accept_label->structs.arith_error->over.go_to ); + // Create the label that allows the following code to be executed at + // when an ERROR, or NOT ERROR, has been determined to have taken place: + gg_append_statement( accept_label->structs.arith_error->into.label ); + } + +void +parser_accept_exception_end( cbl_label_t *accept_label ) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We have reached the end of the ERROR, or NOT ERROR, code. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO BOTTOM") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL OVER:") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_END + } + + // Jump to the end of the arithmetic code: + gg_append_statement( accept_label->structs.arith_error->bottom.go_to ); + // Lay down the label that allows the ERROR/NOT ERROR instructions + // to exist in a lacuna that doesn't get executed unless somebody jumps + // to it: + gg_append_statement( accept_label->structs.arith_error->over.label ); + } + +void +parser_accept_command_line( cbl_refer_t tgt, + cbl_refer_t source, + cbl_label_t *error, + cbl_label_t *not_error ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( error ) + { + SHOW_PARSE_LABEL(" error ", error) + } + if( not_error ) + { + SHOW_PARSE_LABEL(" not_error ", not_error) + } + SHOW_PARSE_END + } + + static tree erf = gg_define_variable(INT, "..pac_erf", vs_file_static); + + if( !source.field ) + { + // The whole command-line is wanted + gg_assign(erf, + gg_call_expr( INT, + "__gg__get_command_line", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + NULL_TREE)); + if( error ) + { + // There is an ON EXCEPTION phrase: + IF( erf, ne_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO error->INTO for_command_line") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( not_error ) + { + // There is an NOT ON EXCEPTION phrase: + IF( erf, eq_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO not_error->INTO for command_line") + SHOW_PARSE_LABEL(" ", not_error) + } + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + } + else + { + // A particular parameter has been requested: + gg_assign(erf, + gg_call_expr( INT, + "__gg__get_argv", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + gg_get_address_of(source.field->var_decl_node), + refer_offset_dest(source), + refer_size_dest(source), + NULL_TREE)); + if( error ) + { + // There is an ON EXCEPTION phrase: + IF( erf, ne_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO error->INTO for_argv") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( not_error ) + { + // There is an NOT ON EXCEPTION phrase: + IF( erf, eq_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO not_error->INTO for_argv") + SHOW_PARSE_LABEL(" ", not_error) + } + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + } + if( error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL error->bottom") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->bottom.label ); + } + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL not_error->bottom") + SHOW_PARSE_LABEL(" ", not_error) + SHOW_PARSE_END + } + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + } + +void +parser_accept_command_line_count( cbl_refer_t tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + gg_call( VOID, + "__gg__get_argc", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + NULL_TREE); + } + +void +parser_accept_envar(struct cbl_refer_t tgt, + struct cbl_refer_t envar, + cbl_label_t *error, + cbl_label_t *not_error ) + { + Analyze(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( error ) + { + SHOW_PARSE_LABEL(" error ", error) + } + if( not_error ) + { + SHOW_PARSE_LABEL(" not_error ", not_error) + } + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + static tree erf = gg_define_variable(INT, "..pae_erf", vs_file_static); + + gg_assign(erf, + gg_call_expr( INT, + "__gg__accept_envar", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + gg_get_address_of(envar.field->var_decl_node), + refer_offset_source(envar), + refer_size_source(envar), + NULL_TREE)); + if( error ) + { + // There is an ON EXCEPTION phrase: + IF( erf, ne_op, integer_zero_node ) + { + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( not_error ) + { + // There is an NOT ON EXCEPTION phrase: + IF( erf, eq_op, integer_zero_node ) + { + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL error->bottom") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->bottom.label ); + } + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL not_error->bottom") + SHOW_PARSE_LABEL(" ", not_error) + SHOW_PARSE_END + } + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + } + +void +parser_set_envar( struct cbl_refer_t name, struct cbl_refer_t value ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + // Set name to value using setenv(3) + gg_call(BOOL, + "__gg__set_envar", + gg_get_address_of(name.field->var_decl_node), + refer_offset_source(name), + refer_size_source(name), + gg_get_address_of(value.field->var_decl_node), + refer_offset_source(value), + refer_size_source(value), + NULL_TREE); + } + +void +parser_accept_date_yymmdd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_yymmdd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_yyyymmdd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_yyyymmdd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_yyddd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_yyddd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target,""); + TRACE1_END + } + } + +void +parser_accept_date_yyyyddd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_yyyyddd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_dow( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_dow", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_hhmmssff( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_hhmmssff", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +/* + * If the encoding is anything but custom, the enumerated type + * cbl_encoding_t suffices to describe it. At least for now, the rest + * of cbl_alphabet_t in those cases is unused. + * + * To get the symbol index: symbol_index(symbol_elem_of(&alphabet)) + * + * The parameter is always a reference to an element in the symbol table. + */ +void +parser_alphabet( cbl_alphabet_t& alphabet ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + fprintf(stderr, "%s\n", alphabet.name); + switch(alphabet.encoding) + { + case ASCII_e: + fprintf(stderr, "ASCII\n"); + break; + case iso646_e: + fprintf(stderr, "ISO646\n"); + break; + case EBCDIC_e: + fprintf(stderr, "EBCDIC\n"); + break; + case custom_encoding_e: + fprintf(stderr, "%s\n", alphabet.name); + break; + } + SHOW_PARSE_END + } + + size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet)); + + switch(alphabet.encoding) + { + case ASCII_e: + case iso646_e: + case EBCDIC_e: + break; + + case custom_encoding_e: + { + unsigned char ach[256]; + + tree table_type = build_array_type_nelts(UCHAR, 256); + tree table256 = gg_define_variable(table_type); + for( int i=0; i<256; i++ ) + { + // character i has the ordinal alphabet[i] + unsigned char ch = ascii_to_internal(i); + + ach[ch] = (alphabet.alphabet[i]); + gg_assign( gg_array_value(table256, ch), + build_int_cst_type(UCHAR, (alphabet.alphabet[i])) ); + } + __gg__alphabet_create(alphabet.encoding, + alphabet_index, + ach, + alphabet.low_index, + alphabet.high_index); + gg_call(VOID, + "__gg__alphabet_create", + build_int_cst_type(INT, alphabet.encoding), + build_int_cst_type(SIZE_T, alphabet_index), + gg_get_address_of(table256), + build_int_cst_type(INT, alphabet.low_index), + build_int_cst_type(INT, alphabet.high_index), + NULL_TREE ); + break; + } + } + } + +void +parser_alphabet_use( cbl_alphabet_t& alphabet ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + switch(alphabet.encoding) + { + case ASCII_e: + fprintf(stderr, "ASCII\n"); + break; + case iso646_e: + fprintf(stderr, "ISO646\n"); + break; + case EBCDIC_e: + fprintf(stderr, "EBCDIC\n"); + break; + case custom_encoding_e: + fprintf(stderr, "%s\n", alphabet.name); + break; + } + SHOW_PARSE_END + } + + size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet)); + + switch(alphabet.encoding) + { + case ASCII_e: + case iso646_e: + case EBCDIC_e: + __gg__low_value_character = DEGENERATE_LOW_VALUE; + __gg__high_value_character = DEGENERATE_HIGH_VALUE; + gg_call(VOID, + "__gg__alphabet_use", + build_int_cst_type(INT, alphabet.encoding), + null_pointer_node, + NULL_TREE); + break; + + case custom_encoding_e: + std::unordered_map<size_t, alphabet_state>::const_iterator it = + __gg__alphabet_states.find(alphabet_index); + + assert( it != __gg__alphabet_states.end()); + __gg__low_value_character = it->second.low_char; + __gg__high_value_character = it->second.high_char; + + gg_call(VOID, + "__gg__alphabet_use", + build_int_cst_type(INT, alphabet.encoding), + build_int_cst_type(SIZE_T, alphabet_index), + NULL_TREE); + break; + } + } + +void +parser_display_literal(const char *literal, bool advance) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" \""); + SHOW_PARSE_TEXT(literal) + SHOW_PARSE_TEXT("\""); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("About to DISPLAY a literal:") + TRACE1_END + } + + tree file_descriptor = integer_one_node; // Just stdout, for now + gg_write( file_descriptor, + gg_string_literal(literal), + build_int_cst_type(integer_type_node,(int)strlen(literal)) ); + + if( advance ) + { + gg_write( file_descriptor, + gg_string_literal("\n"), + integer_one_node); + } + cursor_at_sol = advance; + } + +void +parser_display_internal(tree file_descriptor, + cbl_refer_t refer, + bool advance) + { + Analyze(); + if( refer.field->type == FldConditional ) + { + TRACE1 + { + gg_create_true_false_statement_lists(refer.field->var_decl_node); + gg_fprintf(file_descriptor, 0, "TRUE"); + ELSE + gg_fprintf(file_descriptor, 0, "FALSE"); + ENDIF + } + } + else if( refer.field->type == FldLiteralA ) + { + gg_call(VOID, + "__gg__display_string", + file_descriptor, + build_string_literal(refer.field->data.capacity, + refer.field->data.initial), + build_int_cst_type(SIZE_T, refer.field->data.capacity), + advance ? integer_one_node : integer_zero_node, + NULL_TREE ); + cursor_at_sol = advance; + } + else if( refer.field->type == FldLiteralN ) + { + // The parser found the string of digits from the source code and converted + // it to a _Float128. + + // The bad news is that something like 555.55 can't be expressed exactly; + // internally it is 555.5499999999.... + + // The good news is that we know any string of 33 or fewer digits is + // converted to _Float128 and then converted back again, you get the same + // string. + + // We make use of that here + + char ach[128]; + strfromf128(ach, sizeof(ach), "%.33E", refer.field->data.value); + char *p = strchr(ach, 'E'); + if( !p ) + { + // Probably INF -INF NAN or -NAN, so ach has our result + } + else + { + p += 1; + int exp = atoi(p); + if( exp >= 6 || exp <= -5 ) + { + // We are going to stick with the E notation, so ach has our result + } + else + { + int precision = 32 - exp; + char achFormat[24]; + sprintf(achFormat, "%%.%df", precision); + strfromf128(ach, sizeof(ach), achFormat, refer.field->data.value); + } + __gg__remove_trailing_zeroes(ach); + } + + if( symbol_decimal_point() == ',' ) + { + char *p = strchr(ach, '.' ); + if( p ) + { + *p = symbol_decimal_point(); + } + } + + gg_write( file_descriptor, + gg_string_literal(ach), + build_int_cst_type(SIZE_T, strlen(ach))); + if( advance ) + { + gg_write( file_descriptor, + gg_string_literal("\n"), + integer_one_node); + } + } + else + { + if( refer_is_clean(refer) ) + { + gg_call(VOID, + "__gg__display_clean", + gg_get_address_of(refer.field->var_decl_node), + file_descriptor, + advance ? integer_one_node : integer_zero_node, + NULL_TREE ); + } + else + { + // We might be dealing with a refmod: + if( refer.refmod.from || refer.refmod.len ) + { + gg_attribute_bit_set(refer.field, refmod_e); + } + gg_call(VOID, + "__gg__display", + gg_get_address_of(refer.field->var_decl_node), + refer_offset_source(refer), + refer_size_source( refer), + file_descriptor, + advance ? integer_one_node : integer_zero_node, + NULL_TREE ); + if( refer.refmod.from || refer.refmod.len ) + { + gg_attribute_bit_clear(refer.field, refmod_e); + } + } + } + cursor_at_sol = advance; + } + +void +parser_display_field(cbl_field_t *field) + { + parser_display_internal_field(integer_one_node, + field, + DISPLAY_NO_ADVANCE); + } + +void +parser_display( const struct cbl_special_name_t *upon, + struct cbl_refer_t refs[], + size_t n, + bool advance ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" parser_display of multiple variables:") + for(size_t i=0; i<n; i++) + { + SHOW_PARSE_INDENT + SHOW_PARSE_REF("", refs[i]); + } + if( advance ) + { + SHOW_PARSE_TEXT(" (advance)") + } + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + for(size_t ii=0; ii<n; ii++) + { + if( ii != 0 ) + { + TRACE1_INDENT + } + if(n > 1) + { + gg_fprintf(trace_handle, 1, "%ld: ", build_int_cst_type(INT, ii)); + } + TRACE1_REFER("", refs[ii], "") + } + TRACE1_END + } + tree file_descriptor = gg_define_int(); + bool needs_closing = false; + if( upon ) + { + switch(upon->id) + { + case STDOUT_e: + case SYSOUT_e: + case SYSLIST_e: + case SYSLST_e: + case CONSOLE_e: + gg_assign(file_descriptor, integer_one_node); + break; + + case STDERR_e: + case SYSPUNCH_e: + case SYSPCH_e: + gg_assign(file_descriptor, integer_two_node); + break; + + default: + if( upon->os_filename[0] ) + { + tree topen = gg_open( gg_string_literal(upon->os_filename), + build_int_cst_type(INT, O_APPEND|O_WRONLY)); + gg_assign(file_descriptor, topen); + needs_closing = true; + } + else + { + fprintf(stderr, "We don't know what to do in parser_display\n"); + gcc_unreachable(); + } + } + } + else + { + gg_assign(file_descriptor,integer_one_node); // stdout is file descriptor 1. + } + + for(size_t i=0; i<n-1; i++) + { + CHECK_FIELD(refs[i].field); + parser_display_internal(file_descriptor, refs[i], DISPLAY_NO_ADVANCE); + } + CHECK_FIELD(refs[n-1].field); + parser_display_internal(file_descriptor, refs[n-1], advance ? DISPLAY_ADVANCE : DISPLAY_NO_ADVANCE); + + if( needs_closing ) + { + tree tclose = gg_close(file_descriptor); + // We are ignoring the close() return value + gg_append_statement(tclose); + } + + cursor_at_sol = advance; + } + +static tree +get_literalN_value(cbl_field_t *var) + { + // Get the literal N value from the integer var_decl + tree retval = NULL_TREE; + tree var_type = tree_type_from_size(var->data.capacity, + var->attr & signable_e); + retval = gg_cast(var_type, var->data_decl_node); + return retval; + } + +void +parser_assign( size_t nC, cbl_num_result_t *C, + struct cbl_refer_t sourceref, + cbl_label_t *on_error, + cbl_label_t *not_error, + cbl_label_t *compute_error) + { + Analyze(); + RETURN_IF_PARSE_ONLY; + // There might, or might not, already be error and/or not_error labels: + set_up_on_exception_label(on_error); + set_up_on_exception_label(not_error); + set_up_compute_error_label(compute_error); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + } + + TRACE1 + { + TRACE1_HEADER + char ach[32]; + sprintf(ach, "%ld target%s", nC, nC==1 ? "" : "s"); + TRACE1_TEXT(ach); + if( on_error ) + { + TRACE1_TEXT("; with on_error"); + } + if( not_error ) + { + TRACE1_TEXT("; with not_error"); + } + } + + tree error_flag = gg_define_int(0); + + for(size_t i=0; i<nC; i++ ) + { + TRACE1 + { + char ach[48]; + sprintf(ach, "Processing target number %ld", i); + TRACE1_INDENT + TRACE1_TEXT(ach); + } + cbl_refer_t& destref( C[i].refer ); + cbl_round_t rounded = C[i].rounded; + SHOW_PARSE + { + if(i) + { + SHOW_PARSE_INDENT + } + if( sourceref.field && is_figconst_low(sourceref.field) ) + { + SHOW_PARSE_TEXT(" LOW-VALUE") + } + else if( sourceref.field && is_figconst_zero(sourceref.field) ) + { + SHOW_PARSE_TEXT(" ZERO-VALUE") + } + else if( sourceref.field && is_figconst_space(sourceref.field) ) + { + SHOW_PARSE_TEXT(" SPACE-VALUE") + } + else if( sourceref.field && is_figconst_quote(sourceref.field) ) + { + SHOW_PARSE_TEXT(" QUOTE-VALUE") + } + else if( sourceref.field && is_figconst_high(sourceref.field) ) + { + SHOW_PARSE_TEXT(" HIGH-VALUE") + } + else + { + SHOW_PARSE_REF(" ", sourceref) + } + SHOW_PARSE_REF(" TO ", destref) + switch(rounded) + { + case away_from_zero_e: + SHOW_PARSE_TEXT(" AWAY_FROM_ZERO") + break; + case nearest_toward_zero_e: + SHOW_PARSE_TEXT(" NEAREST_TOWARD_ZERO") + break; + case toward_greater_e: + SHOW_PARSE_TEXT(" TOWARD_GREATER") + break; + case toward_lesser_e: + SHOW_PARSE_TEXT(" TOWARD_LESSER") + break; + case nearest_away_from_zero_e: + SHOW_PARSE_TEXT(" NEAREST_AWAY_FROM_ZERO") + break; + case nearest_even_e: + SHOW_PARSE_TEXT(" NEAREST_EVEN") + break; + case prohibited_e: + SHOW_PARSE_TEXT(" PROHIBITED") + break; + case truncation_e: + SHOW_PARSE_TEXT(" TRUNCATED") + break; + default: + gcc_unreachable(); + break; + } + } + + CHECK_FIELD(destref.field); + CHECK_FIELD(sourceref.field); + + // gg_printf("parser_assign: The compute_error_code is %d\n", + // gg_cast(INT, compute_error->structs.compute_error->compute_error_code), NULL_TREE); + + static tree erf = gg_define_variable(INT, "..pa_erf", vs_file_static); + if( on_error ) + { + // There is an ON ERROR clause. When there is an ON ERROR clause, and + // there is an error, the TARGET values are to be left unchanged. + IF(compute_error->structs.compute_error->compute_error_code, + ne_op, + integer_zero_node ) + { + // There was an error, so we do NOT replace the destref with the + // sourceref value + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; computional error occurred") + } + } + ELSE + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; no computational error") + } + // There was no computational error. Call the move routine that does + // not replace the target when there is a size error: + TREEPLET tsource; + treeplet_fill_source(tsource, sourceref); + static bool check_for_error = true; + move_helper(erf, + destref, + sourceref, + tsource, + rounded, + check_for_error, + true); + + gg_assign(error_flag, gg_bitwise_or(error_flag, erf)); + IF(error_flag, ne_op, integer_zero_node) + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; a move error occurred") + } + // There was an error during the move. Set the exception status + // information: + gg_call( VOID, + "__gg__process_compute_error", + build_int_cst_type(INT, compute_error_truncate), + NULL_TREE); + // But because there is an ON ERROR clause, suppress DECLARATIVE + // processing + gg_assign(var_decl_exception_code, integer_zero_node); + } + ELSE + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; no move") + } + } + ENDIF + } + ENDIF + } + else + { + // There is no ON_ERROR clause, so we do the truncation type move, but + // with one exception. If the error was an exponentiation error that + // resulted in a NaN, we *don't* do the move: + + IF( gg_bitwise_and( compute_error->structs.compute_error->compute_error_code, + build_int_cst_type(INT, + compute_error_exp_minus_by_frac + | compute_error_divide_by_zero)), + ne_op, + integer_zero_node ) + { + // It was a NaN, so don't do the move + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Not moving the NaN") + } + } + ELSE + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Doing the move") + } + TREEPLET tsource; + treeplet_fill_source(tsource, sourceref); + static bool check_for_error = true; + move_helper(erf, + destref, + sourceref, + tsource, + rounded, + check_for_error, + false); + gg_assign(error_flag, gg_bitwise_or(error_flag, erf)); + IF(error_flag, ne_op, integer_zero_node) + { + // There was an error during the move. Set the exception status + // information: + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Error during the move; calling __gg__process_compute_error") + } + gg_call( VOID, + "__gg__process_compute_error", + build_int_cst_type(INT, compute_error_truncate), + NULL_TREE); + } + ELSE + { + } + ENDIF + } + ENDIF + } + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("source ", sourceref.field, "") + TRACE1_INDENT + TRACE1_FIELD("dest ", destref.field, "") + TRACE1_END + } + } + + if( on_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down on_error GOTO into") + SHOW_PARSE_LABEL(" ", on_error) + } + IF( gg_bitwise_or(error_flag, + compute_error->structs.compute_error->compute_error_code), + ne_op, + integer_zero_node ) + { + gg_append_statement( on_error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + else + { + // We weren't given an explicit ON SIZE ERROR label, so we need to go + // with the NO ERROR CLAUSE behavior + if( compute_error ) + { + gg_call( VOID, + "__gg__process_compute_error", + compute_error->structs.compute_error->compute_error_code, + NULL_TREE); + } + } + + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down not_error GOTO into") + SHOW_PARSE_LABEL(" ", not_error) + } + IF( compute_error->structs.compute_error->compute_error_code, eq_op, integer_zero_node ) + { + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + + if( on_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down on_error LABEL BOTTOM:") + SHOW_PARSE_LABEL(" ", on_error) + } + gg_append_statement( on_error->structs.arith_error->bottom.label ); + } + + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down not_error LABEL BOTTOM:") + SHOW_PARSE_LABEL(" ", not_error) + } + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + + SHOW_PARSE + { + SHOW_PARSE_END + } + } + +static cbl_figconst_t +is_figconst(cbl_field_t *field) + { + cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); + return figconst; + } + +static cbl_figconst_t +is_figconst(cbl_refer_t &sourceref) + { + return is_figconst(sourceref.field); + } + +void +parser_move(cbl_refer_t destref, + cbl_refer_t sourceref, + cbl_round_t rounded, + bool skip_fill_from // Defaults to false + ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( sourceref.field && is_figconst_low(sourceref.field) ) + { + SHOW_PARSE_TEXT(" LOW-VALUE") + } + else if( sourceref.field && is_figconst_zero(sourceref.field) ) + { + SHOW_PARSE_TEXT(" ZERO-VALUE") + } + else if( sourceref.field && is_figconst_space(sourceref.field) ) + { + SHOW_PARSE_TEXT(" SPACE-VALUE") + } + else if( sourceref.field && is_figconst_quote(sourceref.field) ) + { + SHOW_PARSE_TEXT(" QUOTE-VALUE") + } + else if( sourceref.field && is_figconst_high(sourceref.field) ) + { + SHOW_PARSE_TEXT(" HIGH-VALUE") + } + else + { + SHOW_PARSE_REF(" ", sourceref) + } + SHOW_PARSE_REF(" TO ", destref) + switch(rounded) + { + case away_from_zero_e: + SHOW_PARSE_TEXT(" AWAY_FROM_ZERO") + break; + case nearest_toward_zero_e: + SHOW_PARSE_TEXT(" NEAREST_TOWARD_ZERO") + break; + case toward_greater_e: + SHOW_PARSE_TEXT(" TOWARD_GREATER") + break; + case toward_lesser_e: + SHOW_PARSE_TEXT(" TOWARD_LESSER") + break; + case nearest_away_from_zero_e: + SHOW_PARSE_TEXT(" NEAREST_AWAY_FROM_ZERO") + break; + case nearest_even_e: + SHOW_PARSE_TEXT(" NEAREST_EVEN") + break; + case prohibited_e: + SHOW_PARSE_TEXT(" PROHIBITED") + break; + case truncation_e: + SHOW_PARSE_TEXT(" TRUNCATED") + break; + default: + gcc_unreachable(); + break; + } + SHOW_PARSE_END + } + + if( !skip_fill_from ) + { + cbl_figconst_t figconst = is_figconst(sourceref); + if( figconst ) + { + sourceref.all = true; + } + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("About to call move_helper") + } + TREEPLET tsource; + treeplet_fill_source(tsource, sourceref); + static bool dont_check_for_error = false; + move_helper(NULL, destref, sourceref, tsource, rounded, dont_check_for_error ); + + TRACE1 + { + TRACE1_INDENT + TRACE1_REFER_INFO("source ", sourceref) + TRACE1_INDENT + TRACE1_REFER_INFO("dest ", destref) + TRACE1_END + } + } + +static +void +parser_move_multi(cbl_refer_t destref, + cbl_refer_t sourceref, + TREEPLET tsource, + cbl_round_t rounded, + bool skip_fill_from ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( sourceref.field && is_figconst_low(sourceref.field) ) + { + SHOW_PARSE_TEXT(" LOW-VALUE") + } + else if( sourceref.field && is_figconst_zero(sourceref.field) ) + { + SHOW_PARSE_TEXT(" ZERO-VALUE") + } + else if( sourceref.field && is_figconst_space(sourceref.field) ) + { + SHOW_PARSE_TEXT(" SPACE-VALUE") + } + else if( sourceref.field && is_figconst_quote(sourceref.field) ) + { + SHOW_PARSE_TEXT(" QUOTE-VALUE") + } + else if( sourceref.field && is_figconst_high(sourceref.field) ) + { + SHOW_PARSE_TEXT(" HIGH-VALUE") + } + else + { + SHOW_PARSE_REF(" ", sourceref) + } + SHOW_PARSE_REF(" TO ", destref) + switch(rounded) + { + case away_from_zero_e: + SHOW_PARSE_TEXT(" AWAY_FROM_ZERO") + break; + case nearest_toward_zero_e: + SHOW_PARSE_TEXT(" NEAREST_TOWARD_ZERO") + break; + case toward_greater_e: + SHOW_PARSE_TEXT(" TOWARD_GREATER") + break; + case toward_lesser_e: + SHOW_PARSE_TEXT(" TOWARD_LESSER") + break; + case nearest_away_from_zero_e: + SHOW_PARSE_TEXT(" NEAREST_AWAY_FROM_ZERO") + break; + case nearest_even_e: + SHOW_PARSE_TEXT(" NEAREST_EVEN") + break; + case prohibited_e: + SHOW_PARSE_TEXT(" PROHIBITED") + break; + case truncation_e: + SHOW_PARSE_TEXT(" TRUNCATED") + break; + default: + gcc_unreachable(); + break; + } + SHOW_PARSE_END + } + + if( !skip_fill_from ) + { + cbl_figconst_t figconst = is_figconst(sourceref); + if( figconst ) + { + sourceref.all = true; + } + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("About to call move_helper") + } + + static bool dont_check_for_error = false; + move_helper(NULL, destref, sourceref, tsource, rounded, dont_check_for_error ); + + TRACE1 + { + TRACE1_INDENT + TRACE1_REFER_INFO("source ", sourceref) + TRACE1_INDENT + TRACE1_REFER_INFO("dest ", destref) + TRACE1_END + } + } + +void +parser_move(size_t ntgt, cbl_refer_t *tgts, cbl_refer_t src, cbl_round_t rounded) + { + if( mode_syntax_only() ) return; + + cbl_figconst_t figconst = is_figconst(src); + if( figconst ) + { + src.all = true; + } + TREEPLET tsource; + treeplet_fill_source(tsource, src); + static const bool skip_fill_from = true; + for( cbl_refer_t *p=tgts; p < tgts + ntgt; p++ ) + { + parser_move_multi(*p, src, tsource, rounded, skip_fill_from); + } + } + +/* + * "nelem" represents the number of elements in the table. + * "src" is the already-initialized first element of the table + * to be initialized. If nspan == 0, copy the whole record because + * the record either has no filler, or WITH FILLER was specified. + * Otherwise, the spans array comprises a set of {offset,end+1} pairs + * representing sequences of consecutive non-FILLER fields. + * + * "table" is the symbol table index for the table being initialized. + * It may appear in a subsequent call as part of the (sub)tbls array, + * if it is nested in a higher-level table. + */ +void +parser_initialize_table(size_t nelem, + cbl_refer_t src, + size_t nspan, + const cbl_bytespan_t spans[], + size_t table, // symbol table index + size_t ntbl, + const cbl_subtable_t tbls[]) + { + if( mode_syntax_only() ) return; + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("src: ", src, " ") + TRACE1_END + } + typedef size_t span_t[2]; + static_assert(sizeof(spans[0]) == sizeof(span_t), "pair size wrong"); + static tree tspans = gg_define_variable(SIZE_T_P, "..pit_v1", vs_file_static); + static tree ttbls = gg_define_variable(SIZE_T_P, "..pit_v2", vs_file_static); + gg_assign(tspans, build_array_of_size_t(2*nspan, (const size_t *)spans)); + gg_assign(ttbls, build_array_of_size_t(2*ntbl, (const size_t *)tbls)); + + gg_call(VOID, + "__gg__mirror_range", + build_int_cst_type(SIZE_T, nelem), + gg_get_address_of(src.field->var_decl_node), + refer_offset_source(src), + build_int_cst_type(SIZE_T, nspan), + tspans, + build_int_cst_type(SIZE_T, table), + build_int_cst_type(SIZE_T, ntbl), + ttbls, + NULL_TREE); + + gg_free(tspans); + gg_free(ttbls); + } + +static +tree +tree_type_from_field_type(cbl_field_t *field, size_t &nbytes) + { + /* This routine is used to determine what action is taken with type of a + CALL ... USING <var> and the matching PROCEDURE DIVISION USING <var> of + a PROGRAM-ID or FUNCTION-ID + */ + tree retval = COBOL_FUNCTION_RETURN_TYPE; + nbytes = 8; + if( field ) + { + // This maps a Fldxxx to a C-style variable type: + switch(field->type) + { + case FldGroup: + case FldAlphanumeric: + case FldAlphaEdited: + case FldNumericEdited: + retval = CHAR_P; + nbytes = field->data.capacity; + break; + + case FldNumericDisplay: + case FldNumericBinary: + case FldPacked: + if( field->data.digits > 18 ) + { + retval = UINT128; + nbytes = 16; + } + else + { + retval = SIZE_T; + nbytes = 8; + } + break; + + case FldNumericBin5: + case FldIndex: + case FldPointer: + if( field->data.capacity > 8 ) + { + retval = UINT128; + nbytes = 16; + } + else + { + retval = SIZE_T; + nbytes = 8; + } + break; + + case FldFloat: + if( field->data.capacity == 8 ) + { + retval = DOUBLE; + nbytes = 8; + } + else if( field->data.capacity == 4 ) + { + retval = FLOAT; + nbytes = 4; + } + else + { + retval = FLOAT128; + nbytes = 16; + } + break; + + case FldLiteralN: + // Assume a 64-bit signed integer. This happens for GOBACK STATUS 101, + // the like + retval = LONG; + nbytes = 8; + break; + + default: + cbl_internal_error( "%s(): Invalid field type %s:", + __func__, + cbl_field_type_str(field->type)); + break; + } + } + if( retval == SIZE_T && field->attr & signable_e ) + { + retval = SSIZE_T; + } + if( retval == UINT128 && field->attr & signable_e ) + { + retval = INT128; + } + return retval; + } + +static void +restore_local_variables() + { + gg_call(VOID, + "__gg__pop_local_variables", + NULL_TREE); + gg_decrement(var_decl_unique_prog_id); + } + +static inline bool +is_valuable( cbl_field_type_t type ) { + switch ( type ) { + case FldInvalid: + case FldGroup: + case FldAlphanumeric: + case FldNumericEdited: + case FldAlphaEdited: + case FldLiteralA: + case FldClass: + case FldConditional: + case FldForward: + case FldSwitch: + case FldDisplay: + case FldBlob: + return false; + // These are variable types that have to be converted from their + // COBOL form to a little-endian binary representation so that they + // can be conveyed BY CONTENT/BY VALUE in a CALL or user-defined + // function activation. + case FldNumericDisplay: + case FldNumericBinary: + case FldFloat: + case FldPacked: + case FldNumericBin5: + case FldLiteralN: + case FldIndex: + case FldPointer: + return true; + } + cbl_internal_error( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type ); + return false; +} + +void parser_sleep(cbl_refer_t seconds) + { + if( seconds.field ) + { + gg_get_address_of(seconds.field->var_decl_node); + //refer_offset_source(seconds); + //refer_size_source(seconds); + + gg_call(VOID, + "__gg__sleep", + gg_get_address_of(seconds.field->var_decl_node), + refer_offset_source(seconds), + refer_size_source(seconds), + NULL_TREE); + } + else + { + // This is a naked place-holding CONTINUE. Generate some do-nothing + // code that will stick some .LOC information into the assembly language, + // so that GDB-COBOL can display the CONTINUE statement. + gg_assign(var_decl_nop, build_int_cst_type(INT, 103)); + } + } + +void +parser_exit_program(void) // exits back to COBOL only, else continue + { + static cbl_label_t this_program = {}; + static cbl_refer_t magic_refer(&this_program, false); + parser_exit( magic_refer ); + } + +/* + * If RETURNING was specified, the field is provided as an argument, no lookup + * necessary. refer.field == NULL means exit(0) unless ec != ec_none_e. + * If ec == ec_all_e, that indicates RAISING LAST EXCEPTION was used. + */ + +static +void +pe_stuff(cbl_refer_t refer, ec_type_t ec) + { + // This is the moral equivalent of a C "return xyz;". + + // There cannot be both a non-zero exit status and an exception condition. + gcc_assert( !(ec != ec_none_e && refer.field != NULL) ); + + gg_call(VOID, + "__gg__pseudo_return_flush", + NULL_TREE); + + cbl_field_t *returner = refer.field ? refer.field : current_function->returning; + + if( returner ) + { + cbl_field_type_t field_type = returner->type; + size_t nbytes = 0; + tree return_type = tree_type_from_field_type(returner, + nbytes); + tree retval = gg_define_variable(return_type); + + gg_assign(retval, gg_cast(return_type, integer_zero_node)); + + gg_modify_function_type(current_function->function_decl, + return_type); + + if( is_valuable( field_type ) ) + { + // The field being returned is numeric. + if( field_type == FldNumericBin5 + || field_type == FldFloat + || field_type == FldPointer + || field_type == FldIndex ) + { + // These are easily handled because they are all little-endian. + gg_memcpy(gg_get_address_of(retval), + member(returner, "data"), + build_int_cst_type( SIZE_T, + std::min(nbytes, (size_t)returner->data.capacity))); + } + else + { + // The field_type has a PICTURE string, so we need to convert from the + // COBOL form to little-endian binary: + tree value = gg_define_int128(); + get_binary_value( value, + NULL, + returner, + size_t_zero_node); + gg_memcpy(gg_get_address_of(retval), + gg_get_address_of(value), + build_int_cst_type(SIZE_T, nbytes)); + } + restore_local_variables(); + gg_return(retval); + } + else + { + // The RETURNING type is a group or alphanumeric + + // The byte array to be returned is in returning, which is a local + // variable on the stack. We need to make a copy of it to avoid the + // error of returning a pointer to data on the stack. + + tree array_type = build_array_type_nelts(UCHAR, + returner->data.capacity); + tree retval = gg_define_variable(array_type, vs_static); + gg_memcpy(gg_get_address_of(retval), + member(returner->var_decl_node, "data"), + member(returner->var_decl_node, "capacity")); + + tree actual = gg_cast(COBOL_FUNCTION_RETURN_TYPE, gg_get_address_of(retval)); + + restore_local_variables(); + gg_return(actual); + } + } + else + { + // There is no explicit value. This means, by default (according to) + // IBM), we return the value found in RETURN-CODE: + tree value = gg_define_variable(COBOL_FUNCTION_RETURN_TYPE); + gg_assign(value, + gg_cast(COBOL_FUNCTION_RETURN_TYPE, + var_decl_return_code)); + restore_local_variables(); + gg_return(gg_cast(COBOL_FUNCTION_RETURN_TYPE, value)); + } + } + +void +parser_exit( cbl_refer_t refer, ec_type_t ec ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( gg_trans_unit.function_stack.size() + && current_function->returning + && !refer.field) + { + // ->returning works only if there is no refer.field + SHOW_PARSE_FIELD(" RETURNING ", current_function->returning); + } + if( gg_trans_unit.function_stack.size() && refer.field ) + { + SHOW_PARSE_FIELD(" WITH STATUS ", refer.field); + } + if( gg_trans_unit.function_stack.size() && refer.prog_func ) + { + SHOW_PARSE_TEXT(" refer.prog_func is non-zero") + } + + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( refer.prog_func ) + { + // We are processing EXIT PROGRAM. If main() called us, we need to do + // nothing. Otherwise, this is a return + IF( current_function->called_by_main_counter, eq_op, integer_zero_node ) + { + // This function wasn't called by main, so we treat it like a GOBACK + pe_stuff(refer, ec); + } + ELSE + { + // This function was called by main. Is it the first call, or is it + // recursive? + IF( current_function->called_by_main_counter, gt_op, integer_one_node ) + { + // This was a recursive call into the function originally called by + // main. Because we are under the control of a calling program, we + // treat this like a GOBACK + pe_stuff(refer, ec); + } + ELSE + { + // We are not under the control of a calling program, meaning that we + // were called by main(). So, we do nothing, meaning we behave like + // a CONTINUE. + } + ENDIF + } + ENDIF + } + else + { + IF( current_function->called_by_main_counter, gt_op, integer_zero_node ) + { + // This wasn't an EXIT PROGRAM. But in the case where we are the program + // that was called by main(), we need to do some bookkeeping so that we + // respond properly to an EXIT PROGRAM should one appear + gg_decrement(current_function->called_by_main_counter); + } + ELSE + { + } + ENDIF + pe_stuff(refer, ec); + } + } + +static void +walk_initialization(cbl_field_t *field, bool initialized, bool deallocate) + { + if( !(field->attr & based_e) ) + { + // We are concerned only with BASED variables + return; + } + symbol_elem_t *e = symbol_at(field_index(field)); + bool first_time = true; + while( e < symbols_end() ) + { + symbol_elem_t& element = *e++; + if( element.type == SymField ) + { + cbl_field_t *this_one = cbl_field_of(&element); + if( !first_time ) + { + if( this_one->level == LEVEL01 || this_one->level == LEVEL77 ) + { + // Having encountered the next 01 or 77, we are done + break; + } + } + first_time = false; + if( this_one->level == 00 ) + { + // Ignore LEVEL00 "INDEXED BY" variables + continue; + } + if(deallocate) + { + gg_assign(member(this_one->var_decl_node, "data"), + gg_cast(UCHAR_P, null_pointer_node)); + } + else + { + gg_assign(member(this_one->var_decl_node, "data"), + gg_add(member(field->var_decl_node, "data"), + build_int_cst_type(SIZE_T, this_one->offset))); + if( this_one->level == 66 + || this_one->level == 88 + || symbol_redefines(this_one) ) + { + continue; + } + if( !initialized ) + { + // This is ALLOCATE Rule 9) in ISO 2023 + if( this_one->type == FldPointer ) + { + gg_memset(member(this_one->var_decl_node, "data"), + integer_zero_node, + build_int_cst_type(SIZE_T, this_one->data.capacity)); + } + } + } + } + } + } + +void +parser_allocate(cbl_refer_t size_or_based, + cbl_refer_t returning, + bool initialized ) + { + /* + * If the 1st parameter has based_e attribute, the field it is based on defines + * the number of bytes to allocate. In that case, "returning" is optional and + * may have a NULL field. Otherwise the 1st parameter is a numeric value and + * allocated space is assigned to "returning", which is of type FldPointer. + */ + + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_REF(" size_or_based from:", size_or_based) + SHOW_PARSE_INDENT + SHOW_PARSE_REF("returning: ", returning) + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("size_or_based: ", size_or_based, ""); + TRACE1_INDENT + TRACE1_REFER("returning: ", size_or_based, ""); + TRACE1_END + } + + if( returning.field ) + { + // If there is a returning, it has to be a pointer + gcc_assert(returning.field->type == FldPointer); + } + + if( !(size_or_based.field->attr & based_e) ) + { + // If the first is not based, then there must be a returning + gcc_assert(returning.field); + } + + cbl_field_t *f_working = current_options().initial_working(); + cbl_field_t *f_local = current_options().initial_local(); + + int default_byte = wsclear() ? *wsclear() : -1; + + gg_call(VOID, + "__gg__allocate", + gg_get_address_of(size_or_based.field->var_decl_node), + refer_offset_source(size_or_based) , + initialized ? integer_one_node : integer_zero_node, + build_int_cst_type(INT, default_byte), + f_working ? gg_get_address_of(f_working->var_decl_node) : null_pointer_node, + f_local ? gg_get_address_of(f_local-> var_decl_node) : null_pointer_node, + returning.field ? gg_get_address_of(returning.field->var_decl_node) + : null_pointer_node, + returning.field ? refer_offset_source(returning) + : size_t_zero_node, + NULL_TREE); + walk_initialization(size_or_based.field, initialized, false); + } + +void +parser_free( size_t n, cbl_refer_t refers[] ) + { + if( mode_syntax_only() ) return; // Normally handled by SHOW_PARSE, if present + + Analyze(); + for( auto p = refers; p < refers + n; p++ ) + { + gcc_assert( ! p->all ); + gcc_assert( ! p->is_refmod_reference() ); + if( !(p->field->type == FldPointer || p->addr_of || (p->field->attr & based_e)) ) + { + dbgmsg("Deallocating %s means it has to be FldPointer or addr_of or based_e"); + } + gcc_assert( p->field->type == FldPointer || p->addr_of || (p->field->attr & based_e) ); + + gg_call(VOID, + "__gg__deallocate", + gg_get_address_of(p->field->var_decl_node), + refer_offset_source(*p), + p->addr_of ? integer_one_node : integer_zero_node, + NULL_TREE); + walk_initialization(p->field, false, true); + } + } + +void +parser_arith_error(cbl_label_t *arithmetic_label) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We are entering either SIZE ERROR or NOT SIZE ERROR code + RETURN_IF_PARSE_ONLY; + set_up_on_exception_label(arithmetic_label); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO OVER") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down LABEL INTO:") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_END + } + + // Jump over the [NOT] ON EXCEPTION code that is about to be laid down + gg_append_statement( arithmetic_label->structs.arith_error->over.go_to ); + // Create the label that allows the following code to be executed at + // when an ERROR, or NOT ERROR, has been determined to have taken place: + gg_append_statement( arithmetic_label->structs.arith_error->into.label ); + } + +void +parser_arith_error_end(cbl_label_t *arithmetic_label) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We have reached the end of the ERROR, or NOT ERROR, code. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO BOTTOM") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down LABEL OVER:") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_END + } + + // Jump to the end of the arithmetic code: + gg_append_statement( arithmetic_label->structs.arith_error->bottom.go_to ); + // Lay down the label that allows the ERROR/NOT ERROR instructions + // to exist in a lacuna that doesn't get executed unless somebody jumps + // to it: + gg_append_statement( arithmetic_label->structs.arith_error->over.label ); + } + +static void +propogate_linkage_offsets(cbl_field_t *field, tree base) + { + if( field->level == LEVEL01 || field->level == LEVEL77 ) + { + field->data_decl_node = base; + symbol_elem_t *e = symbol_at(field_index(field)); + // We already updated the data pointer of the first element: + e += 1; + while( e < symbols_end() ) + { + symbol_elem_t& element = *e++; + if( element.type == SymField ) + { + cbl_field_t *this_one = cbl_field_of(&element); + if( this_one->level == LEVEL01 || this_one->level == LEVEL77 ) + { + // We have encountered another level 01/77. If this LEVEL 01 had a + // parent, then we have to assume that this is a redefines of another + // level 01/77. + if( this_one->parent ) + { + // And, gloriously and frighteningly, it can be handled by + // recursion: + propogate_linkage_offsets(this_one, base); + } + else + { + // Having encountered the next 01 or 77, we are done + break; + } + } + if( this_one->level == 00 ) + { + // Ignore LEVEL00 "INDEXED BY" variables + continue; + } + tree offset = gg_define_variable(SIZE_T); + IF( base, eq_op, gg_cast(UCHAR_P, null_pointer_node) ) + { + gg_assign(offset, size_t_zero_node); + } + ELSE + { + gg_assign(offset, member(this_one, "offset")); + } + ENDIF + this_one->data_decl_node = base; + member( this_one, + "data", + gg_add(base, offset)); + } + } + } + } + +static bool initialized_data = false; +static void +initialize_the_data() + { + if( initialized_data ) + { + return; + } + initialized_data = true; + // Here is where we initialize the run-time list of currency symbols: + const char *default_currency = "$"; + + // This is one-time initialization of the libgcobol program state stack + gg_call(VOID, + "__gg__init_program_state", + NULL_TREE); + + __gg__currency_signs = __gg__ct_currency_signs; + // We initialize currency both at compile time and run time + __gg__currency_sign_init(); + gg_call(VOID, + "__gg__currency_sign_init", + NULL_TREE); + + gg_call(VOID, + "__gg__set_program_name", + gg_string_literal( current_filename.back().c_str() ), + NULL_TREE); + + for(int symbol=0; symbol<256; symbol++) + { + const char *sign = symbol_currency(symbol); + if( sign ) + { + default_currency = NULL; + + // Both compile-time and run-time + __gg__currency_sign(symbol, sign); + gg_call(VOID, + "__gg__currency_sign", + build_int_cst_type(INT, symbol), + build_string_literal(strlen(sign)+1, sign), + NULL_TREE); + } + } + if( default_currency ) + { + __gg__currency_sign(default_currency[0], default_currency); + gg_call(VOID, + "__gg__currency_sign", + char_nodes[(int)default_currency[0]], + gg_string_literal(default_currency), + NULL_TREE); + } + + // It's time to tell the library about DECIMAL-POINT IS COMMA: + if( symbol_decimal_point() == ',' ) + { + __gg__decimal_point = ascii_comma ; + __gg__decimal_separator = ascii_period ; + gg_call(VOID, + "__gg__decimal_point_is_comma", + NULL_TREE); + } + } + +void +parser_division(cbl_division_t division, + cbl_field_t *returning, + size_t nusing, + cbl_ffi_arg_t args[] ) + { + // This is called when the parser enters a COBOL program DIVISION. See + // parser_divide for the arithmetic operation. + + if( mode_syntax_only() ) return; + + // Do this before the SHOW_PARSE; it makes a little more sense when reviewing + // the SHOW_PARSE output. + if( division == identification_div_e ) + { + initialized_data = false; + if( gg_trans_unit.function_stack.size() >= 1 ) + { + // This is a nested program. So, we need to tie off the current + // section: + leave_paragraph_internal(); + leave_section_internal(); + } + } + + if( division == environment_div_e ) + { + initialized_data = false; + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + switch(division) + { + case identification_div_e: + SHOW_PARSE_TEXT("IDENTIFICATION") + break; + case environment_div_e: + SHOW_PARSE_TEXT("ENVIRONMENT") + break; + case data_div_e: + SHOW_PARSE_TEXT("DATA") + break; + case procedure_div_e: + SHOW_PARSE_TEXT("PROCEDURE") + break; + } + + SHOW_PARSE_END + } + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + if( division == data_div_e ) + { + Analyze(); + initialize_the_data(); + } + if( division == environment_div_e ) + { + Analyze(); + initialize_the_data(); + } + else if( division == procedure_div_e ) + { + Analyze(); + initialize_the_data(); + + // Do some symbol table index bookkeeping. current_program_index() is valid + // at this point in time: + current_function->our_symbol_table_index = current_program_index(); + + // We have some housekeeping to do to keep track of the list of functions + // accessible by us: + + // For every procedure, we need a variable that points to the list of + // available program names. + + // We need a pointer to the array of program names + char ach[2*sizeof(cbl_name_t)]; + sprintf(ach, + "..accessible_program_list_%ld", + current_function->our_symbol_table_index); + tree prog_list = gg_define_variable(build_pointer_type(CHAR_P), + ach, vs_file_static); + + // Likewise, we need a pointer to the array of pointers to functions: + tree function_type = + build_varargs_function_type_array( SIZE_T, + 0, // No parameters yet + NULL); // And, hence, no types + tree pointer_type = build_pointer_type(function_type); + tree constructed_array_type = build_array_type_nelts(pointer_type, 1); + sprintf(ach, + "..accessible_program_pointers_%ld", + current_function->our_symbol_table_index); + tree prog_pointers = gg_define_variable( + build_pointer_type(constructed_array_type), + ach, + vs_file_static); + gg_call(VOID, + "__gg__set_program_list", + build_int_cst_type(INT, current_function->our_symbol_table_index), + gg_get_address_of(prog_list), + gg_get_address_of(prog_pointers), + NULL_TREE); + + if( gg_trans_unit.function_stack.size() == 1 ) + { + gg_create_goto_pair(&label_list_out_goto, + &label_list_out_label); + gg_create_goto_pair(&label_list_back_goto, + &label_list_back_label); + gg_append_statement(label_list_out_goto); + gg_append_statement(label_list_back_label); + } + + tree globals_are_initialized = gg_declare_variable( INT, + "__gg__globals_are_initialized", + NULL, + vs_external_reference); + IF( globals_are_initialized, eq_op, integer_zero_node ) + { + // one-time initialization happens here + + // We need to establish the initial value of the UPSI-1 switch register + // We are using IBM's conventions: + // https://www.ibm.com/docs/en/zvse/6.2?topic=SSB27H_6.2.0/fa2sf_communicate_appl_progs_via_job_control.html + // UPSI 10000110 means that bits 0, 5, and 6 are on, which means that + // SW-0, SW-5, and SW-6 are on. + gg_call(VOID, + "__gg__set_initial_switch_value", + NULL_TREE); + + // And then flag one-time initialization as having been done. + gg_assign(globals_are_initialized, integer_one_node); + } + ELSE + ENDIF + + gg_append_statement(current_function->skip_init_label); + // This is where we check to see if somebody tried to cancel us + tree cancelled = gg_define_int(); + gg_assign(cancelled, + gg_call_expr( INT, + "__gg__is_canceled", + gg_cast(SIZE_T, + current_function->function_address), + NULL_TREE)); + IF( cancelled, ne_op, integer_zero_node ) + { + // Somebody flagged us for CANCEL, which means reinitialization, so we + // need to find the _INITIALIZE_PROGRAM section label. + + // gg_printf("Somebody wants to cancel %s\n", + // gg_string_literal(current_function->our_unmangled_name), + // NULL_TREE); + cbl_label_t *prog = cbl_label_of(symbol_at(current_program_index())); + size_t initializer_index = prog->initial_section; + cbl_label_t *initializer = cbl_label_of(symbol_at(initializer_index)); + parser_perform(initializer, true); // true means suppress nexting + } + ELSE + ENDIF + + // RETURNING variables are supposed to be in the linkage section, which + // means that we didn't assign any storage to them during + // parser_symbol_add(). We do that here. + + // returning also needs to behave like local storage, even though it is + // in linkage. + + // This counter is used to help keep track of local variables + gg_increment(var_decl_unique_prog_id); + if( returning ) + { + parser_local_add(returning); + current_function->returning = returning; + } + + // Stash the returning variables for use during parser_return() + current_function->returning = returning; + + if( gg_trans_unit.function_stack.size() == 1 ) + { + // We are entering a new top-level program, so we need to set + // RETURN-CODE to zero + gg_assign(var_decl_return_code, build_int_cst_type(SHORT, 0)); + } + + // The parameters passed to this program might be 64 bits or 128 bits in + // length. We establish those lengths based on the types of the target + // for each USING. + + for(size_t i=0; i<nusing; i++) + { + // This code is relevant at compile time. It takes each + // expected formal parameter and tacks it onto the end of the + // function's arguments chain. + + char ach[2*sizeof(cbl_name_t)]; + sprintf(ach, "_p_%s", args[i].refer.field->name); + + size_t nbytes = 0; + tree par_type = tree_type_from_field_type(args[i].refer.field, nbytes); + if( par_type == FLOAT ) + { + par_type = SSIZE_T; + } + if( par_type == DOUBLE ) + { + par_type = SSIZE_T; + } + if( par_type == FLOAT128 ) + { + par_type = INT128; + } + chain_parameter_to_function(current_function->function_decl, par_type, ach); + } + + bool check_for_parameter_count = false; + + if( nusing ) + { + // During the call, we saved the parameter_count and an array of variable + // lengths. We need to look at those values if, and only if, one or more + // of our USING arguments has an OPTIONAL flag or if one of our targets is + // marked as VARYING. + for(size_t i=0; i<nusing; i++) + { + if( args[i].optional ) + { + check_for_parameter_count = true; + break; + } + if( args[i].refer.field->attr & any_length_e ) + { + check_for_parameter_count = true; + break; + } + } + + if( check_for_parameter_count ) + { + IF( var_decl_call_parameter_signature, + eq_op, + gg_cast(CHAR_P, current_function->function_address) ) + { + // We know to use var_decl_call_parameter_count, so unflag this + // pointer to avoid problems in the ridiculous possibility of + // COBOL-A calls C_B calls COBOL_A + gg_assign(var_decl_call_parameter_signature, + gg_cast(CHAR_P, null_pointer_node)); + } + ELSE + { + // We were apparently called by a C routine, not a COBOL routine, so + // make sure we don't get shortchanged by a count left behind from an + // earlier COBOL call. + gg_assign(var_decl_call_parameter_count, + build_int_cst_type(INT, A_ZILLION)); + } + ENDIF + } + else + { + // None of our parameters require a count, so make sure we don't get + // bamboozled by a count left behind from an earlier COBOL call. + gg_assign(var_decl_call_parameter_count, + build_int_cst_type(INT, A_ZILLION)); + } + + // There are 'nusing' elements in the PROCEDURE DIVISION USING list. + + tree parameter; + tree rt_i = gg_define_int(); + for(size_t i=0; i<nusing; i++) + { + // And this compiler code generates run-time execution code. The + // generated code picks up, at run time, the variable we just + // established in the chain at compile time. + + // It makes more sense if you don't think about it too hard. + + // We need to be able to restore prior arguments when doing recursive + // calls: + IF( member(args[i].refer.field->var_decl_node, "data"), + ne_op, + gg_cast(UCHAR_P, null_pointer_node) ) + { + gg_call(VOID, + "__gg__push_local_variable", + gg_get_address_of(args[i].refer.field->var_decl_node), + NULL_TREE); + } + ELSE + ENDIF + + tree base = gg_define_variable(UCHAR_P); + gg_assign(rt_i, build_int_cst_type(INT, i)); + //gg_printf("The rt_i counter is %d\n", rt_i, NULL_TREE); + IF( rt_i, lt_op , var_decl_call_parameter_count ) + { + if( i == 0 ) + { + // This is the first parameter. + parameter = DECL_ARGUMENTS(current_function->function_decl); + } + else + { + // These are subsequent parameters + parameter = TREE_CHAIN(parameter); + } + gg_assign(base, gg_cast(UCHAR_P, parameter)); + + IF( gg_call_expr( CHAR_P, + "getenv", + gg_string_literal("PARAMETERS_ON_ENTRY"), + NULL_TREE), + ne_op, + gg_cast(CHAR_P, null_pointer_node)); + { + gg_printf("parameter_on_entry: %s(): %d %p\n", + gg_string_literal(current_function->our_unmangled_name), + build_int_cst_type(INT, i+1), + base, + NULL_TREE); + } + ELSE + ENDIF + + if( args[i].refer.field->attr & any_length_e ) + { + //gg_printf("side channel 0x%lx\n", gg_array_value(var_decl_call_parameter_lengths, rt_i), NULL_TREE); + + // Get the length from the global lengths[] side channel. Don't + // forget to use the length mask on the table value. + gg_assign(member(args[i].refer.field->var_decl_node, "capacity"), + gg_array_value(var_decl_call_parameter_lengths, rt_i)); + } + } + ELSE + { + gg_assign(base, gg_cast(UCHAR_P, null_pointer_node)); + } + ENDIF + + // Arriving here means that we are processing an instruction like + // this: + // PROCEDURE DIVISION USING using[0] using[1] ... using using[nusing-1] + + // When __gg__call_parameter_count is equal to A_ZILLION, then this is + // an OTHER-TO-COBOL call and the var_decl_call_parameter_lengths array + // is not valid + + cbl_ffi_crv_t crv = args[i].crv; + cbl_field_t *new_var = args[i].refer.field; + + if( crv == by_value_e ) + { + switch(new_var->type) + { + case FldGroup: + case FldAlphanumeric: + case FldAlphaEdited: + case FldNumericEdited: + crv = by_reference_e; + break; + default: + break; + } + } + + if( crv == by_value_e ) + { + // 'parameter' is the 64-bit or 128-bit value that was placed on the stack + + size_t nbytes; + tree_type_from_field_type(new_var, nbytes); + tree parm = gg_define_variable(INT128); + + if( nbytes <= 8 ) + { + // Our input is a 64-bit number + if( new_var->attr & signable_e ) + { + IF( gg_bitwise_and( gg_cast(SIZE_T, base), + build_int_cst_type(SIZE_T, 0x8000000000000000ULL)), + ne_op, + gg_cast(SIZE_T, integer_zero_node) ) + { + // Our input is a negative number + gg_assign(parm, gg_cast(INT128, integer_minus_one_node)); + } + ELSE + { + // Our input is a positive number + gg_assign(parm, gg_cast(INT128, integer_zero_node)); + } + ENDIF + } + else + { + // This is a 64-bit positive number: + gg_assign(parm, gg_cast(INT128, integer_zero_node)); + } + } + // At this point, parm has been set to 0 or -1 + + gg_memcpy(gg_get_address_of(parm), + gg_get_address_of(base), + build_int_cst_type(SIZE_T, nbytes)); + + tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity); + tree data_decl_node = gg_define_variable( array_type, + NULL, + vs_static); + gg_assign( member(new_var->var_decl_node, "data"), + gg_get_address_of(data_decl_node) ); + + // And then move it into place + gg_call(VOID, + "__gg__assign_value_from_stack", + gg_get_address_of(new_var->var_decl_node), + parm, + NULL_TREE); + // We now have to handle an oddball situation. It's possible we are + // dealing with + // + // linkage section. + // 01 var1 + // 01 var2 redefines var1 + // + // If so, we have to give var2::data_pointer the same value as + // var1::data_pointer + // + cbl_field_t *next_var; + size_t our_index = symbol_index(symbol_elem_of(new_var)); + size_t next_index = our_index + 1; + // Look ahead in the symbol table for the next LEVEL01/77 + for(;;) + { + symbol_elem_t *e = symbol_at(next_index); + if( e->type != SymField ) + { + break; + } + next_var = cbl_field_of(e); + if( !next_var ) + { + break; + } + if( next_var->level == LEVEL01 || next_var->level == LEVEL77 ) + { + if( next_var->parent == our_index ) + { + gg_assign(member(next_var->var_decl_node, "data"), + member(new_var->var_decl_node, "data")); + } + break; + } + next_index += 1; + } + } + else + { + // 'parameter' is a reference, so it it becomes the data member of + // the cblc_field_t COBOL variable. + gg_assign(member(args[i].field()->var_decl_node, "data"), base); + + // We need to apply base + offset to the LINKAGE variable + // and all of its children + propogate_linkage_offsets( args[i].field(), base ); + } + } + } + + gg_call(VOID, + "__gg__pseudo_return_bookmark", + NULL_TREE); + + // The MODULE-NAME function requires a stack of program names. We push the + // name on here. The first character is a 'T' or an 'N', where 'N' means + // this is a nested program. + + if( gg_trans_unit.function_stack.size() > 1 ) + { + // This is a nested program + strcpy(ach, "N"); + } + else + { + // This is a top-level program: + strcpy(ach, "T"); + } + strcat(ach, current_function->our_unmangled_name); + gg_call(VOID, + "__gg__module_name_push", + gg_string_literal(ach), + NULL_TREE); + + IF( var_decl_main_called, ne_op, integer_zero_node ) + { + // We were just called by main: + gg_assign(var_decl_main_called, integer_zero_node); + gg_assign(current_function->called_by_main_counter, integer_one_node); + } + ELSE + { + // This isn't a call from main(), but it might be a recursive call to the + // function that was called by main: + IF(current_function->called_by_main_counter, ne_op, integer_zero_node) + { + // In that case, we bump the counter to keep track of things. + gg_increment(current_function->called_by_main_counter); + } + ELSE + { + } + ENDIF + } + ENDIF + } + } + +void +parser_logop( struct cbl_field_t *tgt, + struct cbl_field_t *a, // Is NULL for single-valued ops + enum logop_t logop, + struct cbl_field_t *b ) + { + Analyze(); + SHOW_PARSE + { + if( logop == true_op) + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" will be set to TRUE ") + } + else if( logop == false_op) + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" will be set to FALSE ") + } + else + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" = ") + if( a ) + { + SHOW_PARSE_FIELD("", a) + } + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT( cbl_logop_str(logop) ) + if( b ) + { + SHOW_PARSE_FIELD(" ", b) + } + } + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + switch(logop) + { + case and_op: + case or_op: + case xor_op: + case xnor_op: + case not_op: + CHECK_FIELD(b); + break; + default: + break; + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("operation: ", cbl_logop_str(logop), "") + TRACE1_END + if( logop != true_op ) + { + if( a ) + { + TRACE1_INDENT + TRACE1_FIELD("operand A: ", a, ""); + } + TRACE1_INDENT + if( b ) + { + TRACE1_FIELD("operand B: ", b, ""); + } + TRACE1_END + } + } + + switch(logop) + { + case and_op: + case or_op: + case xor_op: + case xnor_op: + CHECK_FIELD(a); + break; + default: + break; + } + + // This routine takes two conditionals and a logical operator. From those, + // it creates and returns another conditional: + + if( tgt->type != FldConditional ) + { + cbl_internal_error("parser_logop() was called with variable %s on line %d" + ", which is not a FldConditional\n", + tgt->name, cobol_location().first_line); + } + if( a && a->type != FldConditional ) + { + cbl_internal_error("parser_logop() was called with variable %s on line %d" + ", which is not a FldConditional\n", + a->name, cobol_location().first_line); + } + if( b && b->type != FldConditional ) + { + cbl_internal_error("parser_logop() was called with variable %s on line %d" + ", which is not a FldConditional\n", + b->name, cobol_location().first_line); + } + + switch( logop ) + { + case and_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + a->var_decl_node, + and_op, + b->var_decl_node)); + break; + + case or_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + a->var_decl_node, + or_op, + b->var_decl_node)); + break; + + case not_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + NULL, + not_op, + b->var_decl_node)); + break; + + case xor_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + a->var_decl_node, + xor_op, + b->var_decl_node)); + break; + + case xnor_op: + { + gg_assign( tgt->var_decl_node, + gg_build_logical_expression(a->var_decl_node, + xor_op, + b->var_decl_node)); + + // I need to negate the result. + + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + NULL, + not_op, + tgt->var_decl_node)); + } + break; + + case true_op: + gg_assign(tgt->var_decl_node, boolean_true_node); + break; + + case false_op: + gg_assign(tgt->var_decl_node, boolean_false_node); + break; + } + + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT_ABC("result: ", tgt->name, "") + TRACE1_FIELD_VALUE("", tgt, "") + TRACE1_END + } + } + +void +parser_relop( cbl_field_t *tgt, + cbl_refer_t aref, + enum relop_t relop, + cbl_refer_t bref ) + { + Analyze(); + cbl_field_t *a = aref.field, *b = bref.field; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_REF(" = ", aref) + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(relop_str(relop)) + SHOW_PARSE_REF(" ", bref) + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + CHECK_FIELD(a); + CHECK_FIELD(b); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("operation: ", relop_str(relop), "") + TRACE1_INDENT + TRACE1_REFER("operand A: ", aref, ""); + TRACE1_INDENT + TRACE1_REFER("operand B: ", bref, ""); + } + + // This routine builds the relational expression and returns the TREE as + // a conditional: + + if( tgt->type != FldConditional ) + { + cbl_internal_error("parser_relop() was called with variable %s, " + "which is not a FldConditional\n", + tgt->name); + } + + static tree comp_res = gg_define_variable(INT, "..pr_comp_res", vs_file_static); + cobol_compare(comp_res, aref, bref); + + // comp_res is negative, zero, position for less-than, equal-to, greater-than + + // So, we simply compare the result of the comparison to zero using the relop + // we were given to turn it into a TRUE/FALSE + gg_assign( tgt->var_decl_node, + gg_build_relational_expression( comp_res, + relop, + integer_zero_node)); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_relop_long(cbl_field_t *tgt, + long avalue, + enum relop_t relop, + cbl_refer_t bref ) + { + Analyze(); + // We are comparing a long to a field, so the field had best be numerical + + cbl_field_t *b = bref.field; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" = <long value> ") + SHOW_PARSE_TEXT(relop_str(relop)) + SHOW_PARSE_REF(" ", bref) + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + CHECK_FIELD(b); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("operation: ", relop_str(relop), "") + TRACE1_INDENT + char ach[32]; + sprintf(ach, "operand A: %ld (long value) ", avalue); + TRACE1_TEXT(ach); + TRACE1_INDENT + TRACE1_REFER("operand B: ", bref, ""); + } + + // This routine builds the relational expression and returns the TREE as + // a conditional: + + if( tgt->type != FldConditional ) + { + cbl_internal_error("parser_relop() was called with variable %s, " + "which is not a FldConditional\n", + tgt->name); + } + + tree tree_a = build_int_cst_type(LONG, avalue); + static tree tree_b = gg_define_variable(LONG, "..prl_tree_b", vs_file_static); + get_binary_value( tree_b, + NULL, + bref.field, + refer_offset_source(bref) ); + + static tree comp_res = gg_define_variable(LONG, "..prl_comp_res", vs_file_static); + gg_assign(comp_res, gg_subtract(tree_a, tree_b)); + + // comp_res is negative, zero, position for less-than, equal-to, greater-than + + // So, we simply compare the result of the comparison to zero using the relop + // we were given to turn it into a TRUE/FALSE + gg_assign( tgt->var_decl_node, + gg_build_relational_expression( comp_res, + relop, + gg_cast(LONG, integer_zero_node))); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_if( struct cbl_field_t *conditional ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", conditional) + SHOW_PARSE_END + } + + CHECK_FIELD(conditional); + + if( conditional->type != FldConditional ) + { + cbl_internal_error("parser_if() was called with variable %s, " + "which is not a FldConditional\n", + conditional->name); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("testing: ") + TRACE1_TEXT(conditional->name) + TRACE1_FIELD_VALUE("", conditional, "") + TRACE1_END + } + + gg_create_true_false_statement_lists(conditional->var_decl_node); + } + +// The following routines border on abuse of the preprocessor, if not the +// programmer who is trying to understand this. Look at the #defines in +// gengen.h, and check out the comments for gg_if in gengen.c + +void +parser_else(void) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + ELSE + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("taking FALSE branch") + TRACE1_END + } + } + +void +parser_fi(void) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + ENDIF + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + } + +void +parser_see_stop_run(struct cbl_refer_t exit_status, + const char *message) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( exit_status.field ) + { + SHOW_PARSE_FIELD(" ERROR STATUS ", exit_status.field); + } + SHOW_PARSE_END + } + if( message ) + { + parser_display_literal(message, DISPLAY_ADVANCE); + } + TRACE1 + { + TRACE1_HEADER + } + + // It's a stop run. Return return-code to the operating system: + static tree returned_value = gg_define_variable(INT, "..pssr_retval", vs_file_static); + + if( exit_status.field ) + { + // There is an exit_status, so it wins: + get_binary_value( returned_value, + NULL, + exit_status.field, + refer_offset_source(exit_status)); + TRACE1 + { + TRACE1_REFER(" exit_status ", exit_status, "") + } + } + else + { + gg_assign(returned_value, gg_cast(INT, var_decl_return_code)); + TRACE1 + { + gg_fprintf( trace_handle, + 2, + "RETURN-CODE %s [%d]", + gg_string_literal(cbl_field_of( + symbol_at(return_code_register()))->name), + returned_value); + } + } + TRACE1 + { + gg_printf(" gg_exit(%d)\n", returned_value, NULL_TREE); + TRACE1_END + } + gg_exit(returned_value); + } + +static +cbl_label_addresses_t * +label_fetch(struct cbl_label_t *label) + { + if( !label->structs.goto_trees ) + { + label->structs.goto_trees + = (cbl_label_addresses_t *)xmalloc(sizeof(struct cbl_label_addresses_t) ); + + gg_create_goto_pair(&label->structs.goto_trees->go_to, + &label->structs.goto_trees->label); + } + return label->structs.goto_trees; + } + +void +parser_label_label(struct cbl_label_t *label) + { + label->lain = yylineno; + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL("", label) + char ach[32]; + sprintf(ach, " label is at %p", label); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " label->proc is %p", label->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("Establish label: ", label, "") + TRACE1_END + } + + if(strcmp(label->name, "_end_declaratives") == 0 ) + { + suppress_cobol_entry_point = false; + } + gg_append_statement( label_fetch(label)->label ); + } + +void +parser_label_goto(struct cbl_label_t *label) + { + label->used = yylineno; + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + char ach[32]; + sprintf(ach, " label is at %p", label); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " label->proc is %p", label->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("GOTO label: ", label, "") + TRACE1_END + } + + if(strcmp(label->name, "_end_declaratives") == 0 ) + { + suppress_cobol_entry_point = true; + } + + gg_append_statement( label_fetch(label)->go_to ); + } + +void +parser_setop( struct cbl_field_t *tgt, + struct cbl_field_t *candidate, + enum setop_t op, + struct cbl_field_t *domain) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_FIELD(" = ", candidate) + if( op == is_op ) + { + SHOW_PARSE_TEXT(" is_op ") + } + SHOW_PARSE_FIELD(" = ", domain) + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + CHECK_FIELD(candidate); + CHECK_FIELD(domain); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("parser_setop: ", candidate, "") + TRACE1_TEXT(" ") + TRACE1_TEXT(setop_str(op)) + TRACE1_FIELD(" ", domain, "") + TRACE1_END + } + + gcc_assert(tgt->type == FldConditional); + gcc_assert(domain->data.initial); + gcc_assert(strlen(domain->data.initial)); + + switch(op) + { + case is_op: + switch(candidate->type) + { + case FldGroup: + case FldAlphanumeric: + gg_assign(tgt->var_decl_node, gg_build_relational_expression( + gg_call_expr(INT, + "__gg__setop_compare", + member(candidate, "data"), + member(candidate, "capacity"), + member(domain, "initial"), + NULL_TREE), + ne_op, + integer_zero_node)); + break; + default: + dbgmsg("###### %10s in %s:%d\n", __func__, __FILE__, __LINE__ ); + cbl_internal_error( + "###### candidate %s has unimplemented CVT_type %d(%s)\n", + candidate->name, + candidate->type, + cbl_field_type_str(candidate->type)); + gcc_unreachable(); + break; + } + break; + + default: + dbgmsg("###### %10s in %s:%d\n", __func__, __FILE__, __LINE__ ); + cbl_internal_error("###### unknown setop_t code %d\n", op); + gcc_unreachable(); + break; + } + } + +void +parser_classify( cbl_field_t *tgt, + cbl_refer_t candidate, + enum classify_t type ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_FIELD(" = ", candidate.field) + SHOW_PARSE_TEXT(" IS ") + SHOW_PARSE_TEXT(classify_str(type)) + SHOW_PARSE_END + } + + gcc_assert(tgt->type == FldConditional); + + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER_VALUE("parser_classify: ", candidate, "") + TRACE1_TEXT(" ") + TRACE1_TEXT(classify_str(type)) + } + + gg_assign(tgt->var_decl_node, gg_build_relational_expression( + gg_call_expr(INT, + "__gg__classify", + build_int_cst_type(INT, type), + gg_get_address_of(candidate.field->var_decl_node), + refer_offset_dest(candidate), + refer_size_dest(candidate), + NULL_TREE), + ne_op, + integer_zero_node)); + + TRACE1 + { + TRACE1_TEXT(" result is ") + TRACE1_TEXT(tgt->name) + TRACE1_FIELD_VALUE(" -> ", tgt, "") + TRACE1_END + } + } + +void +parser_perform(struct cbl_perform_tgt_t *tgt, struct cbl_refer_t how_many) + { + cbl_field_t *N = how_many.field; + // No SHOW_PARSE here; we want to fall through: + if( !tgt->to() ) + { + // We only have tgt->from. + if( !N ) + { + // There is no N. This is a simple PERFORM proc-1 + parser_perform(tgt->from()); + } + else + { + // This is a PERFORM proc-1 N TIMES + parser_perform_times(tgt->from(), how_many); + } + } + else + { + // We have both from and to + if( !N ) + { + // There is no N. This is PERFORM proc-1 THROUGH proc-2 + // false means nexting in GDB will work + internal_perform_through(tgt->from(), tgt->to(), false); + } + else + { + // This is a PERFORM proc-1 THROUGH proc-2 N TIMES + internal_perform_through_times(tgt->from(), tgt->to(), how_many); + } + } + } + +static void +create_iline_address_pairs(struct cbl_perform_tgt_t *tgt) + { + gg_create_goto_pair(&tgt->addresses.top.go_to, + &tgt->addresses.top.label); + + gg_create_goto_pair(&tgt->addresses.exit.go_to, + &tgt->addresses.exit.label); + + gg_create_goto_pair(&tgt->addresses.test.go_to, + &tgt->addresses.test.label); + + gg_create_goto_pair(&tgt->addresses.testA.go_to, + &tgt->addresses.testA.label); + + gg_create_goto_pair(&tgt->addresses.setup.go_to, + &tgt->addresses.setup.label); + + // Even in -O0 compilations, the compiler does some elementary optimizations + // around JMP instructions. We have the SETUP code for in-line performats + // in an island at the end of the loop code. With this intervention, NEXTing + // through the code shows you the final statement of the loop before the + // loop actually starts. + + tgt->addresses.line_number_of_setup_code = gg_get_current_line_number(); + } + +void +parser_perform_start( struct cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( tgt ) + { + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_LABEL(" ", tgt->from()) + if( tgt->to() ) + { + SHOW_PARSE_LABEL(" ", tgt->to()) + } + } + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + if( tgt->from() ) + { + TRACE1_LABEL(" from ", tgt->from(), "") + } + if( tgt->to() ) + { + TRACE1_LABEL(" to ", tgt->to(), "") + } + TRACE1_END + } + + // Create the goto/label pairs we are going to be needing: + create_iline_address_pairs(tgt); + + // From here we have to jump to the loop setup code: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO SETUP") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.setup.go_to); + + // The next parser+_generated instructions will be the body of the loop, so we + // need a TOP label here so we can get back to them: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL TOP:") + SHOW_PARSE_END + } + + // Give GDB-COBOL something to chew on when NEXTing. This instruction will + // get the line number of the PERFORM N TIMES code. + gg_append_statement(tgt->addresses.top.label); + gg_assign(var_decl_nop, build_int_cst_type(INT, 104)); + } + +void +parser_perform_conditional( struct cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + size_t i = tgt->addresses.number_of_conditionals; + + if( !(i < MAXIMUM_UNTILS) ) + { + cbl_internal_error("%s:%d: %zu exceeds MAXIMUM_UNTILS of %d, line %d", + __func__, __LINE__, i, MAXIMUM_UNTILS, CURRENT_LINE_NUMBER); + } + gcc_assert(i < MAXIMUM_UNTILS); + + // Create an unnamed goto/label pair for jumping over the conditional + // calculation. + gg_create_goto_pair(&tgt->addresses.condover[i].go_to, + &tgt->addresses.condover[i].label); + + // Create an unnamed goto/label pair for jumping into the + // conditional calculation: + gg_create_goto_pair(&tgt->addresses.condinto[i].go_to, + &tgt->addresses.condinto[i].label); + + // Create an unnamed goto/label pair for jumping back from the + // conditional calculation: + gg_create_goto_pair(&tgt->addresses.condback[i].go_to, + &tgt->addresses.condback[i].label); + + // The next instructions that the parser will give us are the conditional + // calculation, so the first thing that goes down is the condover: + gg_append_statement(tgt->addresses.condover[i].go_to); + + // And then, of course, we need to be able to jump back here to actually + // do the run-time conditional calculations: + gg_append_statement(tgt->addresses.condinto[i].label); + + tgt->addresses.number_of_conditionals += 1; + } + +void +parser_perform_conditional_end( struct cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + size_t i = tgt->addresses.number_of_conditionals; + gcc_assert(i); + + // We need to cap off the prior conditional in this chain of conditionals + gg_append_statement(tgt->addresses.condback[i-1].go_to); + gg_append_statement(tgt->addresses.condover[i-1].label); + } + +static void +build_N_pairs(tree *go_to, tree *label, size_t N) + { + for(size_t i=0; i<N; i++) + { + tree a; + tree b; + gg_create_goto_pair(&a, &b); + go_to[i] = a; + label[i] = b; + } + } + +static void +perform_outofline_before_until(struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t /*N*/, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [through proc-2] TEST BEFORE} UNTIL + + /* + TOP: + IF CONDITION 0 + GOTO EXIT + ELSE + EXECUTE BODY + GOTO TOP + EXIT: + */ + + create_iline_address_pairs(tgt); + + // Tag the top of the perform + gg_append_statement(tgt->addresses.top.label); + + // Go do the conditional calculation: + + gg_append_statement(tgt->addresses.condinto[0].go_to); + + // And put down the label so that the conditional calculation knows + // where to return: + gg_append_statement(tgt->addresses.condback[0].label); + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + parser_if(varys[0].until); + { + // We're done, so leave + gg_append_statement(tgt->addresses.exit.go_to); + } + parser_else(); + { + // We're not done, so execute the body + // true means GDB next will fall through + internal_perform_through(tgt->from(), tgt->to(), true); + + // Jump back to the test: + gg_append_statement(tgt->addresses.top.go_to ); + } + parser_fi(); + + // Label the bottom of the PERFORM + gg_append_statement( tgt->addresses.exit.label ); + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline_after_until(struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t /*N*/, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [through proc-2] TEST AFTER UNTIL + + /* + TOP: + EXECUTE BODY + IF CONDITION 0 + GOTO EXIT + ELSE + ADD BY_0 to VARYING_0 + GOTO TOP + EXIT: + */ + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + create_iline_address_pairs(tgt); + + // Label the top of the loop + gg_append_statement(tgt->addresses.top.label); + + // Build the perform: + // true in the next call means that GDB next will not stop until the entire + // until loop is finished + internal_perform_through(tgt->from(), tgt->to(), true); + + // Go recalculate the conditional: + gg_append_statement( tgt->addresses.condinto[0].go_to); + + // And lay down the label for the come-back from the recalculation: + gg_append_statement( tgt->addresses.condback[0].label); + + // Assess the conditional + parser_if(varys[0].until); + // It's true, so we're done + gg_append_statement( tgt->addresses.exit.go_to ); + parser_else(); + // It's false, so execute the body again + gg_append_statement( tgt->addresses.top.go_to ); + parser_fi(); + // Label the bottom of the PERFORM + gg_append_statement( tgt->addresses.exit.label ); + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline_testafter_varying(struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [THROUGH proc-2] TEST AFTER VARYING + + /* + + [ENTRANCE] + MOVE FROM_0 TO VARYING_0 + INIT_1: + MOVE FROM_1 TO VARYING_1 + INIT_2: + MOVE FROM_2 TO VARYING_2 + . . . . . . . . . . . . . . . . . . + INIT_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + INIT_N-1: + MOVE FROM_N-1 TO VARYING_N-1 + GOTO TOP + TOP: + PERFORM PROC-1 [THROUGH PROC-2] + IF NOT CONDITION_N-1 + ADD BY_N-1 TO VARYING_N-1 + GOTO TOP + IF NOT CONDITION_N-2 + ADD BY_N-2 TO VARYING_N-2 + GOTO INIT_N-1 + IF NOT CONDITION_N-3 + ADD BY_N-3 TO VARYING_N-3 + GOTO INIT_N-2 + . . . . . . . . . . . . . . . . . . + IF NOT CONDITION_1 + ADD BY_1 TO VARYING_1 + GOTO INIT_2 + IF NOT CONDITION_0 + ADD BY_0 TO VARYING_0 + GOTO INIT_1 + EXIT: + + */ + + // So, we're going to do that. But because the initializations + // and the testing are so nicely loopish, we're going to let + // the computer create them for us. + + // We are going to need a set of N label pairs. Actually, we + // only need N-1; we don't use the zeroth pair. But the code + // is cleaner if we just build all N of them. + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + create_iline_address_pairs(tgt); + + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + + build_N_pairs(go_to, label, N); + + // Build the initialization section: + for(size_t i=0; i<N; i++) + { + gg_append_statement(label[i]); + parser_move(varys[i].varying, varys[i].from); + } + // These next two statements do nothing. But it'll make sense + // when we move the logic around to create an inline VARYING + gg_append_statement(tgt->addresses.top.go_to); + gg_append_statement(tgt->addresses.top.label); + + // Build the body: + // true in the next call means that the entire loop will complete + // even in the face of a GDB next + internal_perform_through(tgt->from(), tgt->to(), true); + + // Build the test section + // (The oddball test is because N is a size_t, and can't go negative) + for(size_t i=N-1; i<N; i--) + { + // Jump to the conditional calculation: + gg_append_statement( tgt->addresses.condinto[i].go_to); + + // And put down the label for the return from that calculation: + gg_append_statement( tgt->addresses.condback[i].label); + + parser_if( varys[i].until ); + // Condition is true; so we'll fall through + parser_else(); + // Condition is false, so we increment, and keep going: + parser_add(varys[i].varying, varys[i].by, varys[i].varying); + if( i == N-1 ) + { + gg_append_statement(tgt->addresses.top.go_to); + } + else + { + gg_append_statement(go_to[i+1]); + } + parser_fi(); + } + // Arriving here means that we all of the conditions were + // true. So, we're done. + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline_before_varying( struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [THROUGH proc-2] TEST BEFORE VARYING + + /* + + ENTRANCE: + SET ALL VARYING-N to FROM-N + TEST_0: + IF CONDITION_0: + GOTO EXIT: + TEST_1: + IF CONDITION_1: + ADD BY_0 TO VARYING_0 + MOVE FROM_1 TO VARYING_1 + GOTO TEST_0 + TEST_2: + IF CONDITION_2: + ADD BY_1 TO VARYING_1: + MOVE FROM_2 TO VARYING_2 + GOTO TEST_1: + TEST_3: + IF CONDITION_3: + ADD BY_2 TO VARYING_2: + MOVE FROM_3 TO VARYING_3 + GOTO TEST_1: + . . . . . . . . . . . . . . . . + TEST_N-1: + IF CONDITION_N-1: + ADD BY_N-2 TO VARYING_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + GOTO TEST_N-2 + TOP: + PERFORM proc-1 [THROUGH proc-2] + + ADD BY_N-1 TO VARYING_N-1: + GOTO TEST_N-1 + + */ + create_iline_address_pairs(tgt); + + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + build_N_pairs(go_to, label, N); + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + // Initialize all varying: + + for(size_t i=0; i<N; i++) + { + parser_move(varys[i].varying, varys[i].from); + } + + // Lay down the testing cycle: + for(size_t i=0; i<N; i++) + { + // This is the chain of conditions that gets tested before + // the statements run. Each condition gets its own label. + gg_append_statement(label[i]); + + // go back to the instructions that calculate the conditional + gg_append_statement(tgt->addresses.condinto[i].go_to); + + // And put down the label that brings us back: + gg_append_statement(tgt->addresses.condback[i].label); + + // Now we can test the calculated conditional: + parser_if(varys[i].until); + // This condition has been met, so we increment the + // variable to the left, reset ours, and go check the + // one we just incremented + if(i == 0) + { + // This is the leftmost condition condition, so when it + // is TRUE, we are done. + gg_append_statement( tgt->addresses.exit.go_to ); + } + else + { + // This is one of the conditions to the right of the + // first one. So, we augment the VARYING to the + // left, reset our VARYING, and go test the + // condition to the left: + parser_add(varys[i-1].varying, varys[i-1].by, varys[i-1].varying); + parser_move(varys[i].varying, varys[i].from); + gg_append_statement( go_to[i-1] ); + } + parser_else(); + // This condition has not been met. + if( i == N-1 ) + { + // ... and this is the rightmost condition + // This is where we perform the body of the PERFORM. + gg_append_statement( tgt->addresses.top.label ); + + // Build the body: + // true in the next call means that GDB NEXT will pass through the + // entire loop + internal_perform_through(tgt->from(), tgt->to(), true); + + // And now we augment FROM_N-1 by BY__N-1 + parser_add(varys[N-1].varying, varys[N-1].by, varys[N-1].varying); + + // And we jump back to test that freshly-augmented condition + gg_append_statement( go_to[N-1] ); + } + else + { + // At this point, a condition that is not the rightmost + // one has not been met. We could, in principle, just + // fall through at this point. But that makes me nervous. + // So, I am going to put in what may well be an + // unnecessary goto: + gg_append_statement( go_to[i+1] ); + } + parser_fi(); + } + // The astute observer will have noted that there is no way + // for the generated runtime code to reach this point except by jumpint to + // the EXIT: label. + // We have, you see, reached the egress: + gg_append_statement( tgt->addresses.exit.label ); + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is an out-of-line perform. + + // We need to create the address pairs, because there was no parser_perform_start + + if( N == 1 && !varys[0].varying.field ) + { + // There is no varys.varying, so this is just a PERFORM proc-1 UNTIL + if( test_before ) + { + perform_outofline_before_until(tgt, test_before, N, varys); + } + else + { + perform_outofline_after_until(tgt, test_before, N, varys); + } + } + else + { + // This is a PERFORM proc-1 [through proc-2] VARYING + if( test_before ) + { + perform_outofline_before_varying(tgt, test_before, N, varys); + } + else + { + perform_outofline_testafter_varying(tgt, test_before, N, varys); + } + } + } + +static void +perform_inline_until( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t /*N*/, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM <inline> [TEST {BEFORE|AFTER}] UNTIL + + /* + + GOTO SETUP + TOP: S1 + S2 + EXIT PERFORM -> GOTO EXIT: + S3 + S4 + EXIT PERFORM CYCLE -> GOTO TEST + S6 + S7 + TEST: IF CONDITION + GOTO EXIT + ELSE + GOTO TOP + SETUP: + IF TEST BEFORE + GOTO TEST + ELSE + GOTO TOP + EXIT: + */ + gg_set_current_line_number(cobol_location().last_line); + + gg_append_statement(tgt->addresses.test.label); + + // Go to where the conditional is recalculated.... + gg_append_statement(tgt->addresses.condinto[0].go_to); + + // ...and lay down the return address. + gg_append_statement(tgt->addresses.condback[0].label); + + parser_if( varys[0].until ); + gg_append_statement( tgt->addresses.exit.go_to ); + parser_else(); + gg_append_statement( tgt->addresses.top.go_to ); + parser_fi(); + gg_append_statement( tgt->addresses.setup.label ); + + if( test_before ) + { + gg_append_statement( tgt->addresses.test.go_to ); + } + else + { + gg_append_statement( tgt->addresses.top.go_to ); + } + gg_append_statement( tgt->addresses.exit.label ); + } + +static void +perform_inline_testbefore_varying( struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [THROUGH proc-2] TEST BEFORE VARYING + + /* + + GOTO SETUP + TOP: + S1 + S2 + EXIT PERFORM -- GOTO EXIT: + S3 + S4 + EXIT PERFORM CYCLE -- GOTO TESTA + S5 + S6 + GOTO AUGMENT_N-1 + SETUP: + SET ALL VARYING-N to FROM-N + TEST_0: + IF CONDITION_0: + GOTO EXIT: + TEST_1: + IF CONDITION_1: + ADD BY_0 TO VARYING_0 + MOVE FROM_1 TO VARYING_1 + GOTO TEST_0 + TEST_2: + IF CONDITION_2: + ADD BY_1 TO VARYING_1: + MOVE FROM_2 TO VARYING_2 + GOTO TEST_1: + TEST_3: + IF CONDITION_3: + ADD BY_2 TO VARYING_2: + MOVE FROM_3 TO VARYING_3 + GOTO TEST_1: + . . . . . . . . . . . . . . . . + TEST_N-1: + IF CONDITION_N-1: + ADD BY_N-2 TO VARYING_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + GOTO TEST_N-2 + + GOTO TOP + TESTA: + ADD BY_N-1 TO VARYING_N-1: + GOTO TEST_N-1 + + */ + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + build_N_pairs(go_to, label, N); + + // At this point in the executable, the body of the inline loop has been + // laid down, so we lay down a GOTO TESTA + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO TESTA") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.testA.go_to); + + // It's now safe to setup the whole extravaganza of UNTIL conditions: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL SETUP:") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.setup.label); + + // Initialize all varying: + for(size_t i=0; i<N; i++) + { + parser_move(varys[i].varying, varys[i].from); + } + + gg_set_current_line_number(cobol_location().last_line); + + // Lay down the testing cycle: + for(size_t i=0; i<N; i++) + { + // This is the chain of conditions that gets tested before + // the statements run. Each condition gets its own label. + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "LABEL [%ld]:", i); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement(label[i]); + + // Jump to where the conditional is calculated... + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "LABEL CONDINTO[%ld]:", i); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.condinto[i].go_to); + + // ...and lay down the label for the return from there + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "LABEL CONDBACK[%ld]:", i); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.condback[i].label); + + // Test that conditional + parser_if(varys[i].until); + // This condition has been met, so we increment the + // variable to the left, reset ours, and go check the + // one we just incremented + if(i == 0) + { + // This is the leftmost condition condition, so when it + // is TRUE, we are done. + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO EXIT") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.exit.go_to ); + } + else + { + // This is one of the conditions to the right of the + // first one. So, we augment the VARYING to the + // left, reset our VARYING, and go test the + // condition to the left: + parser_add(varys[i-1].varying, varys[i-1].by, varys[i-1].varying); + parser_move(varys[i].varying, varys[i].from); + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "GOTO [%ld]:", i-1); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement( go_to[i-1] ); + } + parser_else(); + // This condition has not been met. + if( i == N-1 ) + { + // ... and this is the rightmost condition + // This is where we perform the body of the PERFORM. + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO TOP") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.top.go_to ); + + // And now we augment FROM_N-1 by BY__N-1 + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL TESTA:") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.testA.label); + parser_add(varys[N-1].varying, varys[N-1].by, varys[N-1].varying); + // And we jump back to test that freshly-augmented condition + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "GOTO [%ld]:", N-1); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement( go_to[N-1] ); + } + else + { + // At this point, a condition that is not the rightmost + // one has not been met. We could, in principle, just + // fall through at this point. But that makes me nervous. + // So, I am going to put in what may well be an + // unnecessary goto: + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "GOTO [%ld]:", i-1); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement( go_to[i+1] ); + } + parser_fi(); + } + + // The astute observer will have noted that there is no way + // for the generated runtime code to reach this point. + // + // We have, you see, reached the egress: + gg_append_statement( tgt->addresses.exit.label ); + } + +static void +perform_inline_testafter_varying( struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM <inline> TEST AFTER VARYING + + /* + + GOTO SETUP + TOP: + S1 + S2 + EXIT PERFORM -- GOTO EXIT: + S3 + S4 + EXIT PERFORM CYCLE -- GOTO TESTA + S5 + S6 + GOTO TESTA: + + SETUP: + MOVE FROM_0 TO VARYING_0 + INIT_1: + MOVE FROM_1 TO VARYING_1 + INIT_2: + MOVE FROM_2 TO VARYING_2 + . . . . . . . . . . . . . . . . . . + INIT_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + INIT_N-1: + MOVE FROM_N-1 TO VARYING_N-1 + GOTO TOP + TESTA: + TEST_N-1: + IF NOT CONDITION_N-1 + ADD BY_N-1 TO VARYING_N-1 + GOTO TOP + IF NOT CONDITION_N-2 + ADD BY_N-2 TO VARYING_N-2 + GOTO INIT_N-1 + IF NOT CONDITION_N-3 + ADD BY_N-3 TO VARYING_N-3 + GOTO INIT_N-2 + . . . . . . . . . . . . . . . . . . + IF NOT CONDITION_1 + ADD BY_1 TO VARYING_1 + GOTO INIT_2 + IF NOT CONDITION_0 + ADD BY_0 TO VARYING_0 + GOTO INIT_1 + // At this point, all conditions are true + EXIT: + + */ + + // So, we're going to do that. But because the initializations + // and the testing are so nicely loopish, we're going to let + // the computer create them for us. + + // We are going to need a set of N label pairs. Actually, we + // only need N-1; we don't use the zeroth pair. But the code + // is cleaner if we just build all N of them. + + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + + build_N_pairs(go_to, label, N); + + // At this point the code being laid down, the GOTO SETUP was created, + // followed by the stream of statements. We terminate it with a + // goto testa + gg_append_statement(tgt->addresses.testA.go_to); + + // See the comment in create_iline_address_pairs() + //gg_force_line_number(tgt->addresses.line_number_of_setup_code-1); + + // That's followed by the SETUP target: + gg_append_statement(tgt->addresses.setup.label); + + // We now build the initialization section, + for(size_t i=0; i<N; i++) + { + gg_append_statement(label[i]); + parser_move(varys[i].varying, varys[i].from); + } + + // Having done all the initialization, we jump back to the start of + // the list of statements: + gg_append_statement(tgt->addresses.top.go_to); + + // The list of statements ends with a goto TESTA, and that;s here: + gg_append_statement(tgt->addresses.testA.label); + + // Build the test section + // (The oddball test is because N is a size_t, and can't go negative) + for(size_t i=N-1; i<N; i--) + { + // Jump to where the conditional is calculated... + gg_append_statement(tgt->addresses.condinto[i].go_to); + + // ...and lay down the label to get back from there + gg_append_statement(tgt->addresses.condback[i].label); + + // Test the newly-recalculated conditional: + parser_if( varys[i].until ); + // Condition is true; so we'll fall through + parser_else(); + // Condition is false, so we increment, and keep going: + parser_add(varys[i].varying, varys[i].by, varys[i].varying); + if( i == N-1 ) + { + gg_append_statement(tgt->addresses.top.go_to); + } + else + { + gg_append_statement(go_to[i+1]); + } + parser_fi(); + } + + // Arriving here means that we all of the conditions were + // true. So, we're done. + gg_append_statement( tgt->addresses.exit.label ); + } + +static void +perform_inline_impl( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + if( N == 1 && !varys[0].varying.field ) + { + perform_inline_until(tgt, test_before, N, varys); + } + else + { + // This is a PERFORM proc-1 [through proc-2] VARYING + if( !test_before ) + { + perform_inline_testafter_varying(tgt, test_before, N, varys); + } + else + { + perform_inline_testbefore_varying(tgt, test_before, N, varys); + } + } + } + +void +parser_perform_until( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t N, + struct cbl_perform_vary_t *varys ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_LABEL(" ", tgt->from()) + if( tgt->to() ) + { + SHOW_PARSE_LABEL(" THROUGH", tgt->to()) + } + SHOW_PARSE_END + } + + gg_set_current_line_number(cobol_location().last_line); + gg_assign(var_decl_nop, build_int_cst_type(INT, 105)); + + if( tgt->from()->type != LblLoop ) + { + perform_outofline( tgt, test_before, N, varys); + } + else + { + perform_inline_impl( tgt, test_before, N, varys); + } + } + +void +parser_perform_inline_times(struct cbl_perform_tgt_t *tgt, + struct cbl_refer_t how_many ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL("", tgt->from()); + SHOW_PARSE_REF(" how_many is ", how_many); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD(" into ", how_many.field, " times"); + TRACE1_END + } + + gcc_assert(tgt); + cbl_field_t *count = how_many.field; + if( how_many.is_reference() ) + { + cbl_internal_error("%s:%d: ignoring subscripts", __func__, __LINE__); + } + CHECK_FIELD(count); + + // This has to be on the stack, because performs can be nested + tree counter = gg_define_variable(LONG); + + /* + GOTO SETUP + TOP: S1 + EXIT PERFORM --> GOTO EXIT + S2 + EXIT PERFORM CYCLE --> GOTO TEST + S3 + TESTA: + TEST: INCREMENT COUNTER + IF COUNTER LT LIMIT + GOTO TOP + ELSE + GOTO EXIT + SETUP: INITIALIZE COUNTER + GOTO TOP + EXIT: + */ + + // At this point, the GOTO SETUP, the label "TOP:" and the + // body of the inline perform have been laid down. + + // Tack on the label for TEST and TESTA + gg_append_statement( tgt->addresses.testA.label ); + gg_append_statement( tgt->addresses.test.label ); + + // AT this point, we want to set the line_number to the location of the + // END-PERFORM statement. + gg_set_current_line_number(cobol_location().last_line); + + gg_decrement(counter); + // Do the test: + IF( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + // We continue + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("If still counting GOTO TOP") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.top.go_to ); + ELSE + // We are done + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("If count complete GOTO EXIT") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.exit.go_to ); + ENDIF + + // Lay down the SETUP: label + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL SETUP:") + SHOW_PARSE_END + } + + int stash = gg_get_current_line_number(); + gg_set_current_line_number(tgt->addresses.line_number_of_setup_code); + gg_append_statement( tgt->addresses.setup.label ); + + // Get the count: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Access the how_many parameter") + SHOW_PARSE_REF(" ", how_many) + SHOW_PARSE_END + } + + get_binary_value( counter, + NULL, + count, + size_t_zero_node); + + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO TOP") + SHOW_PARSE_END + } + + // Make sure the initial count is valid: + IF( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + gg_append_statement( tgt->addresses.top.go_to ); + ELSE + gg_append_statement( tgt->addresses.exit.go_to ); + ENDIF + + gg_set_current_line_number(stash); + + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL EXIT:") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.exit.label ); + } + +void +parser_set_conditional88( struct cbl_refer_t refer, bool which_way ) + { + Analyze(); + struct cbl_field_t *tgt = refer.field; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + if( which_way ) + { + SHOW_PARSE_TEXT(" TRUE"); + } + else + { + SHOW_PARSE_TEXT(" FALSE"); + } + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + + struct cbl_field_t *parent = parent_of(tgt); + + CHECK_FIELD(parent); + + cbl_domain_t *src; + + if( which_way ) + { + src = tgt->data.domain; + } + else + { + src = tgt->data.false_value; + } + + // We want to set the LEVEL88 target to TRUE (or FALSE), so we need to set + // the parent of this LEVEL88 to the first element in data.domain (or + // data.false_value); + + cbl_figconst_t figconst = cbl_figconst_of(src->first.name()); + + if( !figconst ) + { + // We are dealing with an ordinary string. + static size_t buffer_size = 0; + static char *buffer = NULL; + size_t length = src->first.size(); + raw_to_internal(&buffer, &buffer_size, src->first.name(), length); + move_tree_to_field( parent, + gg_string_literal(buffer)); + } + else + { + // This is a figurative constant + gg_call(VOID, + "__gg__parser_set_conditional", + gg_get_address_of(parent->var_decl_node), + build_int_cst_type(INT, figconst), + NULL_TREE); + } + } + +static +void set_user_status(struct cbl_file_t *file) + { + // This routine sets the user_status, if any, to the cblc_file_t::status + if(file->user_status) + { + cbl_field_t *user_status = cbl_field_of(symbol_at(file->user_status)); + gcc_assert( user_status ); + gg_call(VOID, + "__gg__set_user_status", + gg_get_address_of(user_status->var_decl_node), + gg_get_address_of(file->var_decl_node), + NULL_TREE); + } + } + +void +parser_file_add(struct cbl_file_t *file) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( file ) + { + fprintf(stderr, " cbl_file_t: %s", file->name); + if( file->record_length ) + { + SHOW_PARSE_TEXT(" file->record_length is %s"); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" file->record_length is ZERO") + } + } + else + { + SHOW_PARSE_TEXT( " *file pointer is NULL") + } + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("%s(): called with NULL *file", __func__); + gcc_assert(file); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_add cbl_file_t ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + /* The FD record can be flagged external. Without definitive information, I + am going to assume that the *everything* in the cblc_file_t structure is + GLOBAL EXTERNAL. If I have read the specification incorrectly, and it's + possible for two programs to share a file connector but with, say, two + different lists of keys, then the cblc_file_t structure will have to + be changed to have one var_decl node for the common information, and a + second one for local information. + + */ + + gg_variable_scope_t scope; + if( file->attr & external_e ) + { + scope = vs_external; + } + else + { + scope = vs_static; + } + + char achName[2*sizeof(cbl_name_t)]; + + // Use the global structure template declaration to produce the specific + // structure definition expression: + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_fc"); // For "File Connector" + tree new_var_decl = gg_define_variable( cblc_file_type_node, + achName, + scope); + + // We have to convert file->nkey and file->keys to the run-time formats. + + // There can be 0 through N keys, and each of those keys has M fields. Each of + // the M fields has a "unique" flag, which we pass along as an array of INTs. + + int number_of_key_fields = 0; + for( size_t i=0; i<file->nkey; i++ ) + { + number_of_key_fields += file->keys[i].nfield; + } + + // We create an array of pointers for those fields, adding an additional + // element for a NULL pointer to indicate the end of the list: + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_keys"); + tree array_of_keys = gg_define_variable( + build_pointer_type(cblc_field_p_type_node), + achName, + scope); + gg_assign(array_of_keys, + gg_cast(build_pointer_type(cblc_field_p_type_node), + gg_malloc(build_int_cst_type(SIZE_T, + (number_of_key_fields+1) + *sizeof(void *))))); + + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_keynum"); + tree key_numbers = gg_define_variable(build_pointer_type(INT), + achName, + scope); + gg_assign(key_numbers, + gg_cast(build_pointer_type(INT), + gg_malloc(build_int_cst_type(SIZE_T, + (number_of_key_fields+1) + *sizeof(int))))); + + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_uniqs"); + tree unique_flags = gg_define_variable( build_pointer_type(INT), + achName, + scope); + gg_assign(unique_flags, + gg_cast(build_pointer_type(INT), + gg_malloc(build_int_cst_type(SIZE_T, + (number_of_key_fields+1) + *sizeof(int))))); + + size_t index = 0; + for( size_t i=0; i<file->nkey; i++ ) + { + for( size_t j=0; j<file->keys[i].nfield; j++ ) + { + gg_assign(gg_array_value(array_of_keys, index), + get_field_p(file->keys[i].fields[j]) ); + + gg_assign(gg_array_value(key_numbers, index), + build_int_cst_type(INT, i+1)); + + gg_assign(gg_array_value(unique_flags, index), + (file->keys[i].unique ? integer_one_node : integer_zero_node)); + index += 1; + } + } + // Terminate the field list with a NULL: + gg_assign( gg_array_value(array_of_keys, index), gg_cast(cblc_field_p_type_node, null_pointer_node) ); + + // Terminate the key-numbers list with a negative 1 as a guardrail: + gg_assign( gg_array_value(key_numbers, index), integer_minusone_node ); + + // Terminate the uniques list with a zero, just to avoid garbage: + gg_assign( gg_array_value(unique_flags, index), integer_zero_node ); + + cbl_file_t::varying_t varies = symbol_file_record_sizes(file); + + gcc_assert(varies.min <= varies.max); + + if(file->access == file_inaccessible_e) + { + cbl_internal_error( + "%s:%d file %s access mode is 'file_inaccessible_e' in %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name, + __func__); + } + + gg_call(VOID, + "__gg__file_init", + gg_get_address_of(new_var_decl), + gg_string_literal(file->name), + array_of_keys, + key_numbers, + unique_flags, + gg_get_address_of(symbol_file_record(file)->var_decl_node), + get_field_p(file->password), + get_field_p(file->user_status), + get_field_p(file->vsam_status), + get_field_p(file->record_length), + get_field_p(file_status_register()), + build_int_cst_type(SIZE_T, file->reserve), + build_int_cst_type(INT, (int)file->org), + build_int_cst_type(INT, (int)file->padding), + build_int_cst_type(INT, (int)file->access), + build_int_cst_type(INT, (int)file->optional), + build_int_cst_type(SIZE_T, varies.min), + build_int_cst_type(SIZE_T, varies.max), + NULL_TREE); + file->var_decl_node = new_var_decl; + } + +static void store_location_stuff(const cbl_name_t statement_name); + +void +parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char ) + { + for(size_t i=0; i<nfiles; i++) + { + auto& file = files[i]; + parser_file_open(file, mode_char); + } + } + +void +parser_file_open( struct cbl_file_t *file, int mode_char ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + char ach[64]; + sprintf(ach, ", organization is %s", file_org_str(file->org)); + SHOW_PARSE_TEXT(ach); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + + SHOW_PARSE_TEXT(", mode_char: ") + char ach[2] = ""; + ach[0] = mode_char; + SHOW_PARSE_TEXT(ach) + + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("parser_file_open called with NULL *file"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_open for %s called with NULL var_decl_node", file->name); + } + + if( mode_char == 'a' && (file->access != file_access_seq_e) ) + { + cbl_internal_error("EXTEND can only be used where %s is ACCESS MODE SEQUENTIAL", file->name); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_open of ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric. + // The runtime has a (char *)filename, so we need to + // do a runtime conversion. + + tree psz; // This is going to be either the name of the file, or the + // possible run-time environment variable that will contain + // the name of the file. + + cbl_field_t *field_of_name = symbol_field_forward(file->filename); + bool quoted_name = false; + if( field_of_name->type == FldForward ) + { + // The target of ASSIGN TO was unquoted, but didn't resolve to a + // cbl_field_t. This means that the name of the field is an + // environment variable that will hold the file name + psz = gg_define_char_star(); + gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name))); + } + else + { + // The name is coming from a presumably FldAlphaNumeric variable + psz = get_string_from(field_of_name); + quoted_name = true; + } + + store_location_stuff("OPEN"); + gg_call(VOID, + "__gg__file_open", + gg_get_address_of(file->var_decl_node), + psz, + build_int_cst_type(INT, mode_char), + quoted_name ? integer_one_node : integer_zero_node, + NULL_TREE); + set_user_status(file); + } + +void +parser_file_close( struct cbl_file_t *file, file_close_how_t how ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL ") + } + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("parser_file_close called with NULL *file"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_close for %s called with NULL file->var_decl_node", file->name); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_close of ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + // We are done with the filename. The library routine will free "filename" + // memory and set it back to null + + store_location_stuff("CLOSE"); + gg_call(VOID, + "__gg__file_close", + gg_get_address_of(file->var_decl_node), + build_int_cst_type(INT, (int)how), + NULL_TREE); + set_user_status(file); + } + +void +parser_file_read( struct cbl_file_t *file, + cbl_refer_t /*data_dest*/, + int where ) + { + Analyze(); + // where = -2 means PREVIOUS + // where = -1 means NEXT + // where = 1 or more means key N, where N is one-based + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + + char ach[32]; + sprintf(ach, " where:%d", where); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + if( where == 0 ) + { + cbl_internal_error("%s:%d file %s 'where' is zero in %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name, + __func__); + where = -1; + } + + if( !file ) + { + cbl_internal_error("parser_file_read called with NULL *file"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_read for %s called with NULL file->var_decl_node", file->name); + } + + if( !file ) + { + cbl_internal_error("parser_file_read called with NULL *field"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_read for %s called with NULL field->var_decl_node", file->name); + } + + if( file->access == file_access_seq_e && where >= 0) + { + cbl_internal_error("%s:%d file %s is RELATIVE/SEQUENTIAL, but 'where' >= 0", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name); + where = -1; + } + + if( file->access == file_access_rnd_e && where < 0) + { + cbl_internal_error("%s:%d file %s is RELATIVE/RANDOM, but 'where' < 0", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name); + where = 1; + } + + store_location_stuff("READ"); + gg_call(VOID, + "__gg__file_read", + gg_get_address_of(file->var_decl_node), + build_int_cst_type(INT, where), + NULL_TREE); + set_user_status(file); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("from ") + TRACE1_TEXT(file->name); + TRACE1_INDENT + cbl_field_t *our_return_code + = cbl_field_of(symbol_at(file_status_register())); + TRACE1_FIELD("result: ", our_return_code, ""); + TRACE1_END + } + } + +void +parser_file_write( cbl_file_t *file, + cbl_field_t *record_area, + bool after, + cbl_refer_t &advance, + bool sequentially + ) + { + Analyze(); + + bool is_random = !( file->access == file_access_seq_e + || file->access == file_inaccessible_e); + + if( (is_random ? 1 : 0) != (sequentially ? 0 : 1) ) + { + cbl_internal_error("%s:%d file %s 'sequentially' is %d in %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name, + sequentially ? 1 : 0, + __func__); + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + + if( !advance.field ) + { + SHOW_PARSE_TEXT(" automatic BEFORE ADVANCING 1 LINE") + } + else + { + if( after ) + { + SHOW_PARSE_TEXT(" AFTER") + } + else + { + SHOW_PARSE_TEXT(" BEFORE") + } + SHOW_PARSE_REF(" ADVANCING ", advance); + SHOW_PARSE_TEXT(" LINE(S)") + } + + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("%s(): called with NULL *file", __func__); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("%s(): for %s called with NULL file->var_decl_node", + __func__, file->name); + } + + if( !file ) + { + cbl_internal_error("%s(): called with NULL *field", __func__); + } + + if( !file->var_decl_node ) + { + cbl_internal_error( "%s(): for %s called with NULL field->var_decl_node", + __func__, + file->name); + } + + static tree t_advance = gg_define_variable(INT, "..pfw_advance", vs_file_static); + if(advance.field) + { + static tree value = gg_define_variable(INT, "..pfw_value", vs_file_static); + get_binary_value( value, + NULL, + advance.field, + refer_offset_source(advance)); + gg_assign(t_advance, gg_cast(INT, value)); + } + else + { + if( file->org == file_line_sequential_e ) + { + // ISO/IEC_1989-2014 and IBM say the default is AFTER advancing + // MicroFocus and GnuCOBOL say the default is BEFORE advancing. + // See the comment where the variable is defined: + after = auto_advance_is_AFTER_advancing; + gg_assign(t_advance, integer_one_node); + } + else + { + // The default for SEQUENTIAL is no vertical motion + gg_assign(t_advance, integer_minusone_node); + } + } + + gcc_assert(record_area); + if( !record_area ) + { + record_area = cbl_field_of(symbol_at(file->default_record)); + } + + store_location_stuff("WRITE"); + gg_call(VOID, + "__gg__file_write", + gg_get_address_of(file->var_decl_node), + member(record_area, "data"), + member(record_area, "capacity"), + after ? integer_one_node : integer_zero_node, + t_advance, + is_random ? integer_one_node : integer_zero_node, + NULL_TREE); + set_user_status(file); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("to ") + TRACE1_TEXT(file->name); + TRACE1_INDENT + if( advance.field ) + { + TRACE1_INDENT + if( after ) + { + TRACE1_TEXT("AFTER") + } + else + { + TRACE1_TEXT("BEFORE") + } + TRACE1_REFER(" ADVANCING ", advance, " LINE(S)"); + } + TRACE1_INDENT + cbl_field_t *our_return_code + = cbl_field_of(symbol_at(file_status_register())); + TRACE1_FIELD("result: ", our_return_code, ""); + TRACE1_END + } + } + +void +parser_file_delete( struct cbl_file_t *file, bool /*sequentially*/ ) + { + Analyze(); + bool sequentially = file->access == file_access_seq_e + || file->org == file_sequential_e + || file->org == file_line_sequential_e; + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + if( sequentially ) + { + SHOW_PARSE_TEXT(" sequentially") + } + else + { + SHOW_PARSE_TEXT(" sequentially") + } + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + SHOW_PARSE_END + } + + store_location_stuff("DELETE"); + gg_call(VOID, + "__gg__file_delete", + gg_get_address_of(file->var_decl_node), + sequentially ? integer_zero_node : integer_one_node, + NULL_TREE); + set_user_status(file); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_delete record ") + TRACE1_TEXT(file->name); + TRACE1_END + } + } + +void +parser_file_rewrite(cbl_file_t *file, + cbl_field_t *record_area, + bool sequentially ) + { + Analyze(); + if( file->org == file_indexed_e + && file->access == file_access_seq_e + && !sequentially ) + { + cbl_internal_error( + "%s:%d file %s is INDEXED/SEQUENTIAL, but 'sequentially' is false", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name); + sequentially = true; + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + SHOW_PARSE_END + } + + gcc_assert(record_area); + if( !record_area ) + { + record_area = cbl_field_of(symbol_at(file->default_record)); + } + + store_location_stuff("REWRITE"); + gg_call(VOID, + "__gg__file_rewrite", + gg_get_address_of(file->var_decl_node), + member(record_area, "capacity"), + sequentially ? integer_zero_node : integer_one_node, + NULL_TREE); + set_user_status(file); + } + +/* + * flk is first-last-key. Similar to parser_file_read, it is a + * 1-based index, for consistency. Encoded values: + * -1 FIRST + * -2 LAST + * 0 neither + * >0 1-based index into cbl_file_t::keys + */ +void +parser_file_start(struct cbl_file_t *file, + relop_t op, + int flk, + cbl_refer_t length_ref ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + switch(op) + { + case lt_op: + SHOW_PARSE_TEXT(" lt_op") + break; + case le_op: + SHOW_PARSE_TEXT(" le_op") + break; + case eq_op: + SHOW_PARSE_TEXT(" eq_op") + break; + case ne_op: + SHOW_PARSE_TEXT(" ne_op") + break; + case ge_op: + SHOW_PARSE_TEXT(" ge_op") + break; + case gt_op: + SHOW_PARSE_TEXT(" gt_op") + break; + } + char ach[32]; + sprintf(ach, " first-last-key:%d", flk); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_REF(" length:", length_ref); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + SHOW_PARSE_END + } + + if( flk == 0 + && (file->org == file_indexed_e || file->org == file_relative_e) ) + { + flk = 1; + op = eq_op; + } + + if( flk == 0 + && (file->org == file_sequential_e) ) + { + flk = -1; + } + + static tree length = gg_define_variable(SIZE_T, "..pfs_length", vs_file_static); + gg_assign(length, size_t_zero_node); + + if( flk > 0 && !length_ref.field ) + { + // We need a length, and we don't have one. We have to calculate the length + // from the lengths of the fields that make up the specified key. + + size_t combined_length = 0; + + gcc_assert(flk <= (int)file->nkey); + + int key_number = flk-1; + + // A key has a number of fields + for(size_t ifield=0; ifield<file->keys[key_number].nfield; ifield++) + { + size_t field_index = file->keys[key_number].fields[ifield]; + cbl_field_t *field = cbl_field_of(symbol_at(field_index)); + combined_length += field->data.capacity; + } + gg_assign(length, build_int_cst_type(SIZE_T, combined_length)); + } + else if( flk > 0 ) + { + get_binary_value( length, + NULL, + length_ref.field, + refer_offset_dest(length_ref)); + } + + store_location_stuff("START"); + gg_call(VOID, + "__gg__file_start", + gg_get_address_of(file->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(INT, flk), + length, + NULL_TREE ); + set_user_status(file); + } + +static void +inspect_tally(bool backward, + cbl_refer_t identifier_1, + unsigned long n_identifier_2, + cbx_inspect_t<cbl_refer_t>* identifier_2) + { + Analyze(); + // This is an INSPECT FORMAT 1 + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // Make one pass through the inputs to count up the sizes of the arrays + // we will be passing to the library routines. This loop structure simply + // anticipates the more complex one that follows. + + size_t int_index = 0; + size_t pcbl_index = 0; + + // The first integer is the all-important controlling count: + int_index++; + + // The first refer is for identifier-1 + pcbl_index++; + + for( size_t i=0; i<n_identifier_2; i++) + { + // Each identifier-2 has to go into the array: + pcbl_index++; + // For each FOR there is a count of the loops after the FOR + int_index++; + for(size_t j=0; j<identifier_2[i].nbound; j++) + { + + // After each identifier-2, there is a cbl_inspect_bound_t value: + int_index++; + if( identifier_2[i].opers[j].bound == bound_characters_e) + { + // This is a FOR CHARACTERS PHRASE1, so we will need before/after + // for each: + pcbl_index++; + pcbl_index++; + } + else + { + // This is ALL or LEADING. Each has some number of identifier-3 + int_index++; + for(size_t k=0; k<identifier_2[i].opers[j].n_identifier_3; k++) + { + // Put identifier-3 into the array: + pcbl_index++; + + // We need the PHRASE1 for that identifier-3 + pcbl_index++; + pcbl_index++; + } + } + } + } + + // We will be passing the library routine an array of size_t, which contains + // all the integers and cbl_inspect_bound_t values, in a strict sequence so + // that the library routine can peel them off. + + static tree int_size = gg_define_variable(INT, "..pit_size", vs_file_static, 0); + static tree integers = gg_define_variable(SIZE_T_P, "..pit", vs_file_static, null_pointer_node); + + size_t n_integers = int_index; + + IF( build_int_cst_type(INT, n_integers), gt_op, int_size ) + { + gg_assign(int_size, build_int_cst_type(INT, n_integers)); + gg_assign(integers, + gg_cast(SIZE_T_P, + gg_realloc(integers, n_integers * sizeof(void *)))); + } + ELSE + { + } + ENDIF + + size_t n_resolveds = pcbl_index; + cbl_refer_t *pcbl_refers = (cbl_refer_t *)xmalloc(n_resolveds * sizeof(cbl_refer_t)); + + // Now we make a second pass, populating those arrays: + int_index = 0; + pcbl_index = 0; + + // The first integer is the all-important controlling count: + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, n_identifier_2) ); + + // The first refer is for identifier-1 + pcbl_refers[pcbl_index++] = identifier_1; + + for( size_t i=0; i<n_identifier_2; i++) + { + // Each identifier-2 has to go into the array: + pcbl_refers[pcbl_index++] = identifier_2[i].tally; + // For each FOR there is a count of the loops after the FOR + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, identifier_2[i].nbound) ); + for(size_t j=0; j<identifier_2[i].nbound; j++) + { + + // After each identifier-2, there is a cbl_inspect_bound_t value: + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, identifier_2[i].opers[j].bound)); + if( identifier_2[i].opers[j].bound == bound_characters_e) + { + // This is a FOR CHARACTERS PHRASE1, so we will need before/after + // for each: + pcbl_refers[pcbl_index++] = identifier_2[i].opers[j].matches[0].before.identifier_4; + pcbl_refers[pcbl_index++] = identifier_2[i].opers[j].matches[0].after.identifier_4; + } + else + { + // This is ALL or LEADING. Each has some number of identifier-3 + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, identifier_2[i].opers[j].n_identifier_3)); + for(size_t k=0; k<identifier_2[i].opers[j].n_identifier_3; k++) + { + // Put identifier-3 into the array: + pcbl_refers[pcbl_index++] = identifier_2[i].opers[j].matches[k].matching; + + // We need the PHRASE1 for that identifier-3 + pcbl_refers[pcbl_index++] = identifier_2[i].opers[j].matches[k].before.identifier_4; + + pcbl_refers[pcbl_index++] = identifier_2[i].opers[j].matches[k].after.identifier_4; + } + } + } + } + + //fprintf(stderr, " %ld %ld\n", int_index, n_integers); + gcc_assert(int_index == n_integers); + //fprintf(stderr, " %ld %ld\n", pcbl_index, n_resolveds); + gcc_assert(pcbl_index == n_resolveds); + + // We have built up an array of integers, and an array of cbl_refer_t. + build_array_of_treeplets(1, pcbl_index, pcbl_refers); + + // Do the actual call: + gg_call(VOID, + "__gg__inspect_format_1", + backward ? integer_one_node : integer_zero_node, + integers, + NULL_TREE); + + // And free up the memory we allocated + free(pcbl_refers); + } + +static void +inspect_replacing(int backward, + cbl_refer_t identifier_1, + unsigned long n_ops, + cbx_inspect_t<cbl_refer_t>* operations) + { + Analyze(); + // This is an INSPECT FORMAT 2 + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + } + + // For REPLACING, unlike TALLY, there can be but one operation + gcc_assert(n_ops == 1); + + size_t n_id_3 = 0; + size_t n_id_4 = 0; + size_t n_id_5 = 0; + size_t n_all_leading_first = 0; + + // Make one pass through the inputs to count up the sizes of the arrays + // we will be passing to the library routines: + + for( size_t j=0; j<operations[0].nbound; j++) + { + if( operations[0].opers[j].bound == bound_characters_e) + { + // This is a FOR CHARACTERS phrase + + // Each will have an identifier-5: + n_id_5 += 1; + + // Each will have a PHRASE1 comprising BEFORE and AFTER identifier-4 values + n_id_4 += 2; + } + else + { + // This is ALL, LEADING, or FIRST. Each has some number of identifier-3 values: + // The n_identifier_3 value goes into the integer list, so we'll have + // to make room for them: + n_all_leading_first += 1; + + // The n_identifier-3 values will go into the resolved values; we have to + // leave room for them + n_id_3 += operations[0].opers[j].n_identifier_3; + + // Likewise identifier-5 values: + n_id_5 += operations[0].opers[j].n_identifier_3; + + // And each identifier-3 / identifier-5 pair has BEFORE and AFTER phrases: + n_id_4 += 2 * operations[0].opers[j].n_identifier_3; + } + } + + // We will be passing the library routine an array of size_t, which contains + // all the integers and cbl_inspect_bound_t values, in a strict sequence so + // that the library routine can peel them off. + + size_t n_integers = 1 // Room for operations[0].nbound + + operations[0].nbound // Room for all the cbl_inspect_bound_t values + + n_all_leading_first; // Room for all of the n_identifier_3 counts + + static tree int_size = gg_define_variable(INT, "..pir_size", vs_file_static, 0); + static tree integers = gg_define_variable(SIZE_T_P, "..pir", vs_file_static, null_pointer_node); + + IF( build_int_cst_type(INT, n_integers), gt_op, int_size ) + { + gg_assign(int_size, build_int_cst_type(INT, n_integers)); + gg_assign(integers, + gg_cast(SIZE_T_P, + gg_realloc(integers, n_integers * sizeof(void *)))); + } + ELSE + { + } + ENDIF + + size_t n_resolveds = 1 // Room for identifier-1 + + n_id_3 // Room for the identifier-3 variables + + n_id_4 // Room for the identifier-4 variables + + n_id_5; // Room for the identifier-5 variables + + cbl_refer_t *pcbl_refers = (cbl_refer_t *)xmalloc(n_resolveds * sizeof(cbl_refer_t)); + + // Now we make a second pass, populating those arrays: + size_t int_index = 0; + size_t pcbl_index = 0; + + // The first integer is the all-important controlling count: + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, operations[0].nbound) ); + + // The first refer is for identifier-1 + pcbl_refers[pcbl_index++] = identifier_1; + + for( size_t j=0; j<operations[0].nbound; j++) + { + // For each FOR there is a count of the loops after the FOR + + // For each operation, there is a cbl_inspect_bound_t value: + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, operations[0].opers[j].bound)); + if( operations[0].opers[j].bound == bound_characters_e) + { + // This is a FOR CHARACTERS PHRASE1 + + // Put in the identifier-5 replacement value: + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[0].replacement; + + // Each identifier-5 gets a PHRASE1: + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[0].before.identifier_4; + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[0].after.identifier_4; + + SHOW_PARSE + { + if( j ) + { + SHOW_PARSE_INDENT + } + SHOW_PARSE_FIELD("ID-5 ", operations[0].opers[j].replaces[0].replacement.field) + if(operations[0].opers[j].replaces[0].before.identifier_4.field) + { + SHOW_PARSE_FIELD(" before ", operations[0].opers[j].replaces[0].before.identifier_4.field) + } + if(operations[0].opers[j].replaces[0].after.identifier_4.field) + { + SHOW_PARSE_FIELD(" after ", operations[0].opers[j].replaces[0].after.identifier_4.field) + } + SHOW_PARSE_END + } + } + else + { + // This is ALL or LEADING. Each has some number of identifier-3/identifier-5 pairs + gg_assign( gg_array_value(integers, int_index++), + build_int_cst_type(SIZE_T, operations[0].opers[j].n_identifier_3)); + for(size_t k=0; k<operations[0].opers[j].n_identifier_3; k++) + { + // Put identifier-3 into the array: + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[k].matching; + + // Put in the identifier-5 replacement value: + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[k].replacement; + + // We need the PHRASE1 for that identifier-3/identifier-5 pair: + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[k].before.identifier_4; + + pcbl_refers[pcbl_index++] = operations[0].opers[j].replaces[k].after.identifier_4; + + SHOW_PARSE + { + if( j || k ) + { + SHOW_PARSE_INDENT + } + SHOW_PARSE_FIELD("ID-3 ", operations[0].opers[j].replaces[k].matching.field) + SHOW_PARSE_FIELD(" ID-5 ", operations[0].opers[j].replaces[k].replacement.field) + if( operations[0].opers[j].replaces[k].before.identifier_4.field ) + { + SHOW_PARSE_FIELD("before ", operations[0].opers[j].replaces[k].before.identifier_4.field) + } + if(operations[0].opers[j].replaces[k].after.identifier_4.field) + { + SHOW_PARSE_FIELD("after ", operations[0].opers[j].replaces[k].after.identifier_4.field) + } + SHOW_PARSE_END + } + } + } + } + + //fprintf(stderr, "%s(): %ld %ld\n", __func__, int_index, n_integers); + gcc_assert(int_index == n_integers); + //fprintf(stderr, "%s(): %ld %ld\n", __func__, pcbl_index, n_resolveds); + gcc_assert(pcbl_index == n_resolveds); + + // We have built up an array of integers, and an array of cbl_refer_t. + + for(size_t i=0; i<pcbl_index; i++) + { + if( pcbl_refers[i].field && pcbl_refers[i].field->type == FldLiteralN ) + { + fprintf(stderr, "INSPECT field %s shouldn't be a FldLiteralN\n", + pcbl_refers[i].field->name); + gcc_unreachable(); + } + } + + build_array_of_treeplets(1, pcbl_index, pcbl_refers); + + // Do the actual call: + gg_call(VOID, + "__gg__inspect_format_2", + backward ? integer_one_node : integer_zero_node, + integers, + NULL_TREE); + } + +void +parser_inspect(cbl_refer_t identifier_1, + bool backward, + unsigned long n_operations, + cbx_inspect_t<cbl_refer_t>* operations) + { + Analyze(); + gcc_assert(n_operations); + + /* Operating philosophy: We are going to minimize the amount of + GENERIC tag creation here at compile time, mainly by eliminating + the generation of cbl_resolved_t structures that we know + contain no information. */ + + if( operations[0].tally.field ) + { + // This is a FORMAT 1 "TALLYING" + inspect_tally(backward, identifier_1, n_operations, operations); + } + else + { + // This is a FORMAT 2 "REPLACING" + inspect_replacing(backward, identifier_1, n_operations, operations); + } + } + +void +parser_inspect_conv(cbl_refer_t input, + bool backward, + cbl_refer_t original, + cbl_refer_t replacement, + cbl_inspect_qual_t before, + cbl_inspect_qual_t after ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + gg_call(CHAR_P, + "__gg__inspect_format_4", + backward ? integer_one_node : integer_zero_node, + input.field ? gg_get_address_of(input.field->var_decl_node) + : null_pointer_node, + refer_offset_source(input), + refer_size_source(input), + original.field ? gg_get_address_of(original.field->var_decl_node) + : null_pointer_node, + refer_offset_dest(original), + refer_size_dest(original), + replacement.field ? gg_get_address_of( + replacement.field->var_decl_node) + : null_pointer_node, + refer_offset_source(replacement), + replacement.all ? build_int_cst_type(SIZE_T, -1LL) + : refer_size_source(replacement), + after.identifier_4.field ? gg_get_address_of( + after.identifier_4.field->var_decl_node) + : null_pointer_node, + refer_offset_source(after.identifier_4), + refer_size_source(after.identifier_4), + before.identifier_4.field ? gg_get_address_of( + before.identifier_4.field->var_decl_node) + : null_pointer_node, + refer_offset_source(before.identifier_4), + refer_size_source(before.identifier_4), + NULL_TREE + ); + } + +void +parser_module_name( cbl_field_t *tgt, module_type_t type ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + gg_call(VOID, + "__gg__module_name", + gg_get_address_of(tgt->var_decl_node), + build_int_cst_type(INT, type), + NULL_TREE); + } + +void +parser_intrinsic_numval_c( cbl_field_t *f, + cbl_refer_t& input, + bool locale, + cbl_refer_t& currency, + bool anycase, + bool test_numval_c ) // true for TEST-NUMVAL-C + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + if( locale || anycase ) + { + gcc_unreachable(); + } + if( test_numval_c ) + { + gg_call(INT, + "__gg__test_numval_c", + gg_get_address_of(f->var_decl_node), + gg_get_address_of(input.field->var_decl_node), + refer_offset_source(input), + refer_size_source(input), + currency.field ? gg_get_address_of(currency.field->var_decl_node) : null_pointer_node, + refer_offset_source(currency), + refer_size_source(currency), + NULL_TREE + ); + } + else + { + gg_call(INT, + "__gg__numval_c", + gg_get_address_of(f->var_decl_node), + gg_get_address_of(input.field->var_decl_node), + refer_offset_source(input), + refer_size_source(input), + currency.field ? gg_get_address_of(currency.field->var_decl_node) : null_pointer_node, + refer_offset_source(currency), + refer_size_source(currency), + NULL_TREE + ); + } + } + +void +parser_intrinsic_subst( cbl_field_t *f, + cbl_refer_t& ref1, + size_t argc, + cbl_substitute_t * argv ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + store_location_stuff("SUBSTITUTE"); + unsigned char *control_bytes = (unsigned char *)xmalloc(argc * sizeof(unsigned char)); + cbl_refer_t *arg1 = (cbl_refer_t *)xmalloc(argc * sizeof(cbl_refer_t)); + cbl_refer_t *arg2 = (cbl_refer_t *)xmalloc(argc * sizeof(cbl_refer_t)); + + for(size_t i=0; i<argc; i++) + { + control_bytes[i] = (argv[i].anycase ? + substitute_anycase_e : 0) + + (argv[i].first_last == cbl_substitute_t::subst_first_e ? + substitute_first_e : 0) + + (argv[i].first_last == cbl_substitute_t::subst_last_e ? + substitute_last_e : 0); + arg1[i] = argv[i].orig; + arg2[i] = argv[i].replacement; + } + + tree control = gg_array_of_bytes(argc, control_bytes); + + build_array_of_treeplets(1, argc, arg1); + build_array_of_treeplets(2, argc, arg2); + + gg_call(VOID, + "__gg__substitute", + gg_get_address_of(f->var_decl_node), + gg_get_address_of(ref1.field->var_decl_node), + refer_offset_source(ref1), + refer_size_source(ref1), + build_int_cst_type(SIZE_T, argc), + control, + NULL_TREE); + + gg_free(control); + + free(arg2); + free(arg1); + free(control_bytes); + } + +void +parser_intrinsic_callv( cbl_field_t *tgt, + const char function_name[], + size_t nrefs, + cbl_refer_t *refs ) + { + Analyze(); + // We have been given an array of refs[nrefs]. Each ref is a pointer + // to a cbl_ref_t. We convert that to a table of pointers to run-time + // cblc_ref_t structures, and we pass that to the function_name intrinsic + // function. It is in charge of conversion to whatever form is needed. + + // We get back a return value, which we convert to tgt based on the + // intrinsic_return_type + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + fprintf(stderr, " with %zd parameters", nrefs); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + for(size_t i=0; i<nrefs; i++) + { + TRACE1_INDENT + gg_fprintf(trace_handle, 1, "parameter %ld: ", build_int_cst_type(SIZE_T, i+1)); + TRACE1_REFER("", refs[i], "") + } + } + store_location_stuff(function_name); + tree ncount = build_int_cst_type(SIZE_T, nrefs); + + build_array_of_fourplets(1, nrefs, refs); + + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + ncount, + NULL_TREE); + + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_0(cbl_field_t *tgt, + const char function_name[]) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + } + + if( strcmp(function_name, "__gg__random") == 0 ) + { + // We have no seed value, so call the "next" routine + gg_call(VOID, + "__gg__random_next", + gg_get_address_of(tgt->var_decl_node), + NULL_TREE); + } + else if( strcmp(function_name, "__gg__when_compiled") == 0 ) + { + // Pass __gg__when_compiled() the time from right now. + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); // time_t tv_sec; long tv_nsec + + store_location_stuff(function_name); + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + build_int_cst(SIZE_T, tp.tv_sec), + build_int_cst(LONG, tp.tv_nsec), + NULL_TREE); + } + else + { + store_location_stuff(function_name); + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + NULL_TREE); + } + + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_1( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + // There are special cases: + if( strstr(function_name, "__gg__length") ) + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter: ", ref1, "") + } + size_t upper = ref1.field->occurs.bounds.upper + ? ref1.field->occurs.bounds.upper : 1; + if( ref1.nsubscript ) + { + upper = 1; + } + + if( is_table(ref1.field) && !ref1.nsubscript ) + { + static tree depending_on = gg_define_variable(LONG, "..pic1_dep"); + gg_get_depending_on_value(depending_on, ref1.field); + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + gg_cast(INT128, + gg_multiply(refer_size_source(ref1), + depending_on)), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + else + { + if( upper == 1 ) + { + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + gg_cast(INT128, + refer_size_source(ref1)), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + else + { + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + gg_cast(INT128, + gg_multiply(refer_size_source(ref1), + build_int_cst_type(SIZE_T, upper))), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + } + } + else + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter: ", ref1, "") + } + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + gg_get_address_of(ref1.field->var_decl_node), + refer_offset_source(ref1), + refer_size_source(ref1), + NULL_TREE); + } + + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_2( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter 1: ", ref1, "") + TRACE1_INDENT + TRACE1_REFER("parameter 2: ", ref2, "") + } + store_location_stuff(function_name); + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + gg_get_address_of(ref1.field->var_decl_node), + refer_offset_source(ref1), + refer_size_source(ref1), + ref2.field ? gg_get_address_of(ref2.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref2), + refer_size_source(ref2), + NULL_TREE); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_3( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter 1: ", ref1, "") + TRACE1_INDENT + TRACE1_REFER("parameter 2: ", ref2, "") + TRACE1_INDENT + TRACE1_REFER("parameter 3: ", ref3, "") + } + + store_location_stuff(function_name); + + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + ref1.field ? gg_get_address_of(ref1.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref1), + refer_size_source(ref1), + ref2.field ? gg_get_address_of(ref2.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref2), + refer_size_source(ref2), + ref3.field ? gg_get_address_of(ref3.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref3), + refer_size_source(ref3), + NULL_TREE); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_4( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3, + cbl_refer_t& ref4 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter 1: ", ref1, "") + TRACE1_INDENT + TRACE1_REFER("parameter 2: ", ref2, "") + TRACE1_INDENT + TRACE1_REFER("parameter 3: ", ref3, "") + TRACE1_INDENT + TRACE1_REFER("parameter 4: ", ref4, "") + } + store_location_stuff(function_name); + + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + ref1.field ? gg_get_address_of(ref1.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref1), + refer_size_source(ref1), + ref2.field ? gg_get_address_of(ref2.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref2), + refer_size_source(ref2), + ref3.field ? gg_get_address_of(ref3.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref3), + refer_size_source(ref3), + ref4.field ? gg_get_address_of(ref4.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref4), + refer_size_source(ref4), + NULL_TREE); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +static void +field_increment(cbl_field_t *fld) + { + static tree value = gg_define_variable(INT128, "..fi_value", vs_file_static); + static tree rdigits = gg_define_variable(INT, "..fi_rdigits", vs_file_static); + get_binary_value(value, rdigits, fld, size_t_zero_node); + gg_assign( value, + gg_add(value, gg_cast(SIZE_T, integer_one_node))); + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(fld->var_decl_node), + value, + rdigits, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + +static void +create_lsearch_address_pairs(struct cbl_label_t *name) + { + // Create the lsearch structure + name->structs.lsearch = (cbl_lsearch_t *)xmalloc(sizeof(cbl_lsearch_t)); + cbl_lsearch_t *lsearch = name->structs.lsearch; + + gg_create_goto_pair(&lsearch->addresses.at_exit.go_to, + &lsearch->addresses.at_exit.label); + + gg_create_goto_pair(&lsearch->addresses.top.go_to, + &lsearch->addresses.top.label); + + gg_create_goto_pair(&lsearch->addresses.bottom.go_to, + &lsearch->addresses.bottom.label); + } + +void +parser_next_sentence() + { + // Eventually we'll need this. + } + +void +parser_lsearch_start( cbl_label_t *name, + cbl_field_t *table, + cbl_field_t *index, + cbl_field_t *varying ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + if( table ) + { + SHOW_PARSE_TEXT(" linear search of ") + SHOW_PARSE_TEXT(table->name) + } + if( index ) + { + SHOW_PARSE_TEXT(" index is ") + SHOW_PARSE_TEXT(index->name) + } + if( varying ) + { + SHOW_PARSE_TEXT(" varying ") + SHOW_PARSE_TEXT(varying->name) + } + SHOW_PARSE_END + } + // Create the goto/label pairs we are going to be needing: + create_lsearch_address_pairs(name); + cbl_lsearch_t *lsearch = name->structs.lsearch; + lsearch->first_when = true; + + // We need to find the first table element: + cbl_field_t *current = table; + while(current) + { + if( is_table(current) ) + { + // Extract the number of elements in that rightmost dimension. + lsearch->limit = gg_define_variable(LONG); + gg_get_depending_on_value(lsearch->limit, current); + break; + } + current = parent_of(current); + } + + // Establish the initial value of our counter: + lsearch->counter = gg_define_variable(LONG); + + tree value = gg_define_int128(); + if(varying) + { + get_binary_value(value, NULL, varying, size_t_zero_node); + } + else if( index ) + { + get_binary_value(value, NULL, index, size_t_zero_node); + } + gg_assign(lsearch->counter, gg_cast(LONG, value)); + + // And we need these around, so we can increment them: + lsearch->index = index; + lsearch->varying = varying; + + // From here we have to jump to the top of the loop: + gg_append_statement(lsearch->addresses.top.go_to); + + // The next next instructions will be the body of the at-exit code, so + // we need a label here so that we can get back to them + gg_append_statement(lsearch->addresses.at_exit.label); + } + +void +parser_lsearch_conditional(cbl_label_t * name) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_lsearch_t *lsearch = name->structs.lsearch; + + if( lsearch->first_when ) + { + lsearch->first_when = false; + // We are the first of the WHEN CONDITIONALs, which means we just laid down the final + // statement of the AT-EXIT imperative statements, which means it's + // time to leave the SEARCH completely. + gg_append_statement(lsearch->addresses.bottom.go_to); + + // And that puts us at the top of the loop: + gg_append_statement(lsearch->addresses.top.label); + + // It is at this point we check to see if we have reached the limit: + IF( lsearch->counter, gt_op, lsearch->limit ) + // The counter has run out. + gg_append_statement(lsearch->addresses.at_exit.go_to); + ELSE + // Just fall through into the following statements, which are + // the statements for the conditional for the first WHEN + ENDIF + } + else + { + // We are at the end of a WHEN TRUE imperative statement. + gg_append_statement(lsearch->addresses.bottom.go_to); + + // This is the second or later search_conditional. Note that the + // code generated here executes after the first parser_when call, so + // the jump_over label is ready to be placed. + + // We have to lay down the unnamed label so the prior WHEN can jump past + // its imperative statements when its condition is not met: + gg_append_statement(lsearch->jump_over.label); + } + // At this point, the parser starts laying down the statements that make + // up the next conditional. + } + +void +parser_lsearch_when( cbl_label_t *name, cbl_field_t *conditional ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_lsearch_t *lsearch = name->structs.lsearch; + + // Arriving here means that all of the conditional statements have been + // laid down, and we are ready to do the WHEN test: + + parser_if(conditional); + // We have found what we were looking for. Fall through to the next + // set of instructions, which comprise the imperative statement + // associated with the WHEN condition. + ELSE + // The conditional is false. We thus want to skip over the imperative + // instructions that are about to be laid down. + + // Create an unnamed goto/label pair: + gg_create_goto_pair(&lsearch->jump_over.go_to, + &lsearch->jump_over.label); + + // And lay down the goto. + gg_append_statement(lsearch->jump_over.go_to); + ENDIF + } + +void +parser_lsearch_end( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_lsearch_t *lsearch = name->structs.lsearch; + + // Arriving here means we have just laid down the final imperative + // statements of the final WHEN. If these statements have been executing, + // it's now time to leave the SEARCH: + gg_append_statement(lsearch->addresses.bottom.go_to); + + // It's time to lay down the last jump_over label: + gg_append_statement(lsearch->jump_over.label); + + // With that in place, we increment stuff: + gg_assign(lsearch->counter, gg_add(lsearch->counter, gg_cast(LONG, integer_one_node))); + field_increment(lsearch->index); + + if( lsearch->varying ) + { + field_increment(lsearch->varying); + } + // From here we jump to the top of the loop: + gg_append_statement(lsearch->addresses.top.go_to); + + // And that means we now lay down the label for the bottom + gg_append_statement(lsearch->addresses.bottom.label); + + // At this point, we are done with the lsearch structure + free(lsearch); + lsearch = NULL; + } + +void +parser_bsearch_start( cbl_label_t* name, + cbl_field_t *table ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + if( table ) + { + SHOW_PARSE_TEXT(" binary search of ") + SHOW_PARSE_TEXT(table->name) + } + SHOW_PARSE_END + } + + // We need a cbl_bsearch_t structure: + name->structs.bsearch = (cbl_bsearch_t *)xmalloc(sizeof(cbl_bsearch_t)); + cbl_bsearch_t *bsearch = name->structs.bsearch; + + // Create the address/label pairs we need + gg_create_goto_pair(&bsearch->too_small.go_to, + &bsearch->too_small.label); + + gg_create_goto_pair(&bsearch->too_big.go_to, + &bsearch->too_big.label); + + gg_create_goto_pair(&bsearch->top.go_to, + &bsearch->top.label); + + gg_create_goto_pair(&bsearch->first_test.go_to, + &bsearch->first_test.label); + + gg_create_goto_pair(&bsearch->bottom.go_to, + &bsearch->bottom.label); + + // The logic when we first hit a WHEN needs to be different: + bsearch->first_when = true; + + // We need to find our table element: + cbl_field_t *current = table; + while(current) + { + if( is_table(current) ) + { + break; + } + current = parent_of(current); + } + + // There are a number of things we learn from the field "current" + + // We get the index: + gcc_assert(current->occurs.indexes.nfield); + size_t index_index = current->occurs.indexes.fields[0]; + bsearch->index = cbl_field_of( symbol_at(index_index) ); + gcc_assert(bsearch->index); + + // And we get the rightward bound of the number of elements: + // Not that these are LONGS, not SIZE_T. If we are searching for something + // that is smaller than element[0] of the table, then right ends up being + // -1, so we have to have a signed type. + bsearch->left = gg_define_variable(LONG, "_left"); + bsearch->right = gg_define_variable(LONG, "_right"); + bsearch->middle = gg_define_variable(LONG, "_middle"); + + // Assign the left and right values: + gg_assign(bsearch->left, build_int_cst_type(LONG, 1)); + gg_get_depending_on_value(bsearch->right, current); + + // Create the variable that will take the compare result. + bsearch->compare_result = gg_define_int(); + + // We now jump to the top of the binary testing loop, which comes right + // after the labels where we handle non-equal cases: + gg_append_statement(bsearch->top.go_to); + + gg_append_statement(bsearch->too_small.label); + // Arrive here when the element in the array is smaller than the one we are + // looking for. This means that we move bsearch->left to the right: + gg_assign(bsearch->left, gg_add(bsearch->middle, build_int_cst_type(LONG, 1))); + gg_append_statement(bsearch->top.go_to); + + gg_append_statement(bsearch->too_big.label); + // Arrive here when the element in the array is larger than the one we + // are looking for. This means we have to move bsearch->right to the left: + gg_assign(bsearch->right, gg_subtract(bsearch->middle, build_int_cst_type(LONG, 1))); + // Fall through to TOP: + + gg_append_statement(bsearch->top.label); + // Arrive here when it is time to check to see if we are done: + IF( bsearch->left, le_op, bsearch->right ) + // We are not done. Calculate middle from 'left' and 'right' + gg_assign( bsearch->middle, + gg_add(bsearch->left, bsearch->right) ); + gg_assign( bsearch->middle, + gg_divide(bsearch->middle, build_int_cst_type(LONG, 2) )); + //gg_printf("BSEARCH At the top %ld %ld %ld\n", bsearch->left, bsearch->middle, bsearch->right, NULL_TREE); + // We need to assign that value to bsearch->index. It might be possible + // to assume that bsearch->index is a size_t and just cram the bytes into + // place at bsearch->index->var_decl_node->data. But for now we'll + // be cautious and use the slower, but more assured, method: + + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(bsearch->index->var_decl_node), + gg_cast(INT128, bsearch->middle), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + // And with middle/index established, we go do the WHEN clause: + gg_append_statement(bsearch->first_test.go_to); + ELSE + // The search ended without finding anything. Fall through to the + // AT-EXIT imperative statements that the parser will lay down right + // after the call to parser_bsearch_start(). + ENDIF + } + +void +parser_bsearch_conditional( cbl_label_t* name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_bsearch_t *bsearch = name->structs.bsearch; + + if( bsearch->first_when ) + { + bsearch->first_when = false; + // The first time we arrive here is after the WHEN part of the SEARCH ALL + // statement. We have just finished executing any AT-END statements there + // might be, so it's time to jump to the bottom: + gg_append_statement(bsearch->bottom.go_to); + + // Otherwise, the TOP part of the loop just calculated the next middle/index, + // and we now start processing it + + gg_append_statement(bsearch->first_test.label); + } + // The second parser_bsearch_conditional() is caused by the appearance of + // any subsequent AND clauses. And, it turns out, we do nothing. + + // The parser lays down the statements that calculate the conditional, + // and we just wait for parser_bsearch_when() + } + +bool +is_ascending_key(cbl_refer_t key) + { + bool retval = true; + + cbl_field_t *family_tree = key.field; + gcc_assert(family_tree); + while( family_tree ) + { + if( family_tree->occurs.nkey ) + { + break; + } + family_tree = parent_of(family_tree); + } + gcc_assert(family_tree->occurs.nkey); + for(size_t i=0; i<family_tree->occurs.nkey; i++) + { + for(size_t j=0; j<family_tree->occurs.keys[i].field_list.nfield; j++) + { + size_t index_of_field + = family_tree->occurs.keys[i].field_list.fields[j]; + cbl_field_t *key_field = cbl_field_of(symbol_at(index_of_field)); + + if( strcmp( key_field->name, + key.field->name ) == 0 ) + { + retval = family_tree->occurs.keys[i].ascending; + goto done; + } + } + } + +done: + return retval; + } + +void +parser_bsearch_when(cbl_label_t* name, + cbl_refer_t key, + cbl_refer_t sarg, + bool ascending) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_bsearch_t *bsearch = name->structs.bsearch; + + if( ascending ) + { + cobol_compare( bsearch->compare_result, + key, + sarg ); + } + else + { + cobol_compare( bsearch->compare_result, + sarg, + key ); + } + + IF( bsearch->compare_result, lt_op, integer_zero_node ) + // The key is smaller than sarg: + gg_append_statement(bsearch->too_small.go_to); + ELSE + ENDIF + IF( bsearch->compare_result, gt_op, integer_zero_node ) + // The key is larger than sarg: + gg_append_statement(bsearch->too_big.go_to); + ELSE + ENDIF + + // We are at the Goldilocks point. The clause has been satisfied with + // an equality, so we will just fall through to the next set of statements + // that the parser laid down. They are either the next conditional, or + // the final imperative statements that get executed when all the + // clauses are satisfied. + } + +void +parser_bsearch_end( cbl_label_t* name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_bsearch_t *bsearch = name->structs.bsearch; + + // Arriving here means that either the search ran out without finding + // anything, (see the test up at TOP:), or else we just fell through from + // the statements that executed after all the WHEN/AFTER clauses were + // satisifed by equality (meaning there were no jumps to TOO_SMALL: or + // TOO_LARGE). In other words: we're done. + gg_append_statement(bsearch->bottom.label); + + free(bsearch); + } + +tree +gg_array_of_field_pointers( size_t N, + cbl_field_t **fields ) + { + tree retval = gg_define_variable(build_pointer_type(cblc_field_p_type_node)); + gg_assign(retval, gg_cast(build_pointer_type(cblc_field_p_type_node), gg_malloc( build_int_cst_type(SIZE_T, N * sizeof(void *))))); + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(retval, i), gg_get_address_of(fields[i]->var_decl_node)); + } + return retval; + } + +static void +push_program_state() + { + gg_call(VOID, + "__gg__push_program_state", + NULL_TREE); + } + +static void +pop_program_state() + { + gg_call(VOID, + "__gg__pop_program_state", + NULL_TREE); + } + +void +parser_sort(cbl_refer_t tableref, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkeys, + cbl_key_t *keys ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( tableref.field ) + { + SHOW_PARSE_REF(" Sort table: ", tableref) + } + SHOW_PARSE_END + } + + cbl_field_t *table = tableref.field; + gcc_assert(table); + gcc_assert(table->var_decl_node); + if( !is_table(table) ) + { + cbl_internal_error( "%s(): asked to sort %s, but it's not a table", + __func__, + tableref.field->name); + } + size_t total_keys = 0; + for( size_t i=0; i<nkeys; i++ ) + { + total_keys += keys[i].nfield; + } + cbl_field_t **flattened_fields = (cbl_field_t **)xmalloc(total_keys * sizeof(cbl_field_t *)); + size_t *flattened_ascending = (size_t *)xmalloc(total_keys * sizeof(size_t)); + + size_t key_index = 0; + for( size_t i=0; i<nkeys; i++ ) + { + for( size_t j=0; j<keys[i].nfield; j++ ) + { + flattened_fields[key_index] = keys[i].fields[j]; + flattened_ascending[key_index] = keys[i].ascending ? 1 : 0; + key_index += 1; + } + } + + // Create the array of cbl_field_t pointers for the keys + tree all_keys = gg_array_of_field_pointers( total_keys, flattened_fields); + + // Create the array of integers that are the flags for ASCENDING: + tree ascending = gg_array_of_size_t( total_keys, flattened_ascending ); + + tree depending_on = gg_define_variable(LONG, "_sort_size"); + gg_get_depending_on_value(depending_on, table); + + if( alphabet ) + { + push_program_state(); + parser_alphabet_use(*alphabet); + } + gg_call(VOID, + "__gg__sort_table", + gg_get_address_of(tableref.field->var_decl_node), + refer_offset_source(tableref), + gg_cast(SIZE_T, depending_on), + build_int_cst_type(SIZE_T, key_index), + all_keys, + ascending, + duplicates ? integer_one_node : integer_zero_node, + NULL_TREE); + if( alphabet ) + { + pop_program_state(); + } + + free(flattened_ascending); + free(flattened_fields); + + gg_free(ascending); + gg_free(all_keys); + } + +void +parser_file_sort( cbl_file_t *workfile, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkeys, + cbl_key_t *keys, + size_t ninput, + cbl_file_t **inputs, + size_t noutput, + cbl_file_t **outputs, + cbl_perform_tgt_t *in_proc, + cbl_perform_tgt_t *out_proc ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is the implementation of SORT FORMAT 1 + + // It proceeds in three phases. + + // The first phase is absorbing the input and writing it out to the workfile: + + parser_file_open(workfile, 'w'); + IF( member(workfile, "io_status"), ge_op, build_int_cst_type(INT, FsEofSeq) ) + { + gg_printf("Couldn't open the SORT workfile for writing\n", NULL_TREE); + gg_exit(integer_one_node); + } + ELSE + ENDIF + + if( in_proc && !ninput ) + { + // We are getting our inputs from an input procedure + parser_perform(in_proc, NULL); + } + else if( ninput && !in_proc ) + { + // ninput means there was a USING clause, specifying input files. + + // We are going to transfer the input file[s] to the workfile. The + // transfer will be done so that any newlines in a LINE SEQUENTIAL file + // are skipped, and so that any records that are too long, or too short, + // are all normalized to the format of the SD record. + for(size_t i=0; i<ninput; i++) + { + parser_file_open(inputs[i], 'r'); + IF( member(workfile, "io_status"), ge_op, build_int_cst_type(INT, FsEofSeq) ) + { + gg_printf("Couldn't open the SORT USING file for input\n", NULL_TREE); + gg_exit(integer_one_node); + } + ELSE + ENDIF + + gg_call(VOID, + "__gg__file_sort_ff_input", + gg_get_address_of(workfile-> var_decl_node), + gg_get_address_of(inputs[i]->var_decl_node), + NULL_TREE); + parser_file_close(inputs[i]); + } + } + else + { + // Having both or neither violates SORT syntax + cbl_internal_error("%s(): syntax error -- both (or neither) USING " + "and input-proc are specified", + __func__); + } + parser_file_close(workfile); + + // At this point, we have workfile of unsorted data. We have a library + // routine that sorts the workfile. It needs the keys: + + // The following is a tad more complex than it needs to be. It's a partial + // clone of the code for handling multiple keys, each of which can have + // multiple fields. + + size_t total_keys = 0; + for( size_t i=0; i<nkeys; i++ ) + { + total_keys += keys[i].nfield; + } + cbl_field_t **flattened_fields = (cbl_field_t **)xmalloc(total_keys * sizeof(cbl_field_t *)); + size_t *flattened_ascending = (size_t *) xmalloc(total_keys * sizeof(size_t)); + + size_t key_index = 0; + for( size_t i=0; i<nkeys; i++ ) + { + for( size_t j=0; j<keys[i].nfield; j++ ) + { + flattened_fields[key_index] = keys[i].fields[j]; + flattened_ascending[key_index] = keys[i].ascending ? 1 : 0; + key_index += 1; + } + } + + // Create the array of cbl_field_t pointers for the keys + tree all_keys = gg_array_of_field_pointers( total_keys, flattened_fields); + + // Create the array of integers that are the flags for ASCENDING: + tree ascending = gg_array_of_size_t( total_keys, flattened_ascending ); + + // We need to open the workfile for the sorting routine: + parser_file_open(workfile, 'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for sorting in parser_file_sort\n"); + } + ELSE + ENDIF + if( alphabet ) + { + push_program_state(); + parser_alphabet_use(*alphabet); + } + gg_call(VOID, + "__gg__sort_workfile", + gg_get_address_of(workfile->var_decl_node), + build_int_cst_type(SIZE_T, key_index), + all_keys, + ascending, + duplicates ? integer_one_node : integer_zero_node, + NULL_TREE); + if( alphabet ) + { + pop_program_state(); + } + parser_file_close(workfile); + + free(flattened_ascending); + free(flattened_fields); + gg_free(ascending); + gg_free(all_keys); + + // The workfile is sorted. We move to Phase 3 -- transferring the workfile + // to the output. + + if( noutput && !out_proc) + { + // We have a GIVING phrase: + for(size_t i=0; i<noutput; i++) + { + // Open WORKFILE again to position it at the beginning + parser_file_open(workfile, 'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for transfer to GIVING" + "in parser_file_sort"); + } + ELSE + ENDIF + parser_file_open(outputs[i], 'w'); + IF( member(outputs[i], "io_status"), + ge_op, + build_int_cst(INT, FhNotOkay) ) + { + rt_error("Couldn't open GIVING file in parser_file_sort"); + } + ELSE + ENDIF + gg_call(VOID, + "__gg__file_sort_ff_output", + gg_get_address_of(outputs[i]->var_decl_node), + gg_get_address_of(workfile->var_decl_node), + NULL_TREE); + parser_file_close(outputs[i]); + parser_file_close(workfile); + } + } + else if (!noutput && out_proc) + { + // We are going to transfer the workfile to the output procedures. + parser_file_open(workfile,'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for stage-three " + "output in parser_file_sort"); + } + ELSE + { + parser_perform(out_proc, NULL); + parser_file_close(workfile); + } + ENDIF + } + else + { + cbl_internal_error("%s(): syntax error -- both (or neither) GIVING " + "and output-proc are specified", __func__); + } + } + +void +parser_release( cbl_field_t *record_area ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // When this routine is called, it writes the contents of 'record_area' to the + // workfile specified by the cbl_file_t parent of record_area: + + cbl_file_t *workfile = symbol_record_file(record_area); + + gg_call(VOID, + "__gg__file_write", + gg_get_address_of( workfile->var_decl_node), + member(record_area, "data"), + member(record_area, "capacity"), + integer_zero_node, + integer_minusone_node, + integer_zero_node, + NULL_TREE); // non-random + set_user_status(workfile); + } + +void +parser_return_start( cbl_file_t *workfile, cbl_refer_t into ) + { + Analyze(); + // This function helps implement the COBOL RETURN statement, which is used + // in SORT and MERGE to "return" data from an intermediate sort/merge file + // to SORT/MERGE output procedure. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // We assume that workfile is open. + + workfile->addresses = (cbl_sortreturn_t *)xmalloc(sizeof(cbl_sortreturn_t)); + gg_create_goto_pair(&workfile->addresses->at_end.go_to, + &workfile->addresses->at_end.label); + gg_create_goto_pair(&workfile->addresses->not_at_end.go_to, + &workfile->addresses->not_at_end.label); + gg_create_goto_pair(&workfile->addresses->bottom.go_to, + &workfile->addresses->bottom.label); + + // Read the data from workfile into the SD record position: + cbl_field_t *data_location = symbol_file_record(workfile); + parser_file_read(workfile, data_location, -1 ); + + // And jump to either at_end or not_at_end, depending: + IF( member(workfile, "io_status"), lt_op, build_int_cst(INT, FsEofSeq) ) + { + // The read was successful. We move the result into place + if( into.field ) + { + cbl_field_t *record_area = + cbl_field_of(symbol_at(workfile->default_record)); + parser_move(into, record_area, truncation_e); + } + // And having moved -- or not -- the record, jump to the not-at-end + // imperative + gg_append_statement(workfile->addresses->not_at_end.go_to); + } + ELSE + ENDIF + + IF( member(workfile, "io_status"), lt_op, build_int_cst(INT, FsKeySeq) ) + { + // The read didn't succeed because of an end-of-file condition + gg_append_statement(workfile->addresses->at_end.go_to); + } + ELSE + ENDIF + + // Arriving here means some kind of error condition. So, we don't do the + // move, and we jump to the end of the statement + gg_append_statement(workfile->addresses->bottom.go_to); + } + +void +parser_return_atend( cbl_file_t *workfile ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // There might or might not be an at_end clause, and it might, or might + // not, appear after a not_at_end clause. If we are appearing after + // a not_at_end clause, we need to finish that clause with a jump to the + // bottom of the logic: + if( !workfile->addresses->not_at_end.label ) + { + // We have been preceded by a not_at_end label. So, we need to + // put in a jump to end those statements: + gg_append_statement(workfile->addresses->bottom.go_to); + } + // And now we place the at_end label: + gg_append_statement(workfile->addresses->at_end.label); + + // And having placed it, NULL it out + workfile->addresses->at_end.label = NULL; + + // The imperative statements of the NOT AT END clause will follow + } + +void +parser_return_notatend( cbl_file_t *workfile ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // There might or might not be a not_at_end clause, and it might, or might + // not, appear after a at_end clause. If we are appearing after + // a at_end clause, we need to finish that clause with a jump to the + // bottom of the logic: + if( !workfile->addresses->at_end.label ) + { + // We have been preceded by an at_end label. So, we need to + // put in a jump to end those statements: + gg_append_statement(workfile->addresses->bottom.go_to); + } + // And now we place the not_at_end label: + gg_append_statement(workfile->addresses->not_at_end.label); + + // And having placed it, NULL it out + workfile->addresses->not_at_end.label = NULL; + + // The imperative statements of the AT END clause will follow + } + +void +parser_return_finish( cbl_file_t *workfile ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // If we are preceded by either an at_end or not_at_end clause, we need + // to end those statements with a jump to the bottom: + if( !workfile->addresses->at_end.label || !workfile->addresses->not_at_end.label) + { + gg_append_statement(workfile->addresses->bottom.go_to); + } + + // We need to place labels for clauses that weren't explicitly expressed + // in the COBOL source code. (Both were explicit targets of goto statements + // back in parser_return_start, so we need to place them here if they + // weren't placed elsewhere) + if( workfile->addresses->at_end.label ) + { + gg_append_statement(workfile->addresses->at_end.label); + } + if( workfile->addresses->not_at_end.label ) + { + gg_append_statement(workfile->addresses->not_at_end.label); + } + // And that brings us to the bottom: + gg_append_statement(workfile->addresses->bottom.label); + + free(workfile->addresses); + } + +static tree +gg_array_of_file_pointers( size_t N, + cbl_file_t **files ) + { + tree retval = gg_define_variable(build_pointer_type(cblc_file_p_type_node)); + gg_assign(retval, gg_cast( build_pointer_type(cblc_file_p_type_node), + gg_malloc( build_int_cst_type(SIZE_T, N * sizeof(void *))))); + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(retval, i), gg_get_address_of(files[i]->var_decl_node)); + } + return retval; + } + +void +parser_file_merge( cbl_file_t *workfile, + cbl_alphabet_t *alphabet, + size_t nkeys, + cbl_key_t *keys, + size_t ninputs, + cbl_file_t **inputs, + size_t noutputs, + cbl_file_t **outputs, + cbl_perform_tgt_t *out_proc ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // Our default file organization is LINE SEQUENTIAL, which spectacularly does + // *not* work for a SORT workfile. + if( workfile->org == file_line_sequential_e ) + { + workfile->org = file_sequential_e; + gg_assign( member(workfile->var_decl_node, "org"), + build_int_cst_type(INT, file_sequential_e)); + } + + size_t total_keys = 0; + for( size_t i=0; i<nkeys; i++ ) + { + total_keys += keys[i].nfield; + } + cbl_field_t **flattened_fields + = (cbl_field_t **)xmalloc(total_keys * sizeof(cbl_field_t *)); + size_t *flattened_ascending + = (size_t *)xmalloc(total_keys * sizeof(size_t)); + + size_t key_index = 0; + for( size_t i=0; i<nkeys; i++ ) + { + for( size_t j=0; j<keys[i].nfield; j++ ) + { + flattened_fields[key_index] = keys[i].fields[j]; + flattened_ascending[key_index] = keys[i].ascending ? 1 : 0; + key_index += 1; + } + } + + // Create the array of cbl_field_t pointers for the keys + tree all_keys = gg_array_of_field_pointers(total_keys, flattened_fields); + + // Create the array of integers that are the flags for ASCENDING: + tree ascending = gg_array_of_size_t(total_keys, flattened_ascending); + + tree all_files = gg_array_of_file_pointers(ninputs, inputs); + + // We need to open all of the input files and the workfile. It's easiest to + // do that here, rather than in the libgcobol, because of the possibility that + // the filename is in a variable or an environment variable, rather than a + // literal. This is handled by parser_file_open() in a way that would be + // inconvenient in __gg__file_open + + parser_file_open(workfile, 'w'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst_type(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for stage-one " + "writing in parser_file_merge"); + } + ELSE + ENDIF + + for(size_t i=0; i<ninputs; i++) + { + if( process_this_exception(ec_sort_merge_file_open_e) ) + { + IF( member(inputs[i], "file_pointer"), ne_op, null_pointer_node ) + { + if( enabled_exceptions.match(ec_sort_merge_file_open_e) ) + { + set_exception_code(ec_sort_merge_file_open_e); + } + else + { + rt_error("FILE MERGE file not open"); + } + } + ELSE + ENDIF + } + + parser_file_open(inputs[i], 'r'); + IF( member(inputs[i], "io_status"), + ge_op, + build_int_cst_type(INT, FhNotOkay) ) + { + char ach[128]; + sprintf(ach, + "Couldn't open %s for stage-one reading in parser_file_merge", + inputs[i]->name); + rt_error(ach); + } + ELSE + ENDIF + } + + cbl_field_t *sd_record = symbol_file_record(workfile); + if( alphabet ) + { + push_program_state(); + parser_alphabet_use(*alphabet); + } + gg_call(VOID, + "__gg__merge_files", + gg_get_address_of(workfile->var_decl_node), + build_int_cst_type(SIZE_T, nkeys), + all_keys, + ascending, + build_int_cst_type(SIZE_T, ninputs), + all_files, + NULL_TREE); + if( alphabet ) + { + pop_program_state(); + } + + free(flattened_ascending); + free(flattened_fields); + gg_free(ascending); + gg_free(all_keys); + + parser_file_close(workfile); + for(size_t i=0; i<ninputs; i++) + { + parser_file_close(inputs[i]); + } + + // The merged workfile has been created. + if( noutputs && !out_proc) + { + // We are going to transfer the workfile to the output files. + for(size_t i=0; i<noutputs; i++) + { + if( process_this_exception(ec_sort_merge_file_open_e) ) + { + IF( member(outputs[i], "file_pointer"), ne_op, null_pointer_node ) + { + if( enabled_exceptions.match(ec_sort_merge_file_open_e) ) + { + set_exception_code(ec_sort_merge_file_open_e); + } + else + { + rt_error("FILE MERGE file not open"); + } + } + ELSE + ENDIF + } + // We keep reopening the workfile as a convenient way to make sure it is + // positioned at the beginning. + parser_file_open(workfile,'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst_type(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for stage-three " + "reading in parser_file_merge\n"); + } + ELSE + ENDIF + + parser_file_open(outputs[i], 'w'); + IF( member(outputs[i], "io_status"), + ge_op, + build_int_cst_type(INT, FhNotOkay) ) + { + rt_error("Couldn't open an output file in parser_file_merge"); + } + ELSE + ENDIF + gg_call(VOID, + "__gg__file_sort_ff_output", + gg_get_address_of(outputs[i]->var_decl_node), + gg_get_address_of(workfile-> var_decl_node), + gg_get_address_of(sd_record-> var_decl_node), + NULL_TREE); + parser_file_close(outputs[i]); + parser_file_close(workfile); + } + } + else if (!noutputs && out_proc) + { + // We are going to transfer the workfile to the output procedures. + parser_file_open(workfile,'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst_type(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for" + " stage-three output in parser_file_merge"); + } + ELSE + ENDIF + parser_perform(out_proc, NULL); + parser_file_close(workfile); + } + else + { + cbl_internal_error("%s(): syntax error -- both (or neither) " + "files and output-proc are specified", __func__); + } + } + +void +parser_string_overflow( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + /* + * parser_string_overflow is called 0-2 times before the associated + * parser_string. + */ + + name->structs.unstring + = (cbl_unstring_t *)xmalloc(sizeof(struct cbl_unstring_t) ); + + // Set up the address pairs for this clause + gg_create_goto_pair(&name->structs.unstring->over.go_to, + &name->structs.unstring->over.label); + gg_create_goto_pair(&name->structs.unstring->into.go_to, + &name->structs.unstring->into.label); + gg_create_goto_pair(&name->structs.unstring->bottom.go_to, + &name->structs.unstring->bottom.label); + + // Jump over the [NOT] ON OVERFLOW code that is about to be laid down + gg_append_statement( name->structs.unstring->over.go_to ); + + // Create the label that allows the following code to be executed at + // the appropriate time. + gg_append_statement( name->structs.unstring->into.label ); + } + +void +parser_string_overflow_end( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + gg_append_statement( name->structs.unstring->bottom.go_to ); + } + +void +parser_unstring(cbl_refer_t src, + size_t ndelimited, + cbl_refer_t *delimiteds, + size_t noutputs, + cbl_refer_t *outputs, + cbl_refer_t *delimiters, + cbl_refer_t *counts, + cbl_refer_t pointer, + cbl_refer_t tally, + cbl_label_t *overflow, + cbl_label_t *not_overflow ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + if( overflow ) + { + gg_append_statement(overflow->structs.unstring->over.label); + } + if( not_overflow ) + { + gg_append_statement(not_overflow->structs.unstring->over.label); + } + + cbl_refer_t *delims = (cbl_refer_t *)xmalloc(ndelimited * sizeof(cbl_refer_t)); + char *alls = (char *)xmalloc(ndelimited+1); + + for(size_t i=0; i<ndelimited; i++) + { + delims[i] = delimiteds[i]; + alls[i] = delimiteds[i].all ? '1' : '0' ; + } + alls[ndelimited] = '\0'; + + tree t_alls = build_string_literal(ndelimited+1, alls); + + build_array_of_treeplets(1, ndelimited, delims); + build_array_of_treeplets(2, noutputs, outputs); + build_array_of_treeplets(3, noutputs, delimiters); + build_array_of_treeplets(4, noutputs, counts); + + tree t_overflow = gg_define_int(); + gg_assign(t_overflow, + gg_call_expr( INT, + "__gg__unstring", + gg_get_address_of(src.field->var_decl_node), + refer_offset_source(src), + refer_size_source(src), + build_int_cst_type(SIZE_T, ndelimited), + t_alls, + build_int_cst_type(SIZE_T, noutputs), + pointer.field ? gg_get_address_of(pointer.field->var_decl_node) : null_pointer_node, + refer_offset_dest(pointer), + refer_size_dest(pointer), + tally.field ? gg_get_address_of(tally.field->var_decl_node) : null_pointer_node, + refer_offset_dest(tally), + refer_size_dest(tally), + NULL_TREE) + ); + free(alls); + free(delims); + + if( overflow ) + { + // We have an ON OVERFLOW clause: + IF( t_overflow, ne_op, integer_zero_node ) + // And we have an overflow condition + gg_append_statement( overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( not_overflow ) + { + // We have a NOT ON OVERFLOW clause: + IF( t_overflow, eq_op, integer_zero_node ) + // And there isn't an overflow condition: + gg_append_statement( not_overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( overflow ) + { + gg_append_statement( overflow->structs.unstring->bottom.label ); + free( overflow->structs.unstring ); + } + + if( not_overflow ) + { + gg_append_statement( not_overflow->structs.unstring->bottom.label ); + free( not_overflow->structs.unstring ); + } + } + +void +parser_string( cbl_refer_t tgt, + cbl_refer_t pointer, + size_t nsource, + cbl_string_src_t *sources, + cbl_label_t *overflow, + cbl_label_t *not_overflow ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + if( overflow ) + { + gg_append_statement(overflow->structs.unstring->over.label); + } + if( not_overflow ) + { + gg_append_statement(not_overflow->structs.unstring->over.label); + } + + // We need an array of nsource+1 integers: + size_t *integers = (size_t *)xmalloc((nsource+1)*sizeof(size_t)); + + // Count up how many treeplets we are going to need: + size_t cblc_count = 2; // tgt and pointer + for(size_t i=0; i<nsource; i++) + { + cblc_count += 1 + sources[i].ninput; // 1 for identifier_2 + ninput identifier_1 values; + } + + cbl_refer_t *refers = (cbl_refer_t *)xmalloc(cblc_count * sizeof(cbl_refer_t)); + + size_t index_int = 0; + size_t index_cblc = 0; + + integers[index_int++] = nsource; + + refers[index_cblc++] = tgt; + refers[index_cblc++] = pointer; + + for(size_t i=0; i<nsource; i++) + { + integers[index_int++] = sources[i].ninput; + refers[index_cblc++] = sources[i].delimited_by; + for(size_t j=0; j<sources[i].ninput; j++) + { + refers[index_cblc++] = sources[i].inputs[j]; + } + } + + gcc_assert(index_int == nsource+1); + gcc_assert(index_cblc == cblc_count); + + tree pintegers = build_array_of_size_t( index_int, integers); + + build_array_of_treeplets(1, index_cblc, refers); + + tree t_overflow = gg_define_int(); + gg_assign(t_overflow, gg_call_expr( INT, + "__gg__string", + pintegers, + NULL_TREE)); + gg_free(pintegers); + + free(integers); + free(refers); + + if( overflow ) + { + // We have an ON OVERFLOW clause: + IF( t_overflow, ne_op, integer_zero_node ) + // And we have an overflow condition + gg_append_statement( overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( not_overflow ) + { + // We have a NOT ON OVERFLOW clause: + IF( t_overflow, eq_op, integer_zero_node ) + // And there isn't an overflow condition: + gg_append_statement( not_overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( overflow ) + { + gg_append_statement( overflow->structs.unstring->bottom.label ); + free( overflow->structs.unstring ); + } + + if( not_overflow ) + { + gg_append_statement( not_overflow->structs.unstring->bottom.label ); + free( not_overflow->structs.unstring ); + } + } + +void +parser_call_exception( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + SHOW_PARSE_END + } + + name->structs.call_exception + = (cbl_call_exception_t *)xmalloc(sizeof(struct cbl_call_exception_t) ); + + // Set up the address pairs for this clause + gg_create_goto_pair(&name->structs.call_exception->over.go_to, + &name->structs.call_exception->over.label); + gg_create_goto_pair(&name->structs.call_exception->into.go_to, + &name->structs.call_exception->into.label); + gg_create_goto_pair(&name->structs.call_exception->bottom.go_to, + &name->structs.call_exception->bottom.label); + + // Jump over the [NOT] ON EXCEPTION code that is about to be laid down + // char ach[128]; + // sprintf(ach, "# parser_call_exception %s: over.goto", name->name); + // gg_insert_into_assembler(ach); + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("except over.goto") + SHOW_PARSE_END + } + gg_append_statement( name->structs.call_exception->over.go_to ); + + // Create the label that allows the following code to be executed at + // the appropriate time. + // sprintf(ach, "# parser_call_exception %s: into.label", name->name); + // gg_insert_into_assembler(ach); + gg_append_statement( name->structs.call_exception->into.label ); + } + +void +parser_call_exception_end( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(name->name) + SHOW_PARSE_END + } + // char ach[128]; + // sprintf(ach, "# parser_call_exception_end %s: bottom.goto", name->name); + // gg_insert_into_assembler(ach); + gg_append_statement( name->structs.call_exception->bottom.go_to ); + } + +static +void +create_and_call(size_t narg, + cbl_ffi_arg_t args[], + tree function_handle, + tree returned_value_type, + cbl_refer_t returned, + cbl_label_t *not_except + ) + { + // We have a good function handle, so we are going to create a call + tree *arguments = NULL; + int *allocated = NULL; + + if(narg) + { + arguments = (tree *)xmalloc(2*narg * sizeof(tree)); + allocated = (int * )xmalloc(narg * sizeof(int)); + } + + // Put the arguments onto the "stack" of calling parameters: + for( size_t i=0; i<narg; i++ ) + { + cbl_ffi_crv_t crv = args[i].crv; + + if( args[i].refer.field && args[i].refer.field->type == FldLiteralN ) + { + crv = by_value_e; + } + + allocated[i] = 0; + + tree location = gg_define_variable(UCHAR_P, "..location.1", vs_stack); + tree length = gg_define_variable(SIZE_T, "..length.1", vs_stack); + + if( !args[i].refer.field ) + { + // The PARAMETER is OMITTED + arguments[i] = null_pointer_node; + gg_assign(gg_array_value(var_decl_call_parameter_lengths, i), + size_t_zero_node); + continue; + } + + if( refer_is_clean(args[i].refer) ) + { + if( args[i].refer.field->type == FldLiteralA ) + { + crv = by_content_e; + gg_assign(location, + gg_cast(UCHAR_P, build_string_literal(args[i].refer.field->data.capacity, + args[i].refer.field->data.initial))); + gg_assign(length, + build_int_cst_type( SIZE_T, + args[i].refer.field->data.capacity)); + } + else + { + gg_assign(location, + member(args[i].refer.field->var_decl_node, "data")); + gg_assign(length, + member(args[i].refer.field->var_decl_node, "capacity")); + } + } + else + { + gg_assign(location, + qualified_data_source(args[i].refer)), + gg_assign(length, + refer_size_source(args[i].refer)); + } + + switch( crv ) + { + case by_default_e: + gcc_unreachable(); + break; + + case by_reference_e: + { + arguments[i] = location; + + // Pass the pointer to the data location, so that the called program + // can both access and change the data. + break; + } + + case by_content_e: + { + if( (args[i].refer.field->attr & intermediate_e) + && is_valuable(args[i].refer.field->type) ) + { + cbl_unimplemented("CALL USING BY CONTENT <temporary> would require " + "REPOSITORY PROTOTYPES."); + } + + // BY CONTENT means that the called program gets a copy of the data. + + // We'll free this copy after the called program returns. + + switch(args[i].attr) + { + case address_of_e: + { + // Allocate the memory, and make the copy: + arguments[i] = gg_define_char_star(); + allocated[i] = 1; + gg_assign(arguments[i], gg_malloc(length) ) ; + gg_memcpy(arguments[i], + location, + length); + break; + } + + case length_of_e: + { + // The BY CONTENT LENGTH OF gets passed as an 64-bit big-endian + // value + arguments[i] = gg_define_size_t(); + allocated[i] = 1; + gg_assign(arguments[i], gg_malloc(length) ) ; + gg_call(VOID, + "__gg__copy_as_big_endian", + gg_get_address_of(arguments[i]), + length, + NULL_TREE); + break; + } + + case none_of_e: + { + // Allocate the memory, and make the copy: + arguments[i] = gg_define_char_star(); + allocated[i] = 1; + gg_assign(arguments[i], gg_cast(CHAR_P, gg_malloc(length))) ; + gg_memcpy(arguments[i], location, length); + break; + } + } + break; + } + + case by_value_e: + { + // For BY VALUE, we take whatever we've been given and do our best to + // make a 64-bit value out of it, although we move to 128 bits when + // necessary. + switch(args[i].attr) + { + case address_of_e: + { + arguments[i] = gg_define_size_t(); + gg_assign(arguments[i], gg_cast(SIZE_T, location )); + break; + } + + case length_of_e: + { + arguments[i] = gg_define_size_t(); + gg_assign(arguments[i], gg_cast(SIZE_T, length)); + break; + } + + case none_of_e: + { + assert(args[i].refer.field); + bool as_int128 = false; + if( !(args[i].refer.field->attr & intermediate_e) ) + { + // All temporaries are SIZE_T + if( args[i].refer.field->type == FldFloat + && args[i].refer.field->data.capacity == 16 ) + { + as_int128 = true; + } + else if( args[i].refer.field->type == FldNumericBin5 + && args[i].refer.field->data.digits == 0 + && args[i].refer.field->data.capacity == 16 ) + { + as_int128 = true; + } + else if( args[i].refer.field->data.digits > 18 ) + { + as_int128 = true; + } + } + + if( as_int128 ) + { + arguments[i] = gg_define_variable(INT128); + gg_assign(arguments[i], + gg_cast(INT128, + gg_call_expr( + INT128, + "__gg__fetch_call_by_value_value", + gg_get_address_of(args[i].refer.field->var_decl_node), + refer_offset_source(args[i].refer), + refer_size_source(args[i].refer), + NULL_TREE))); + } + else + { + arguments[i] = gg_define_size_t(); + gg_assign(arguments[i], + gg_cast(SIZE_T, + gg_call_expr( + INT128, + "__gg__fetch_call_by_value_value", + gg_get_address_of(args[i].refer.field->var_decl_node), + refer_offset_source(args[i].refer), + refer_size_source(args[i].refer), + NULL_TREE))); + } + break; + } + } + } + } + // The elements in this array tell the called routine the length of each + // variable. This value is used both to handle ANY LENGTH formal + // parameters, and to provide information to the called program when being + // passed expressions BY VALUE and BY CONTENT + gg_assign(gg_array_value(var_decl_call_parameter_lengths, i),length); + } + + // Let the called program know how many parameters we are passing + gg_assign(var_decl_call_parameter_count, + build_int_cst_type(INT, narg)); + + gg_assign(var_decl_call_parameter_signature, + gg_cast(CHAR_P, function_handle)); + + tree call_expr = gg_call_expr_list( returned_value_type, + function_handle, + narg, + arguments ); + tree returned_value; + if( returned.field ) + { + returned_value = gg_define_variable(returned_value_type); + + // We are expecting a return value of type CHAR_P, SSIZE_T, SIZE_T, + // UINT128 or INT128 + push_program_state(); + gg_assign(returned_value, gg_cast(returned_value_type, call_expr)); + pop_program_state(); + + // Because the CALL had a RETURNING clause, RETURN-CODE doesn't return a + // value. So, we make sure it is zero + gg_assign(var_decl_return_code, build_int_cst_type(SHORT, 0)); + + if( returned_value_type == CHAR_P ) + { + tree returned_location = gg_define_uchar_star(); + tree returned_length = gg_define_size_t(); + // we were given a returned::field, so find its location and length: + gg_assign(returned_location, + gg_add( member(returned.field->var_decl_node, "data"), + refer_offset_dest(returned))); + gg_assign(returned_length, + refer_size_dest(returned)); + + // The returned value is a string of nbytes, which by specification + // has to be at least as long as the returned_length of the target: + IF( returned_value, + eq_op, + gg_cast(returned_value_type, null_pointer_node ) ) + { + // Somebody was discourteous enough to return a NULL pointer + // We'll jam in spaces: + gg_memset( returned_location, + char_nodes[(unsigned char)internal_space], + returned_length ); + } + ELSE + { + // There is a valid pointer. Do the assignment. + move_tree(returned.field, + refer_offset_dest(returned), + returned_value, + integer_one_node); + } + ENDIF + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("returned value: ", returned, "") + TRACE1_END + } + } + else if( returned_value_type == SSIZE_T + || returned_value_type == SIZE_T + || returned_value_type == INT128 + || returned_value_type == UINT128) + { + // We got back a 64-bit or 128-bit integer. The called and calling + // programs have to agree on size, but other than that, integer numeric + // types are converted one to the other. + gg_call(VOID, + "__gg__int128_to_qualified_field", + gg_get_address_of(returned.field->var_decl_node), + refer_offset_dest(returned), + refer_size_dest(returned), + gg_cast(INT128, returned_value), + member(returned.field->var_decl_node, "rdigits"), + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("returned value: ", returned, "") + TRACE1_END + } + } + else if( returned_value_type == FLOAT + || returned_value_type == DOUBLE + || returned_value_type == FLOAT128) + { + tree returned_location = gg_define_uchar_star(); + tree returned_length = gg_define_size_t(); + // we were given a returned::field, so find its location and length: + gg_assign(returned_location, + qualified_data_source(returned)); + gg_assign(returned_length, + refer_size_source(returned)); + + // We are doing float-to-float, and we require that those be identical + // one the caller and callee sides. + gg_memcpy( returned_location, + gg_get_address_of(returned_value), + returned_length); + + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("returned value: ", returned, "") + TRACE1_END + } + } + else + { + cbl_internal_error( + "%s(): What in the name of Nero's fiddle are we doing here?", + __func__); + } + } + else + { + // Because no explicit returning value is expected, we switch to + // the IBM default behavior, where the returned INT value is assigned + // to our RETURN-CODE: + returned_value = gg_define_variable(SHORT); + + // Before doing the call, we save the COBOL program_state: + push_program_state(); + gg_assign(returned_value, gg_cast(SHORT, call_expr)); + // And after the call, we restore it: + pop_program_state(); + + // We know that the returned value is a 2-byte little-endian INT: + gg_assign( var_decl_return_code, + returned_value); + TRACE1 + { + TRACE1_HEADER + gg_printf("returned value: %d", + gg_cast(INT, var_decl_return_code), + NULL_TREE); + TRACE1_END + } + } + + for( size_t i=0; i<narg; i++ ) + { + if( allocated[i] ) + { + gg_free(arguments[i]); + } + } + free(arguments); + free(allocated); + + if( not_except ) + { + // We have an ON EXCEPT clause: + gg_append_statement( not_except->structs.call_exception->into.go_to ); + } + } + +void +parser_call( cbl_refer_t name, + cbl_refer_t returned, // This is set by RETURNING clause + size_t narg, + cbl_ffi_arg_t args[], + cbl_label_t *except, + cbl_label_t *not_except, + bool /*is_function*/) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD( " calling ", name.field) + if( except ) + { + SHOW_PARSE_TEXT(" - except is ") + SHOW_PARSE_TEXT(except->name) + } + if( not_except ) + { + SHOW_PARSE_TEXT(" - not_except is ") + SHOW_PARSE_TEXT(not_except->name) + } + SHOW_PARSE_TEXT(" (") + for(size_t i=0; i<narg; i++) + { + cbl_field_t *p = args[i].refer.field; + SHOW_PARSE_FIELD( " ", p) + } + SHOW_PARSE_TEXT(" )") + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("calling ", name, ""); + for(size_t i=0; i<narg; i++) + { + TRACE1_INDENT + gg_fprintf(trace_handle, 1, "parameter %d: ", build_int_cst_type(INT, i+1)); + switch( args[i].crv ) + { + case by_default_e: gcc_unreachable(); + case by_reference_e: + TRACE1_TEXT(" BY REFERENCE ") + break; + case by_content_e: + TRACE1_TEXT(" BY CONTENT ") + break; + case by_value_e: + TRACE1_TEXT(" BY VALUE ") + break; + } + TRACE1_REFER("", args[i].refer, "") + } + TRACE1_END + } + + // If we have an ON EXCEPTION clause, a GOTO was established in + // parser_call_exception(). + // Here is where we place the label for that GOTO + + if( except ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("except over.label:") + } + gg_append_statement(except->structs.call_exception->over.label); + } + + // Likewise, for a NOT ON EXCEPTION + if( not_except ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("not_except over.label:") + } + gg_append_statement(not_except->structs.call_exception->over.label); + } + + // We are getting close to establishing the function_type. To do that, + // we want to establish the function's return type. + +// gg_push_context(); + size_t nbytes; + tree returned_value_type = tree_type_from_field_type(returned.field, nbytes); + + tree function_handle = function_handle_from_name( name, + returned_value_type); + if( (use_static_call() && is_literal(name.field)) + || (name.field && name.field->type == FldPointer) ) + { + // If these conditions are true, then we know we have a good + // function_handle, and we don't need to check + create_and_call(narg, + args, + function_handle, + returned_value_type, + returned, + not_except + ); + } + else + { + // We might not have a good handle, so we have to check: + IF( function_handle, + ne_op, + gg_cast(TREE_TYPE(function_handle), null_pointer_node) ) + { + create_and_call(narg, + args, + function_handle, + returned_value_type, + returned, + not_except + ); + } + ELSE + { + // We have a bad function pointer, which is the except condition: + parser_exception_raise(ec_program_not_found_e); + if( except ) + { + // We have an ON EXCEPT clause: + gg_append_statement( except->structs.call_exception->into.go_to ); + // Because there is an ON EXCEPTION clause, suppress DECLARATIVE + // processing + gg_assign(var_decl_exception_code, integer_zero_node); + } + else + { + tree mangled_name = gg_define_variable(CHAR_P); + + gg_call(VOID, + "__gg__just_mangle_name", + (name.field->var_decl_node + ? gg_get_address_of(name.field->var_decl_node) + : null_pointer_node), + gg_get_address_of( mangled_name), + NULL_TREE); + + gg_printf("WARNING: %s:%d \"CALL %s\" not found" + " with no \"CALL ON EXCEPTION\" phrase\n", + gg_string_literal(current_filename.back().c_str()), + build_int_cst_type(INT, CURRENT_LINE_NUMBER), + mangled_name, + NULL_TREE); + } + } + ENDIF + } + + // Clean up the label bookkeeping + if( except ) + { + gg_append_statement( except->structs.call_exception->bottom.label ); + free( except->structs.call_exception ); + } + if( not_except ) + { + gg_append_statement( not_except->structs.call_exception->bottom.label ); + free( not_except->structs.call_exception ); + } +// gg_pop_context(); + + } + +// Set global variable to use alternative ENTRY point. +void +parser_entry_activate( size_t iprog, const cbl_label_t *declarative ) + { + assert(iprog == symbol_elem_of(declarative)->program); + } + +// Define ENTRY point with alternative LINKAGE +void +parser_entry( cbl_field_t */*name*/, size_t /*narg*/, cbl_ffi_arg_t */*args*/ ) + { + } + +void +parser_bitop( struct cbl_field_t *tgt, // tgt has to be a FldConditional + struct cbl_field_t *a, // is modified by SET,CLEAR + enum bitop_t op, + size_t bitmask ) + { + Analyze(); + // This routine is designed to set, clear, and test BITMASK bits in the + // A operand. For ON and OFF, it sets tgt, a FldConditional, to TRUE or FALSE + + // This is clumsy: The ops[] array has to match bitop_t + static const char *ops[] = { "SET", "CLEAR", "ON", "OFF", + "AND", "OR", "XOR" }; + gcc_assert( op < COUNT_OF(ops) ); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD( " switch: ", a) + fprintf(stderr, " mask: %lx", bitmask); + fprintf(stderr, " op: %s", ops[op]); + SHOW_PARSE_FIELD( " target ", tgt) + SHOW_PARSE_END + } + + if(tgt && tgt->type != FldConditional) + { + fprintf(stderr, + "%s(): The target %s has to be a FldConditional, not %s\n", + __func__, + tgt->name, + cbl_field_type_str(tgt->type)); + gcc_unreachable(); + } + + switch(op) + { + case bit_set_op: + case bit_clear_op: + // For set_on and set_off operations, the tgt is superfluous, so I + // did this code just in case the parser doesn't give us anything + // to set + gg_call(BOOL, + "__gg__bitop", + gg_get_address_of(a->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(SIZE_T, bitmask), + NULL_TREE ); + break; + + case bit_on_op: + case bit_off_op: + gg_assign( tgt->var_decl_node, + gg_call_expr( BOOL, + "__gg__bitop", + gg_get_address_of(a->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(SIZE_T, bitmask), + NULL_TREE)); + break; + + case bit_and_op: + case bit_or_op: + case bit_xor_op: + fprintf(stderr, + "%s(): The %s operation is not valid\n", + __func__, + ops[op]); + gcc_unreachable(); + break; + } + + TRACE1 + { + TRACE1_HEADER + //TRACE1_FIELD_INFO( " target ", tgt) + TRACE1_FIELD_INFO( " a ", a) + TRACE1_END + } + } + +void +parser_bitwise_op(struct cbl_field_t *tgt, + struct cbl_field_t *a, + enum bitop_t op, + size_t bitmask ) + { + Analyze(); + // This routine is a specialized TGT = A op (size_t) bitmask, where OP is + // AND, OR, or XOR. A should be an integer type. tgt should be a valid target + // for a move where an integer is the sender. + + // SET and CLEAR are straightforward. ON returns true if any bitmask bit is + // one in 'A'. OFF returns true if any bitmask bit in 'A' is zero. + + // This is clumsy: The ops[] array has to match bitop_t + static const char *ops[] = { "SET", "CLEAR", "ON", "OFF", + "AND", "OR", "XOR" }; + gcc_assert( op < COUNT_OF(ops) ); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD( " switch: ", a) + fprintf(stderr, " mask: %lx", bitmask); + fprintf(stderr, " op: %s", ops[op]); + SHOW_PARSE_FIELD( " target ", tgt) + SHOW_PARSE_END + } + + if( tgt && !is_valuable(tgt->type) && tgt->type != FldLiteralN) + { + fprintf(stderr, + "%s(): The target %s has to be is_valuable, not %s\n", + __func__, + tgt->name, + cbl_field_type_str(tgt->type)); + gcc_unreachable(); + } + + switch(op) + { + case bit_set_op: + case bit_clear_op: + case bit_on_op: + case bit_off_op: + fprintf(stderr, + "%s(): The %s operation is not valid\n", + __func__, + ops[op]); + gcc_unreachable(); + break; + + case bit_and_op: + case bit_or_op: + case bit_xor_op: + gg_call(VOID, + "__gg__bitwise_op", + gg_get_address_of(tgt->var_decl_node), + gg_get_address_of(a->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(SIZE_T, bitmask), + NULL_TREE ); + break; + } + + TRACE1 + { + TRACE1_HEADER + //TRACE1_FIELD_INFO( " target ", tgt) + TRACE1_FIELD_INFO( " a ", a) + TRACE1_END + } + } + +void +parser_set_pointers( size_t ntgt, cbl_refer_t *tgts, cbl_refer_t source ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" source ", source.field); + char ach[128]; + sprintf(ach, + " source.addr_of %s", + source.addr_of ? "TRUE" : "FALSE" ); + SHOW_PARSE_TEXT(ach); + for( size_t i=0; i<ntgt; i++ ) + { + SHOW_PARSE_INDENT + SHOW_PARSE_FIELD("target ", tgts[i].field) + } + SHOW_PARSE_END + } + for( size_t i=0; i<ntgt; i++ ) + { + if( !source.addr_of + && (source.field->type == FldAlphanumeric + || source.field->type == FldLiteralA)) + { + // This is something like SET varp TO ENTRY "ref". + tree function_handle = function_handle_from_name(source, + COBOL_FUNCTION_RETURN_TYPE); + gg_memcpy(qualified_data_dest(tgts[i]), + gg_get_address_of(function_handle), + build_int_cst_type(SIZE_T, sizeof(void *))); + } + else + { + if( !tgts[i].addr_of ) + { + // When not ADDRESS OF TARGET, the variable must be a POINTER + gcc_assert( tgts[i].field->type == FldPointer ); + } + else + { + // When ADDRESS OF TARGET, the target must be linkage or based + gcc_assert( tgts[i].field->attr & (linkage_e | based_e) ); + } + + gg_call( VOID, + "__gg__set_pointer", + gg_get_address_of(tgts[i].field->var_decl_node), + refer_offset_dest(tgts[i]), + build_int_cst_type(INT, tgts[i].addr_of ? REFER_T_ADDRESS_OF : 0), + source.field ? gg_get_address_of(source.field->var_decl_node) : null_pointer_node, + refer_offset_source(source), + build_int_cst_type(INT, source.addr_of ? REFER_T_ADDRESS_OF : 0), + NULL_TREE + ); + + if( tgts[i].addr_of ) + { + // When SET ADDRESS OF TARGET TO ..., the library call sets + // tgts[i].field->data. We need to propogate the data+offset + // through the level01 variable's children: + propogate_linkage_offsets(tgts[i].field, + member(tgts[i].field->var_decl_node, "data")); + } + } + } + } +typedef struct hier_node + { + size_t our_index; // In the symbol table + bool common; + struct hier_node *parent_node; + char *name; + std::vector<struct hier_node *>child_nodes; + + hier_node() : + our_index(0), + common(false), + parent_node(NULL) + {} + } hier_node; + +static hier_node * +find_hier_node( const std::unordered_map<size_t, hier_node *> &node_map, + size_t program_index) + { + std::unordered_map<size_t, hier_node *>::const_iterator it = + node_map.find(program_index); + if( it == node_map.end() ) + { + return NULL; + } + return it->second; + } + +static bool +sort_by_hier_name(const hier_node *a, const hier_node *b) + { + return strcmp(a->name, b->name) < 0; + } + +static void +find_uncles(const hier_node *node, std::vector<const hier_node *> &uncles) + { + const hier_node *parent = node->parent_node; + if( parent ) + { + for(size_t i=0; i<parent->child_nodes.size(); i++) + { + if( parent->child_nodes[i] != node ) + { + if( parent->child_nodes[i]->common ) + { + uncles.push_back(parent->child_nodes[i]); + } + } + } + find_uncles(parent, uncles); + } + } + +void +parser_program_hierarchy( const struct cbl_prog_hier_t& hier ) + { + Analyze(); + /* The complication in this routine is that it gets called near the end + of every program-id. And it keeps growing. The reason is because the + parser doesn't know when it is working on the last program of a list of + nested programs. So, we just do what we need to do, and we keep track + of what we've already built so that we don't build it more than once. + */ + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( gg_trans_unit.function_stack.size() != 1 ) + { + SHOW_PARSE_TEXT("Ending a nested function") + } + else + { + for( size_t i=0; i<hier.nlabel; i++ ) + { + if( i ) + { + SHOW_PARSE_INDENT + } + else + { + SHOW_PARSE_TEXT(" "); + } + char ach[128]; + sprintf(ach, + "%ld %s%s parent:%ld", + hier.labels[i].ordinal, + hier.labels[i].label.name, + hier.labels[i].label.common ? " COMMON" : "", + hier.labels[i].label.parent); + SHOW_PARSE_TEXT(ach); + } + } + SHOW_PARSE_END + } + + // This needs to be an island that doesn't execute in-line. This is necessary + // when there isn't a GOBACK or GOTO or STOP RUN at the point where a + // [possibly implicit] PROGRAM END is encountered + tree skipper_goto; + tree skipper_label; + gg_create_goto_pair(&skipper_goto, + &skipper_label); + gg_append_statement(skipper_goto); + + // The stack.size() test shouldn't be necessary, because the parser should + // be calling us only at the PROGRAM END point of an outermost function. + + gcc_assert(gg_trans_unit.function_stack.size() == 1); + + gg_append_statement(label_list_out_label); + + std::unordered_map<size_t, std::vector<const hier_node *>> map_of_lists; + std::unordered_map<size_t, hier_node *> node_map; + std::vector<hier_node *> nodes; + + // We need to avoid duplicating names, because a direct child's name takes + // precedence over a COMMON name above us in the hierarchy: + + std::unordered_map<size_t, std::unordered_set<std::string>>map_of_sets; + + // We need to build a tree out of the hierarchical structure: + // Create, essentially, a root node: + hier_node *zero_node = new hier_node; + nodes.push_back(zero_node); + node_map[0] = nodes.back(); + + // Pass 1: Create a node for every program: + for( size_t i=0; i<hier.nlabel; i++ ) + { + hier_node *existing_node = find_hier_node(node_map, hier.labels[i].ordinal); + gcc_assert( existing_node == NULL ); + + hier_node *new_node = new hier_node; + new_node->our_index = hier.labels[i].ordinal; + new_node->common = hier.labels[i].label.common; + new_node->name = cobol_name_mangler(hier.labels[i].label.name); + nodes.push_back(new_node); + node_map[hier.labels[i].ordinal] = nodes.back(); + } + + // Pass 2: populate each node with their parent and children: + for( size_t i=0; i<hier.nlabel; i++ ) + { + hier_node *child_node = find_hier_node(node_map, hier.labels[i].ordinal); + gcc_assert(child_node); + + hier_node *parent_node = find_hier_node(node_map, + hier.labels[i].label.parent); + gcc_assert(parent_node); + + child_node->parent_node = parent_node; + parent_node->child_nodes.push_back(child_node); + } + + // We now build the lists of routines that can be called from every routine + + // We are going to create one vector of hier_nodes for each routine: + + for(size_t i=0; i<nodes.size(); i++) + { + // First, direct children always take precedence + size_t caller = nodes[i]->our_index; + const hier_node *caller_node = nodes[i]; + for(size_t j=0; j<caller_node->child_nodes.size(); j++) + { + map_of_lists[caller].push_back(caller_node->child_nodes[j]); + map_of_sets[caller].insert(caller_node->child_nodes[j]->name); + } + + // Sibling routines marked COMMON, and siblings of ancestors marked COMMON + // are also accessible by us. Go find them. + std::vector<const hier_node *>uncles; + find_uncles(nodes[i], uncles); + for( size_t i=0; i<uncles.size(); i++ ) + { + const hier_node *uncle = uncles[i]; + if( map_of_sets[caller].find(uncle->name) == map_of_sets[caller].end() ) + { + // We have a COMMON uncle or sibling we haven't seen before. + map_of_lists[caller].push_back(uncle); + } + } + } + + // Having created lists of callables for each caller, we want to sort each + // of those lists to make it easier to bsearch things in them later: + for( std::unordered_map<size_t, std::vector<const hier_node *>>::iterator mol = map_of_lists.begin(); + mol != map_of_lists.end(); + mol++ ) + { + std::sort(mol->second.begin(), mol->second.end(), sort_by_hier_name); + } + + // Having built the lists of lists, start pulling them apart + + tree function_type = + build_varargs_function_type_array( SIZE_T, + 0, // No parameters yet + NULL); // And, hence, no types + tree pointer_type = build_pointer_type(function_type); + + static std::unordered_set<size_t>callers; + + for( std::unordered_map<size_t, std::vector<const hier_node *>>::const_iterator mol = map_of_lists.begin(); + mol != map_of_lists.end(); + mol++ ) + { + size_t caller = mol->first; + if( caller != 0 ) + { + if( callers.find(caller) == callers.end() ) + { + // We haven't seen this caller before + callers.insert(caller); + + char ach[2*sizeof(cbl_name_t)]; + tree names_table_type = build_array_type_nelts(CHAR_P, mol->second.size()+1); + sprintf(ach, "..our_accessible_functions_%ld", caller); + tree the_names_table = gg_define_variable(names_table_type, ach, vs_file_static); + + // Here is where we build a table out of constructors: + tree constructed_array_type = build_array_type_nelts(pointer_type, mol->second.size()); + sprintf(ach, "..our_constructed_table_%ld", caller); + tree the_constructed_table = gg_define_variable(constructed_array_type, ach, vs_file_static); + + tree constr_names = make_node(CONSTRUCTOR); + TREE_TYPE(constr_names) = names_table_type; + TREE_STATIC(constr_names) = 1; + TREE_CONSTANT(constr_names) = 1; + + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = constructed_array_type; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + int i=0; + for( std::vector<const hier_node *>::const_iterator callee = mol->second.begin(); + callee != mol->second.end(); + callee++ ) + { + sprintf(ach, "%s.%ld", (*callee)->name, (*callee)->parent_node->our_index); + + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr_names), + build_int_cst_type(SIZE_T, i), + build_string_literal(ach)); + + // Build the constructor element for that function: + tree function_decl = build_fn_decl (ach, function_type); + tree addr_expr = build1(ADDR_EXPR, pointer_type, function_decl); + + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + build_int_cst_type(SIZE_T, i), + addr_expr); + + i++; + } + // Terminate the names table with NULL + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr_names), + build_int_cst_type(SIZE_T, i), + null_pointer_node); + + DECL_INITIAL(the_names_table) = constr_names; + DECL_INITIAL(the_constructed_table) = constr; + + // And put a pointer to that table into the file-static variable set aside + // for it: + sprintf(ach, "..accessible_program_list_%ld", caller); + tree accessible_list_var_decl = gg_trans_unit_var_decl(ach); + gg_assign( accessible_list_var_decl, gg_get_address_of(the_names_table) ); + + sprintf(ach, "..accessible_program_pointers_%ld", caller); + tree accessible_programs_decl = gg_trans_unit_var_decl(ach); + gg_assign( accessible_programs_decl, gg_get_address_of(the_constructed_table) ); + } + } + } + gg_append_statement(label_list_back_goto); + gg_append_statement(skipper_label); + } + +void +parser_set_handled(ec_type_t ec_handled) + { + if( mode_syntax_only() ) return; + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[64]; + sprintf(ach, "ec_type_t: 0x%lx", size_t(ec_handled)); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( gg_trans_unit.function_stack.size() ) + { + if( ec_handled ) + { + // We assume that exception_handled is zero, always. We only make it + // non-zero when something needs to be done. __gg__match_exception is + // in charge of setting it back to zero. + gg_assign(var_decl_exception_handled, + build_int_cst_type(INT, (int)ec_handled)); + } + } + else + { + yywarn("parser_set_handled() called between programs"); + } + } + +void +parser_set_file_number(int file_number) + { + if( mode_syntax_only() ) return; + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[32]; + sprintf(ach, "file number: %d", file_number); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( gg_trans_unit.function_stack.size() ) + { + gg_assign(var_decl_exception_file_number, + build_int_cst_type(INT, file_number)); + } + else + { + yywarn("parser_set_file_number() called between programs"); + } + } + +void +parser_set_numeric(struct cbl_field_t *tgt, ssize_t value) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" set ") + SHOW_PARSE_TEXT(tgt->name) + SHOW_PARSE_TEXT(" to ") + char ach[32]; + sprintf(ach, "%ld", value); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + build_int_cst_type(INT128, value), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + +static void +stash_exceptions( const cbl_enabled_exceptions_array_t *enabled ) + { + // We need to create a static array of bytes + size_t narg = enabled->nbytes(); + unsigned char *p = (unsigned char *)(enabled->ecs); + + static size_t prior_narg = 0; + static size_t max_narg = 128; + static unsigned char *prior_p = (unsigned char *)xmalloc(max_narg); + + bool we_got_new_data = false; + if( prior_narg != narg ) + { + we_got_new_data = true; + } + else + { + // The narg counts are the same. + for(size_t i=0; i<narg; i++) + { + if( p[i] != prior_p[i] ) + { + we_got_new_data = true; + break; + } + } + } + + if( !we_got_new_data ) + { + return; + } + + if( narg > max_narg ) + { + max_narg = narg; + prior_p = (unsigned char *)xrealloc(prior_p, max_narg); + } + + memcpy(prior_p, p, narg); + + static int count = 1; + + tree array_of_chars_type; + tree array_of_chars; + + if( narg ) + { + char ach[32]; + sprintf(ach, "_ec_array_%d", count++); + array_of_chars_type = build_array_type_nelts(UCHAR, narg); + + // We have the array. Now we need to build the constructor for it + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = array_of_chars_type; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + for(size_t i=0; i<narg; i++) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + build_int_cst_type(SIZE_T, i), + build_int_cst_type(UCHAR, p[i])); + } + array_of_chars = gg_define_variable(array_of_chars_type, ach, vs_static); + DECL_INITIAL(array_of_chars) = constr; + + gg_call(VOID, + "__gg__stash_exceptions", + build_int_cst_type(SIZE_T, enabled->nec), + narg ? gg_get_address_of(array_of_chars) : null_pointer_node, + NULL_TREE); + } + } + +static void +store_location_stuff(const cbl_name_t statement_name) + { + if( exception_location_active && !current_declarative_section_name() ) + { + // We need to establish some stuff for EXCEPTION- function processing + gg_assign(var_decl_exception_source_file, + gg_string_literal(current_filename.back().c_str())); + + gg_assign(var_decl_exception_program_id, + gg_string_literal(current_function->our_unmangled_name)); + + if( strstr(current_function->current_section->label->name, "_implicit") + != current_function->current_section->label->name ) + { + gg_assign(var_decl_exception_section, + gg_string_literal(current_function->current_section->label->name)); + } + else + { + gg_assign(var_decl_exception_section, + gg_cast(build_pointer_type(CHAR_P),null_pointer_node)); + } + + if( strstr(current_function->current_paragraph->label->name, "_implicit") + != current_function->current_paragraph->label->name ) + { + gg_assign(var_decl_exception_paragraph, + gg_string_literal(current_function->current_paragraph->label->name)); + } + else + { + gg_assign(var_decl_exception_paragraph, + gg_cast(build_pointer_type(CHAR_P), null_pointer_node)); + } + + gg_assign(var_decl_exception_source_file, + gg_string_literal(current_filename.back().c_str())); + gg_assign(var_decl_exception_line_number, build_int_cst_type(INT, + CURRENT_LINE_NUMBER)); + gg_assign(var_decl_exception_statement, gg_string_literal(statement_name)); + } + } + +void +parser_exception_prepare( const cbl_name_t statement_name, + const cbl_enabled_exceptions_array_t *enabled ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(enabled->nec? " stashing " : " skipping ") + SHOW_PARSE_TEXT(statement_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( enabled->nec ) + { + if( gg_trans_unit.function_stack.size() ) + { + stash_exceptions(enabled); + store_location_stuff(statement_name); + } + else + { + yywarn("parser_exception_prepare() called between programs"); + } + } + } + +void +parser_exception_clear() + { + if( mode_syntax_only() ) return; + + Analyze(); + gg_assign(var_decl_exception_code, integer_zero_node); + } + +void +parser_exception_raise(ec_type_t ec) + { + Analyze(); + if( ec == ec_none_e ) + { + gg_call(VOID, + "__gg__set_exception_code", + integer_zero_node, + integer_one_node, + NULL_TREE); + } + else + { + set_exception_code_func(ec, __LINE__, 1); + } + } + +void +parser_match_exception(cbl_field_t *index, + cbl_field_t *blob ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" index ", index) + SHOW_PARSE_INDENT + if( blob ) + { + SHOW_PARSE_FIELD("blob ", blob) + } + else + { + SHOW_PARSE_TEXT("blob is NULL") + } + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("index ", index, "") + TRACE1_INDENT + TRACE1_TEXT("blob ") + if( blob ) + { + TRACE1_TEXT(blob->name) + } + else + { + TRACE1_TEXT("is NULL") + } + TRACE1_END + } + + gg_call(VOID, + "__gg__match_exception", + gg_get_address_of(index->var_decl_node), + blob ? blob->var_decl_node : null_pointer_node, + NULL_TREE); + + TRACE1 + { + static tree index_val = gg_define_variable(INT, "..pme_index", vs_file_static); + get_binary_value(index_val, NULL, index, size_t_zero_node); + TRACE1_INDENT + gg_printf("returned value is 0x%x (%d)", index_val, index_val, NULL_TREE); + TRACE1_END + } + } + +void +parser_check_fatal_exception() + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Check for fatal EC...") + SHOW_PARSE_END + } + gg_call(VOID, + "__gg__check_fatal_exception", + NULL_TREE); + } + +void +parser_clear_exception() + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Clear raised EC...") + SHOW_PARSE_END + } + gg_call(VOID, "__gg__clear_exception", NULL_TREE); + } + +void +parser_exception_file( cbl_field_t *tgt, cbl_file_t *file) + { + Analyze(); + gg_call(VOID, + "__gg__func_exception_file", + gg_get_address_of(tgt->var_decl_node), + file ? gg_get_address_of(file->var_decl_node) : null_pointer_node, + NULL_TREE); + } + +void +parser_file_stash( struct cbl_file_t *file ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL ") + } + SHOW_PARSE_END + } + + if( file ) + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_stash of ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + gg_call(VOID, + "__gg__file_stash", + gg_get_address_of(file->var_decl_node), + NULL_TREE); + } + else + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_stash of NULL ") + TRACE1_END + } + + gg_call(VOID, + "__gg__file_stash", + null_pointer_node, + NULL_TREE); + } + } + +static void +hijack_for_development(const char *funcname) + { + /* + + To make sure that things like global symbols and whatnot get initialized, you + should probably create a source file that looks like this: + + identification division. + program-id. prog. + procedure division. + call "dubner". + end program prog. + identification division. + program-id. dubner. + procedure division. + goback. + end program dubner. + + The first program will cause all of the parser_enter_program() and + parser_division(procedure_div_e) stuff to be initialized. The second program, + named "dubner", will be hijacked and bring you here. */ + + // Assume that funcname is lowercase with no hyphens + enter_program_common(funcname, funcname); + parser_display_literal("You have been hijacked by a program named \"dubner\""); + gg_insert_into_assembler("# HIJACKED DUBNER CODE START"); + + for(int i=0; i<10; i++) + { + char ach[64]; + sprintf(ach, "Hello, world - %d", i+1); + + gg_call(VOID, + "puts", + build_string_literal(strlen(ach)+1, ach), + NULL_TREE); + } + + gg_insert_into_assembler("# HIJACKED DUBNER CODE END"); + gg_return(0); + } + +static void +conditional_abs(tree source, cbl_field_t *field) + { + Analyze(); + if( !(field->attr & signable_e) ) + { + gg_assign(source, gg_abs(source)); + } + } + +static bool +mh_identical(cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource) + { + // Check to see if the two variables are identical types, thus allowing + // for a simple byte-for-byte copy of the data areas: + bool moved = false; + if( destref.field->type == sourceref.field->type + && destref.field->data.capacity == sourceref.field->data.capacity + && destref.field->data.digits == sourceref.field->data.digits + && destref.field->data.rdigits == sourceref.field->data.rdigits + && (destref.field->attr & (signable_e|separate_e|leading_e)) + == (sourceref.field->attr & (signable_e|separate_e|leading_e)) + && !destref.field->occurs.depending_on + && !sourceref.field->occurs.depending_on + && !destref.refmod.from + && !sourceref.refmod.len + && !(destref.field->attr & intermediate_e) // variables with variable + && !(sourceref.field->attr & intermediate_e) // capacities have to be + && !(destref.field->attr & any_length_e) // handled elsewhere + && !(sourceref.field->attr & any_length_e) + ) + { + // The source and destination are identical in type + if( (sourceref.field->attr & intermediate_e) || !symbol_find_odo(sourceref.field) ) + { + Analyze(); + // Source doesn't have a depending_on clause + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_identical()"); + } + gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + gg_add(member(sourceref.field->var_decl_node, "data"), + tsource.offset), + build_int_cst_type(SIZE_T, sourceref.field->data.capacity)); + moved = true; + } + } + return moved; + } + +static bool +mh_source_is_literalN(cbl_refer_t &destref, + cbl_refer_t &sourceref, + bool check_for_error, + cbl_round_t rounded, + tree size_error) + { + bool moved = false; + if( sourceref.field->type == FldLiteralN ) + { + Analyze(); + switch( destref.field->type ) + { + case FldGroup: + case FldAlphanumeric: + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move") + } + + static char *buffer = NULL; + static size_t buffer_size = 0; + raw_to_internal(&buffer, + &buffer_size, + sourceref.field->data.initial, + strlen(sourceref.field->data.initial)); + gg_call(VOID, + "__gg__psz_to_alpha_move", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + gg_string_literal(buffer), + build_int_cst_type(SIZE_T, strlen(sourceref.field->data.initial)), + NULL_TREE); + moved = true; + break; + } + + case FldPointer: + case FldIndex: + { + // We know this is a move to an eight-byte value: + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_source_is_literalN: pointer/index") + } + + if( sourceref.field->data.capacity < 8 ) + { + // There are too few bytes in sourceref + if( sourceref.field->attr & signable_e ) + { + static tree highbyte = gg_define_variable(UCHAR, "..mh_litN_highbyte", vs_file_static); + // Pick up the source byte that has the sign bit. + gg_assign(highbyte, + gg_get_indirect_reference(gg_add(member(sourceref.field->var_decl_node, + "data"), + build_int_cst_type(SIZE_T, + sourceref.field->data.capacity-1)), + integer_zero_node)); + IF( gg_bitwise_and(highbyte, build_int_cst_type(UCHAR, 0x80)), + eq_op, + build_int_cst_type(UCHAR, 0x80) ) + { + // We are dealing with a negative number + gg_memset(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + build_int_cst_type(UCHAR, 0xFF), + build_int_cst_type(SIZE_T, 8)); + } + ELSE + gg_memset(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + build_int_cst_type(UCHAR, 0x00), + build_int_cst_type(SIZE_T, 8)); + ENDIF + } + else + { + // The too-short source is positive. + gg_memset(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + build_int_cst_type(UCHAR, 0x00), + build_int_cst_type(SIZE_T, 8)); + } + } + + tree literalN_value = get_literalN_value(sourceref.field); + scale_by_power_of_ten_N(literalN_value, -sourceref.field->data.rdigits); + gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + gg_get_address_of(literalN_value), + build_int_cst_type(SIZE_T, sourceref.field->data.capacity)); + moved = true; + + break; + } + + case FldNumericBin5: + { + // We are moving from a FldLiteralN (which we know has no subscripts or + // refmods), to a NumericBin5, which might. + + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_source_is_literalN: FldNumericBin5") + } + + // For now, we are ignoring intermediates: + assert( !(destref.field->attr & intermediate_e) ); + + int bytes_needed = std::max(destref.field->data.capacity, + sourceref.field->data.capacity); + tree calc_type = tree_type_from_size(bytes_needed, + sourceref.field->attr & signable_e); + tree dest_type = tree_type_from_size( destref.field->data.capacity, + destref.field->attr & signable_e); + + // Pick up the source data. + tree source = gg_define_variable(calc_type); + gg_assign(source, gg_cast(calc_type, sourceref.field->data_decl_node)); + + // Take the absolute value, if the destination is not signable + conditional_abs(source, destref.field); + + // See if it needs to be scaled: + scale_by_power_of_ten_N( + source, + destref.field->data.rdigits-sourceref.field->data.rdigits); + + if( check_for_error && size_error ) + { + Analyzer.Message("Check to see if result fits"); + if( destref.field->data.digits ) + { + __int128 power_of_ten = get_power_of_ten(destref.field->data.digits); + IF( gg_abs(source), ge_op, build_int_cst_type(calc_type, + power_of_ten) ) + { + gg_assign(size_error, gg_bitwise_or(size_error, integer_one_node)); + } + ELSE + ENDIF + } + } + + Analyzer.Message("Move to destination location"); + tree dest_location = gg_indirect( + gg_cast(build_pointer_type(dest_type), + gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)))); + gg_assign(dest_location, gg_cast(dest_type, source)); + moved = true; + break; + } + + case FldNumericDisplay: + case FldNumericBinary: + case FldNumericEdited: + case FldPacked: + { + static tree berror = gg_define_variable(INT, "..mh_litN_berror", vs_file_static); + gg_assign(berror, integer_zero_node); + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("calling get_literalN_value ") + } + tree literalN_value = get_literalN_value(sourceref.field); + + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("calling __gg__int128_to_qualified_field ") + } + + gg_call(INT, + "__gg__int128_to_qualified_field", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + gg_cast(INT128, literalN_value), + build_int_cst_type(INT, sourceref.field->data.rdigits), + build_int_cst_type(INT, rounded), + gg_get_address_of(berror), + NULL_TREE); + + if( size_error ) + { + IF( berror, ne_op, integer_zero_node ) + { + gg_assign(size_error, gg_bitwise_or(size_error, integer_one_node)); + } + ELSE + ENDIF + } + moved = true; + break; + } + + case FldAlphaEdited: + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" FldAlphaEdited") + } + gg_call(VOID, + "__gg__string_to_alpha_edited_ascii", + gg_add( member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref) ), + gg_string_literal(sourceref.field->data.initial), + build_int_cst_type(INT, strlen(sourceref.field->data.initial)), + gg_string_literal(destref.field->data.picture), + NULL_TREE); + moved = true; + break; + } + + case FldFloat: + { + tree tdest = gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref) ); + switch( destref.field->data.capacity ) + { + // For some reason, using FLOAT128 in the build_pointer_type causes + // a SEGFAULT. So, we'll use other types with equivalent sizes. I + // am speculating that the use of floating-point types causes the -O0 + // compilation to move things using the mmx registers. So, I am using + // intxx types in the hope that they are simpler. + case 4: + { + // The following generated code is the exact equivalent + // of the C code: + // *(float *)dest = (float)data.value + _Float32 src = (_Float32)sourceref.field->data.value; + tree tsrc = build_string_literal(sizeof(src), (char *)&src); + gg_assign(gg_indirect(gg_cast(build_pointer_type(INT), tdest)), + gg_indirect(gg_cast(build_pointer_type(INT), tsrc ))); + break; + } + case 8: + { + _Float64 src = (_Float64)sourceref.field->data.value; + tree tsrc = build_string_literal(sizeof(src), (char *)&src); + gg_assign(gg_indirect(gg_cast(build_pointer_type(LONG), tdest)), + gg_indirect(gg_cast(build_pointer_type(LONG), tsrc ))); + break; + } + case 16: + { + _Float128 src = (_Float128)sourceref.field->data.value; + tree tsrc = build_string_literal(sizeof(src), (char *)&src); + gg_assign(gg_indirect(gg_cast(build_pointer_type(INT128), tdest)), + gg_indirect(gg_cast(build_pointer_type(INT128), tsrc ))); + break; + } + } + moved=true; + break; + } + + default: + cbl_internal_error( + "In parser_move(%s to %s), the move of FldLiteralN to %s " + "hasn't been implemented", + sourceref.field->name, + destref.field->name, + cbl_field_type_str(destref.field->type)); + break; + } + } + return moved; + } + +static +tree float_type_of(int n) + { + switch(n) + { + case 4: + return FLOAT; + case 8: + return DOUBLE; + case 16: + return FLOAT128; + default: + gcc_unreachable(); + } + return NULL_TREE; + } + +static tree +float_type_of(cbl_field_t *field) + { + gcc_assert(field->type == FldFloat); + return float_type_of(field->data.capacity); + } + +static tree +float_type_of(cbl_refer_t *refer) + { + return float_type_of(refer->field); + } + +static bool +mh_dest_is_float( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource, + cbl_round_t rounded, + tree size_error) // int + { + bool moved = false; + if( destref.field->type == FldFloat ) + { + Analyze(); + switch( sourceref.field->type ) + { + case FldPointer: + case FldIndex: + case FldNumericBin5: + case FldNumericDisplay: + case FldNumericBinary: + case FldNumericEdited: + case FldPacked: + { + switch( destref.field->data.capacity ) + { + case 4: + gg_call(VOID, + "__gg__float32_from_int128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + build_int_cst_type(INT, rounded), + size_error ? gg_get_address_of(size_error) : null_pointer_node, + NULL_TREE); + break; + case 8: + gg_call(VOID, + "__gg__float64_from_int128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + build_int_cst_type(INT, rounded), + size_error ? gg_get_address_of(size_error) : null_pointer_node, + NULL_TREE); + break; + case 16: + gg_call(VOID, + "__gg__float128_from_int128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + build_int_cst_type(INT, rounded), + size_error ? gg_get_address_of(size_error) : null_pointer_node, + NULL_TREE); + break; + } + moved = true; + break; + } + + case FldFloat: + { + // We are testing for size. First, we need to check to see if the + // source is INFINITY. If so, that's an automatic size error + + IF( gg_call_expr( INT, + "__gg__is_float_infinite", + tsource.pfield, + tsource.offset, + NULL_TREE), + ne_op, + integer_zero_node ) + { + if( size_error ) + { + gg_assign(size_error, integer_one_node ); + } + } + ELSE + { + // The source isn't infinite. + // If the destination is bigger than the source, then we can + // do an untested move: + + if( destref.field->data.capacity >= sourceref.field->data.capacity ) + { + tree dtype = float_type_of(&destref); + tree stype = float_type_of(&sourceref); + + tree tdest = gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)); + tree source = gg_add(member(sourceref.field->var_decl_node, "data"), + refer_offset_source(sourceref)); + gg_assign(gg_indirect(gg_cast(build_pointer_type(dtype), tdest)), + gg_cast(dtype, + gg_indirect(gg_cast(build_pointer_type(stype), + source)))); + } + else + { + // There are only three possible moves left: + if(destref.field->data.capacity == 8 ) + { + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__float64_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE)); + } + else + { + gg_call( INT, + "__gg__float64_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE); + } + } + else + { + // The destination has to be float32 + if( sourceref.field->data.capacity == 8 ) + { + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__float32_from_64", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE)); + } + else + { + gg_call( INT, + "__gg__float32_from_64", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE); + } + + } + else + { + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__float32_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE)); + } + else + { + gg_call( INT, + "__gg__float32_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE); + } + } + } + } + } + ENDIF + + moved = true; + break; + } + + case FldLiteralA: + case FldAlphanumeric: + { + // Alphanumeric to float is inherently slow. Send it off to the library + break; + } + + default: + cbl_internal_error("In mh_dest_is_float(%s to %s), the " + "move of %s to %s hasn't been implemented", + sourceref.field->name, + destref.field->name, + cbl_field_type_str(sourceref.field->type), + cbl_field_type_str(destref.field->type)); + break; + } + } + return moved; + } + +static void +picky_memset(tree &dest_p, unsigned char value, size_t length) + { + if( length ) + { + tree dest_ep = gg_define_variable(TREE_TYPE(dest_p)); + gg_assign(dest_ep, + gg_add( dest_p, + build_int_cst_type(SIZE_T, length))); + WHILE( dest_p, lt_op, dest_ep ) + { + gg_assign(gg_indirect(dest_p), + build_int_cst_type(UCHAR, value)); + gg_increment(dest_p); + } + WEND + } + } + +static void +picky_memcpy(tree &dest_p, tree &source_p, size_t length) + { + if( length ) + { + tree dest_ep = gg_define_variable(TREE_TYPE(dest_p)); + gg_assign(dest_ep, + gg_add( dest_p, + build_int_cst_type(SIZE_T, length))); + WHILE( dest_p, lt_op, dest_ep ) + { + gg_assign(gg_indirect(dest_p), gg_indirect(source_p)); + gg_increment(dest_p); + gg_increment(source_p); + } + WEND + } + } + +static bool +mh_numeric_display( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource, + tree size_error) + { + bool moved = false; + + if( destref.field->type == FldNumericDisplay + && sourceref.field->type == FldNumericDisplay + && !(destref.field->attr & scaled_e) + && !(sourceref.field->attr & scaled_e) ) + { + Analyze(); + // I believe that there are 225 pathways through the following code. That's + // because there are five different valid combination of signable_e, + // separate_e, and leading_e. There are three possibilities for + // sender/receiver rdigits (too many, too few, and just right), and the same + // for ldigits. 5 * 5 * 3 * 3 = 225. + + // Fasten your seat belts. + + // In order to simplify processing of a signable internal sender, we are + // going to pick up the sign byte and temporarily turn off the sign bit in + // the source data. At the end, we will restore that value. This + // reflexively makes me a bit nervous (it isn't, for example, thread-safe), + // but it makes life easier. + + static tree source_sign_loc = gg_define_variable(UCHAR_P, "..mhnd_sign_loc", vs_file_static); + static tree source_sign_byte = gg_define_variable(UCHAR, "..mhnd_sign_byte", vs_file_static); + static tree dest_p = gg_define_variable(UCHAR_P, "..mhnd_dest", vs_file_static); // The destination data pointer + static tree source_p = gg_define_variable(UCHAR_P, "..mhnd_source", vs_file_static); // The source data pointer + static tree source_ep = gg_define_variable(UCHAR_P, "..mhnd_source_e", vs_file_static); // When we need an end pointer + + gg_assign(dest_p, qualified_data_dest(destref)); + gg_assign(source_p, gg_add(member(sourceref.field, "data"), + tsource.offset)); + + if( sourceref.field->attr & signable_e ) + { + // The source is signable + + if( !(sourceref.field->attr & leading_e) ) + { + // The sign location is trailing. Whether separate or not, the location + // is the final byte of the data: + gg_assign(source_sign_loc, gg_add(member( sourceref.field->var_decl_node, "data"), + tsource.offset)), + gg_assign(source_sign_loc, + gg_add(source_sign_loc, + build_int_cst_type(SIZE_T, + sourceref.field->data.capacity-1))); + if( (sourceref.field->attr & separate_e) ) + { + // We have trailing separate + } + else + { + // We have trailing internal + } + } + else + { + // The source sign location is in the leading position. + gg_assign(source_sign_loc, + gg_add(member(sourceref.field->var_decl_node, "data"), + tsource.offset)); + if( (sourceref.field->attr & separate_e) ) + { + // We have leading separate, so the first actual digit is at + // source_p+1. + gg_increment(source_p); + } + else + { + // We have leading internal + } + } + // Pick up the byte that contains the sign data, whether internal or + // external: + gg_assign(source_sign_byte, gg_indirect(source_sign_loc)); + + if( !(sourceref.field->attr & separate_e) ) + { + // This is signable and internal, so we want to turn off the sign bit + // in the original source data + if( internal_codeset_is_ebcdic() ) + { + gg_assign(gg_indirect(source_sign_loc), + gg_bitwise_or(source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + else + { + gg_assign(gg_indirect(source_sign_loc), + gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + } + else + { + // The number is unsigned, so do nothing. + } + + // Let the shenanigans begin. + + // We are now ready to output the very first byte. + + // The first thing to do is see if we need to output a leading sign + // character + if( (destref.field->attr & signable_e) + && (destref.field->attr & leading_e) + && (destref.field->attr & separate_e) ) + { + // The output is signed, separate, and leading, so the first character + // needs to be either '+' or '-' + if( (sourceref.field->attr & separate_e) ) + { + // The source is signable/separate + // Oooh. Shiny. We already have that character. + gg_assign(gg_indirect(dest_p), source_sign_byte); + } + else + { + // The source is internal. Not that up above we set source_sign_byte + // even for source values that aren't signable + if( internal_codeset_is_ebcdic() ) + { + // We are working in EBCDIC + if( sourceref.field->attr & signable_e ) + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + eq_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_MINUS)); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_PLUS)); + } + ENDIF + } + else + { + // The source is not signable, so the result is positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_PLUS)); + } + } + else + { + // We are working in ASCII + if( sourceref.field->attr & signable_e ) + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '-')); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '+')); + } + ENDIF + } + else + { + // The source is not signable, so the result is positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '+')); + } + } + } + gg_increment(dest_p); + } + + // We have the leading '+' or '-', assuming one is needed. We can + // now start outputting the digits to the left of the decimal place + + int dest_ldigits = (int)destref.field->data.digits + - destref.field->data.rdigits; + int source_ldigits = (int)sourceref.field->data.digits + - sourceref.field->data.rdigits; + + int digit_count = 0; + + if( dest_ldigits > source_ldigits ) + { + // The destination has more ldigits than the source, and needs some + // leading zeroes: + picky_memset( dest_p, + internal_codeset_is_ebcdic() ? + EBCDIC_ZERO : '0' , + dest_ldigits - source_ldigits); + // With the leading zeros set, copy over the ldigits: + digit_count = source_ldigits; + } + else if( dest_ldigits == source_ldigits ) + { + // This is the Goldilocks zone. Everything is *just* right. + digit_count = dest_ldigits; + } + else + { + // The destination is smaller than the source. We have to throw away the + // the high-order digits of the source. If any of them are non-zero, then + // we need to indicate a size error. + gg_assign(source_ep, + gg_add( source_p, + build_int_cst_type( SIZE_T, + source_ldigits-dest_ldigits))); + WHILE(source_p, lt_op, source_ep) + { + if( size_error ) + { + IF( gg_indirect(source_p), + ne_op, + build_int_cst_type( UCHAR, + internal_codeset_is_ebcdic() ? + EBCDIC_ZERO : '0') ) + { + set_exception_code(ec_size_truncation_e); + gg_assign(size_error, integer_one_node); + } + ELSE + ENDIF + } + gg_increment(source_p); + } + WEND + + // Having skipped over the leading digits, we are in position to move the + // remaining digits + digit_count = dest_ldigits; + } + + // The ldigits are in place. We now go the very similar exercise for the + // rdigits: + + int dest_rdigits = destref.field->data.rdigits; + int source_rdigits = sourceref.field->data.rdigits; + + int trailing_zeros = 0; + + if( dest_rdigits > source_rdigits ) + { + // The destination has more rdigits than the source + + // Copy over the available digits: + digit_count += source_rdigits; + + // And then tack on the needed trailing zeroes: + trailing_zeros = dest_rdigits - source_rdigits; + } + else if( dest_rdigits == source_rdigits ) + { + // This is the Goldilocks zone. Everything is *just* right. + digit_count += dest_rdigits; + } + else + { + // The destination has fewer rdigits than the source. We send + // over only the necessary rdigits, discarding the ones to the right. + digit_count += dest_rdigits; + } + + picky_memcpy(dest_p, source_p, digit_count); + picky_memset( dest_p, + internal_codeset_is_ebcdic() ? + EBCDIC_ZERO : '0' , + trailing_zeros); + + // With the digits in place, we need to sort out what to do if the target + // is signable: + if( destref.field->attr & signable_e ) + { + if( (destref.field->attr & separate_e) + && !(destref.field->attr & leading_e) ) + { + // The target is separate/trailing, so we need to tack a '+' + // or '-' character + if( sourceref.field->attr & separate_e ) + { + // The source was separate, so we already have what we need in t + // source_sign_byte: + gg_assign(gg_indirect(dest_p), source_sign_byte); + gg_increment(dest_p); + } + else + { + // The source is either internal, or unsigned + if( sourceref.field->attr & signable_e ) + { + // The source is signable/internal, so we need to extract the + // sign bit from source_sign_byte + if( internal_codeset_is_ebcdic() ) + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + eq_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_MINUS)); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_PLUS)); + } + ENDIF + } + else + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '-')); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '+')); + } + ENDIF + } + } + else + { + // The source is unsigned, so dest is positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, + internal_codeset_is_ebcdic() ? + EBCDIC_PLUS : '+' )); + } + } + gg_increment(dest_p); + } + else if( !(destref.field->attr & separate_e) ) + { + // The destination is signed/internal + if( destref.field->attr & leading_e ) + { + // The sign bit goes into the first byte: + gg_assign(dest_p, qualified_data_dest(destref)); + } + else + { + // The sign bit goes into the last byte: + gg_decrement(dest_p); + } + if( sourceref.field->attr & signable_e ) + { + if( sourceref.field->attr & separate_e ) + { + // The source is separate, so source_sign_byte is '+' or '-' + IF( source_sign_byte, + eq_op, + build_int_cst_type(UCHAR, + internal_codeset_is_ebcdic() ? + EBCDIC_MINUS : '-') ) + { + // The source is negative, so turn the ASCII bit on + if( !internal_codeset_is_ebcdic() ) + { + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + + } + else + { + // It's ebcdic, so turn the sign bit OFF + gg_assign(gg_indirect(dest_p), + gg_bitwise_and(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + ELSE + { + // The source is positive, so turn the EBCDIC bit ON: + if( internal_codeset_is_ebcdic() ) + { + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + } + ENDIF + } + else + { + // The source is signable/internal, so the sign bit is in + // source_sign_byte. Whatever it is, it has to go into dest_p: + if( internal_codeset_is_ebcdic() ) + { + // This is EBCDIC, so if the source_sign_byte bit is LOW, we + // clear that bit in dest_p high. + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + eq_op, + build_int_cst_type(UCHAR, 0) ) + { + // The source was negative, so make the dest negative + gg_assign(gg_indirect(dest_p), + gg_bitwise_and(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + ELSE + ENDIF + } + else + { + // This is ASCII, so if the source_sign_byte bit is high, we + // set that bit in dest_p high. + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type(UCHAR, 0) ) + { + // The source was negative, so make the dest negative + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + ELSE + ENDIF + } + } + } + } + } + + if( (sourceref.field->attr & signable_e) + && !(sourceref.field->attr & separate_e)) + { + // The source is signable internal, so we need to restore the original + // sign byte in the original source data: + gg_assign(gg_indirect(source_sign_loc), source_sign_byte); + } + moved = true; + } + return moved; + } + +static bool +mh_little_endian( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource, + bool check_for_error, + tree size_error) + { + bool moved = false; + + cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial); + + if( !figconst + && !(destref.field->attr & scaled_e) + && !(destref.field->attr & (intermediate_e )) + && !(sourceref.field->attr & (intermediate_e )) + && sourceref.field->type != FldLiteralA + && sourceref.field->type != FldAlphanumeric + && sourceref.field->type != FldNumericEdited + && sourceref.field->type != FldPacked + && ( destref.field->type == FldNumericBin5 + || destref.field->type == FldPointer + || destref.field->type == FldIndex ) ) + { + Analyze(); + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_little_endian") + SHOW_PARSE_END + } + + int bytes_needed = get_bytes_needed(sourceref.field); + tree source_type = tree_type_from_size(bytes_needed, + sourceref.field->attr + & signable_e) ; + tree source = gg_define_variable(source_type); + + if( sourceref.field->type == FldFloat ) + { + get_binary_value_from_float(source, + destref, + sourceref.field, + tsource.offset); + + // Get binary value from float actually scales the source value to the + // dest:: rdigits + copy_little_endian_into_place(destref.field, + refer_offset_dest(destref), + source, + destref.field->data.rdigits, + check_for_error, + size_error); + moved = true; + } + else + { + get_binary_value( source, + NULL, + sourceref.field, + tsource.offset); + copy_little_endian_into_place(destref.field, + refer_offset_dest(destref), + source, + sourceref.field->data.rdigits, + check_for_error, + size_error); + moved = true; + } + } + return moved; + } + +static bool +mh_source_is_group( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsrc) + { + bool retval = false; + if( sourceref.field->type == FldGroup && !(destref.field->attr & rjust_e) ) + { + Analyze(); + // We are moving a group to a something. The rule here is just move as + // many bytes as you can, and, if necessary, fill with spaces + tree tdest = gg_add( member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)); + tree tsource = gg_add( member(sourceref.field->var_decl_node, "data"), + tsrc.offset); + tree dbytes = refer_size_dest(destref); + tree sbytes = tsrc.length; + + IF( sbytes, ge_op, dbytes ) + { + // There are too many source bytes + gg_memcpy(tdest, tsource, dbytes); + } + ELSE + { + // There are too-few source bytes: + gg_memset(tdest, build_int_cst_type(INT, internal_space), dbytes); + gg_memcpy(tdest, tsource, sbytes); + } + ENDIF + retval = true; + } + return retval; + } + +static void +move_helper(tree size_error, // This is an INT + cbl_refer_t destref, + cbl_refer_t sourceref, // Call move_helper with this resolved. + TREEPLET &tsource, + cbl_round_t rounded, + bool check_for_error, // True means our called wants to know about truncation errors + bool restore_on_error + ) + { + Analyze(); + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("move_helper()"); + } + + bool moved = false; + + if( size_error ) + { + gg_assign(size_error, integer_zero_node); + } + + static tree stash = gg_define_variable(UCHAR_P, "..mh_stash", vs_file_static); + + tree st_data = NULL_TREE; + tree st_size = NULL_TREE; + + if( restore_on_error ) + { + // We are creating a copy of the original destination in case we clobber it + // and have to restore it because of a computational error. + bool first_time = true; + static size_t stash_size = 1024; + if( first_time ) + { + first_time = false; + gg_assign(stash, gg_cast(UCHAR_P, gg_malloc(stash_size))); + } + if( stash_size < destref.field->data.capacity ) + { + stash_size = destref.field->data.capacity; + gg_assign(stash, gg_cast(UCHAR_P, gg_realloc(stash, stash_size))); + } + st_data = qualified_data_dest(destref); + st_size = refer_size_dest(destref); + gg_memcpy(stash, + st_data, + st_size); + } + + if( (sourceref.field->attr & (linkage_e | based_e)) + || ( destref.field->attr & (linkage_e | based_e)) ) + { + //goto dont_be_clever; this will go through to the default. + } + + if( !moved ) + { + moved = mh_source_is_group(destref, sourceref, tsource); + } + + if( !moved ) + { + moved = mh_identical(destref, sourceref, tsource); + } + + if( !moved ) + { + moved = mh_source_is_literalN(destref, + sourceref, + check_for_error, + rounded, + size_error); + } + + if( !moved ) + { + moved = mh_dest_is_float( destref, + sourceref, + tsource, + rounded, + size_error); + } + + if( !moved && rounded == truncation_e ) + { + moved = mh_numeric_display( destref, + sourceref, + tsource, + size_error); + } + + if( !moved ) + { + moved = mh_little_endian( destref, + sourceref, + tsource, + restore_on_error, + size_error); + } + + if( !moved && sourceref.field->type == FldLiteralA) + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("__gg__move_literala") + } + + cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial); + + if( destref.refmod.from + || destref.refmod.len ) + { + // Let the move routine know to treat the destination as alphanumeric + gg_attribute_bit_set(destref.field, refmod_e); + } + + static char *buffer = NULL; + static size_t buffer_size = 0; + size_t source_length = sourceref.field->data.capacity; + + if( buffer_size < source_length ) + { + buffer_size = source_length; + buffer = (char *)xrealloc(buffer, buffer_size); + } + + if( figconst ) + { + char const_char = 0xFF; // Head off a compiler warning about + // // uninitialized variables + switch(figconst) + { + case normal_value_e : + // This is not possible, it says here in the fine print. + abort(); + break; + case low_value_e : + const_char = ascii_to_internal(__gg__low_value_character); + break; + case zero_value_e : + const_char = internal_zero; + break; + case space_value_e : + const_char = internal_space; + break; + case quote_value_e : + const_char = ascii_to_internal(__gg__quote_character); + break; + case high_value_e : + const_char = ascii_to_internal(__gg__high_value_character); + break; + case null_value_e: + const_char = 0x00; + break; + } + memset(buffer, const_char, source_length); + } + else + { + memset( buffer, ascii_space, source_length); + memcpy( buffer, + sourceref.field->data.initial, + std::min(source_length, (size_t)sourceref.field->data.capacity) ); + for( size_t i=0; i<source_length; i++) + { + buffer[i] = ascii_to_internal(buffer[i]); + } + } + + int rounded_parameter = rounded + | ((sourceref.all || figconst ) ? REFER_ALL_BIT : 0); + + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__move_literala", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + build_int_cst_type(INT, rounded_parameter), + build_string_literal(source_length, + buffer), + build_int_cst_type( SIZE_T, source_length), + NULL_TREE)); + } + else + { + gg_call ( INT, + "__gg__move_literala", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + build_int_cst_type(INT, rounded_parameter), + build_string_literal(source_length, + buffer), + build_int_cst_type( SIZE_T, source_length), + NULL_TREE); + } + if( destref.refmod.from + || destref.refmod.len ) + { + // Return that value to its original form + gg_attribute_bit_clear(destref.field, refmod_e); + } + moved = true; + } + + if( !moved ) + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("default __gg__move") + } + + if( destref.refmod.from + || destref.refmod.len + || sourceref.refmod.from + || sourceref.refmod.len ) + { + // Let the move routine know to treat the destination as alphanumeric + gg_attribute_bit_set(destref.field, refmod_e); + } + + int nflags = (sourceref.all ? REFER_T_MOVE_ALL : 0) + + (sourceref.addr_of ? REFER_T_ADDRESS_OF : 0); + + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__move", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + tsource.pfield, + tsource.offset, + tsource.length, + build_int_cst_type(INT, nflags), + build_int_cst_type(INT, rounded), + NULL_TREE)); + } + else + { + gg_call ( INT, + "__gg__move", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + tsource.pfield, + tsource.offset, + tsource.length, + build_int_cst_type(INT, nflags), + build_int_cst_type(INT, rounded), + NULL_TREE); + + } + if( destref.refmod.from + || destref.refmod.len + || sourceref.refmod.from + || sourceref.refmod.len ) + { + // Return that value to its original form + gg_attribute_bit_clear(destref.field, refmod_e); + } + + moved = true; + } + + if( restore_on_error ) + { + IF(size_error, ne_op, integer_zero_node) + { + gg_memcpy(st_data, + stash, + st_size); + } + ELSE + ENDIF + } + else + { + if( check_for_error ) + { + IF(size_error, ne_op, integer_zero_node) + { + // We had a size error, but there was no restore_on_error. Pointer + // Let our lord and master know there was a truncation: + set_exception_code(ec_size_truncation_e); + } + ELSE + ENDIF + } + } + + SHOW_PARSE1 + { + SHOW_PARSE_END + } + } + +tree parser_cast_long(tree N) + { + return gg_cast(LONG, N); + } + +void +parser_print_long(tree N) + { + gg_printf("%ld", N, NULL_TREE); + } + +void +parser_print_long(const char *fmt, tree N) + { + // fmt should have a %ld/%lx in it + gg_printf(fmt, N, NULL_TREE); + } + +void +parser_print_long(long N) + { + gg_printf("%ld", build_int_cst_type(LONG, N), NULL_TREE); + } + +void +parser_print_long(const char *fmt, long N) + { + // fmt should have a %ld/%lx in it + gg_printf(fmt, build_int_cst_type(LONG, N), NULL_TREE); + } + +void +parser_print_string(const char *ach) + { + gg_printf("%s", gg_string_literal(ach), NULL_TREE); + } + +void +parser_print_string(const char *fmt, const char *ach) + { + // fmt should have a %s in it + gg_printf(fmt, gg_string_literal(ach), NULL_TREE); + } + +char * +binary_initial_from_float128(cbl_field_t *field, int rdigits, _Float128 value) + { + // This routine returns an xmalloced buffer designed to replace the + // data.initial member of the incoming field + char *retval = NULL; + char ach[128] = ""; + + // We need to adjust value so that it has no decimal places + if( rdigits ) + { + value *= get_power_of_ten(rdigits); + } + // We need to make sure that the resulting string will fit into + // a number with 'digits' digits + + // Keep in mind that pure binary types, like BINARY-CHAR, have no digits + if( field->data.digits ) + { + value = fmodf128(value, (_Float128)get_power_of_ten(field->data.digits)); + } + + // We convert it to a integer string of digits: + strfromf128(ach, sizeof(ach), "%.0f", value); + if( strcmp(ach, "-0") == 0 ) + { + // Yes, negative zero can be a thing. Let's make it go away. + strcpy(ach, "0"); + } + + retval = (char *)xmalloc(field->data.capacity); + switch(field->data.capacity) + { + case 1: + *(signed char *)retval = atoi(ach); + break; + case 2: + *(signed short *)retval = atoi(ach); + break; + case 4: + *(signed int *)retval = atoi(ach); + break; + case 8: + *(signed long *)retval = atol(ach); + break; + case 16: + { + __int128 val = 0; + bool negative = false; + for(size_t i=0; i<strlen(ach); i++) + { + if( ach[i] == '-' ) + { + negative = true; + continue; + } + val *= 10; + val += ach[i] & 0x0F; + } + if( negative ) + { + val = -val; + } + *(__int128 *)retval = val; + } + break; + default: + fprintf(stderr, + "Trouble in initial_from_float128 at %s() %s:%d\n", + __func__, + __FILE__, + __LINE__); + abort(); + break; + } + + return retval; + } + +static void +digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits, _Float128 value) + { + char ach[128]; + + // We need to adjust value so that it has no decimal places + if( rdigits ) + { + value *= get_power_of_ten(rdigits); + } + // We need to make sure that the resulting string will fit into + // a number with 'digits' digits + + value = fmodf128(value, (_Float128)get_power_of_ten(field->data.digits)); + + // We convert it to a integer string of digits: + strfromf128(ach, sizeof(ach), "%.0f", value); + if( strcmp(ach, "-0") == 0 ) + { + // Yes, negative zero can be a thing. Let's make it go away. + strcpy(ach, "0"); + } + + //fprintf(stderr, "digits_from_float128() %s %f %s ", field->name, (double)value, ach); + + gcc_assert( strlen(ach) <= field->data.digits ); + if( strlen(ach) < width ) + { + memset(retval, '0', width-strlen(ach) ); + } + strcpy(retval + (width-strlen(ach)), ach); + } + +char * +initial_from_float128(cbl_field_t *field, _Float128 value) + { + Analyze(); + // This routine returns an xmalloced buffer that is intended to replace the + // data.initial member of the incoming field. + + //fprintf(stderr, "initial_from_float128 %s\n", field->name); + + char *retval = NULL; + int rdigits; + + // Let's handle the possibility of a figurative constant + cbl_figconst_t figconst = cbl_figconst_of( field->data.initial); + //cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); + if( figconst ) + { + int const_char = 0xFF; // Head off a compiler warning about uninitialized + // // variables + switch(figconst) + { + case normal_value_e : + // This really should never happen because normal_value_e is zero + abort(); + break; + case low_value_e : + const_char = ascii_to_internal(__gg__low_value_character); + break; + case zero_value_e : + const_char = internal_zero; + break; + case space_value_e : + const_char = internal_space; + break; + case quote_value_e : + const_char = ascii_to_internal(__gg__quote_character); + break; + case high_value_e : + if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) + { + const_char = __gg__high_value_character; + } + else + { + const_char = ascii_to_internal(__gg__high_value_character); + } + break; + case null_value_e: + break; + } + bool set_return = figconst != zero_value_e; + if( !set_return ) + { + // The figconst is zero + switch(field->type) + { + case FldGroup: + case FldAlphanumeric: + set_return = true; + break; + + default: + break; + } + } + if( set_return ) + { + retval = (char *)xmalloc(field->data.capacity); + memset(retval, const_char, field->data.capacity); + goto done; + } + } + + // There is always the infuriating possibility of a P-scaled number + if( field->attr & scaled_e ) + { + rdigits = 0; + if( field->data.rdigits >= 0 ) + { + // Suppose our PIC is PPPPPP999, meaning that field->digits + // is 3, and field->rdigits is 6. + + // Our result has no decimal places, and we have to multiply the value + // by 10**9 to get the significant bdigits where they belong. + + value *= get_power_of_ten(field->data.digits + field->data.rdigits); + } + else + { + // Suppose our target is 999PPPPPP, so there is a ->digits + // of 3 and field->rdigits of -6. + + // If our caller gave us 123000000, we need to divide + // it by 1000000 to line up the 123 with where we want it to go: + + value /= get_power_of_ten(-field->data.rdigits); + } + // Either way, we now have everything aligned for the remainder of the + // processing to work: + } + else + { + // Not P-scaled + rdigits = field->data.rdigits; + } + + switch(field->type) + { + case FldNumericBin5: + case FldIndex: + retval = binary_initial_from_float128(field, rdigits, value); + break; + + case FldNumericBinary: + { + retval = binary_initial_from_float128(field, rdigits, value); + size_t left = 0; + size_t right = field->data.capacity - 1; + while(left < right) + { + std::swap(retval[left++], retval[right--]); + } + break; + } + + case FldNumericDisplay: + { + retval = (char *)xmalloc(field->data.capacity); + char *pretval = retval; + char ach[128]; + + bool negative; + if( value < 0 ) + { + negative = true; + value = -value; + } + else + { + negative = false; + } + + digits_from_float128(ach, field, field->data.digits, rdigits, value); + + char *digits = ach; + if( (field->attr & signable_e) + && (field->attr & separate_e) + && (field->attr & leading_e ) ) + { + if( negative ) + { + *pretval++ = internal_minus; + } + else + { + *pretval++ = internal_plus; + } + } + for(size_t i=0; i<field->data.digits; i++) + { + *pretval++ = internal_zero + ((*digits++) & 0x0F); + } + if( (field->attr & signable_e) + && (field->attr & separate_e) + && !(field->attr & leading_e ) ) + { + if( negative ) + { + *pretval++ = internal_minus; + } + else + { + *pretval++ = internal_plus; + } + } + if( (field->attr & signable_e) + && !(field->attr & separate_e) + && negative) + { + if( field->attr & leading_e ) + { + if( internal_is_ebcdic ) + { + retval[0] &= ~NUMERIC_DISPLAY_SIGN_BIT; + } + else + { + retval[0] |= NUMERIC_DISPLAY_SIGN_BIT; + } + } + else + { + if( internal_is_ebcdic ) + { + pretval[-1] &= ~NUMERIC_DISPLAY_SIGN_BIT; + } + else + { + pretval[-1] |= NUMERIC_DISPLAY_SIGN_BIT; + } + } + } + break; + } + + case FldPacked: + { + retval = (char *)xmalloc(field->data.capacity); + char *pretval = retval; + char ach[128]; + + bool negative; + if( value < 0 ) + { + negative = true; + value = -value; + } + else + { + negative = false; + } + + // For COMP-6 (flagged by separate_e), the number of required digits is + // twice the capacity. + + // For COMP-3, the number of digits is 2*capacity minus 1, because the + // the final "digit" is a sign nybble. + + size_t ndigits = (field->attr & separate_e) + ? field->data.capacity * 2 + : field->data.capacity * 2 - 1; + digits_from_float128(ach, field, ndigits, rdigits, value); + + char *digits = ach; + for(size_t i=0; i<ndigits; i++) + { + if( !(i & 0x01) ) + { + *pretval = ((*digits++) & 0x0F)<<4;; + } + else + { + *pretval++ += (*digits++) & 0x0F; + } + } + if( !(field->attr & separate_e) ) + { + // This is COMP-3, so put in a sign nybble + if( (field->attr & signable_e) ) + { + if( negative ) + { + *pretval++ += 0x0D; // Means signable and negative + } + else + { + *pretval++ += 0x0C; // Means signable and non-negative + } + } + else + { + *pretval++ += 0x0F; // Means not signable + } + } + break; + } + + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + case FldAlphaEdited: + { + if( field->data.initial ) + { + retval = (char *)xmalloc(field->data.capacity+1); + if( field->attr & hex_encoded_e) + { + memcpy(retval, field->data.initial, field->data.capacity); + } + else + { + size_t buffer_size = 0; + size_t length = (size_t)field->data.capacity; + memset(retval, internal_space, length); + raw_to_internal(&retval, &buffer_size, field->data.initial, length); + if( strlen(field->data.initial) < length ) + { + // If this is true, then the initial string must've been Z'xyz' + retval[strlen(field->data.initial)] = '\0'; + } + } + retval[field->data.capacity] = '\0'; + } + break; + } + + case FldNumericEdited: + { + retval = (char *)xmalloc(field->data.capacity+1); + if( field->data.initial && field->attr & quoted_e ) + { + if( field->attr & quoted_e ) + { + // What the programmer says the value is, the value becomes, no + // matter how wrong it might be. + size_t length = std::min( (size_t)field->data.capacity, + strlen(field->data.initial)); + for(size_t i=0; i<length; i++) + { + retval[i] = ascii_to_internal(field->data.initial[i]); + } + if( length < (size_t)field->data.capacity ) + { + memset( retval+length, + internal_space, + (size_t)field->data.capacity - length); + } + } + } + else + { + // It's not a quoted string, so we use data.value: + bool negative; + if( value < 0 ) + { + negative = true; + value = -value; + } + else + { + negative = false; + } + + char ach[128]; + memset(ach, 0, sizeof(ach)); + memset(retval, 0, field->data.capacity); + size_t ndigits = field->data.capacity; + + if( (field->attr & blank_zero_e) && value == 0 ) + { + memset(retval, internal_space, field->data.capacity); + } + else + { + digits_from_float128(ach, field, ndigits, rdigits, value); + __gg__string_to_numeric_edited( retval, + ach, + field->data.rdigits, + negative, + field->data.picture); + } + } + break; + } + + case FldFloat: + { + retval = (char *)xmalloc(field->data.capacity); + switch( field->data.capacity ) + { + case 4: + *(_Float32 *)retval = (_Float32) value; + break; + case 8: + *(_Float64 *)retval = (_Float64) value; + break; + case 16: + *(_Float128 *)retval = (_Float128) value; + break; + } + break; + } + + case FldLiteralN: + { + break; + } + + default: + break; + } + done: + return retval; + } + +static void +actually_create_the_static_field( cbl_field_t *new_var, + tree data_area, + size_t length_of_initial_string, + const char *new_initial, + tree immediate_parent, + tree new_var_decl) + { + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = cblc_field_type_node; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + tree next_field = TYPE_FIELDS(cblc_field_type_node); + // We are going to create the constructors by walking the linked + // list of FIELD_DECLs. We must do it in the same order as the + // structure creation code in create_cblc_field_t() + + // UCHAR_P, "data", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + data_area ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "capacity", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type( SIZE_T, + new_var->data.capacity) ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "allocated", + if( data_area != null_pointer_node ) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type( SIZE_T, + new_var->data.capacity) ); + } + else + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type( SIZE_T, + 0) ); + } + + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "offset", + + if( new_var->type == FldAlphanumeric && + new_var->attr & intermediate_e ) + { + // This is in support of FUNCTION TRIM. That function can make the capacity + // of the intermediate target smaller so that TRIM("abc ") returns + // "abc". By putting the capacity here for such variables, we have a + // mechanism for restoring it the capacity to the original. + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->data.capacity)); + } + else + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->offset) ); + } + + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "name", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + gg_string_literal(new_var->name) ); + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "picture", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + gg_string_literal(new_var->data.picture) ); + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "initial", + if( length_of_initial_string == 0 ) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + null_pointer_node ); + } + else + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_string_literal(length_of_initial_string, new_initial) ); + } + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "parent", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + immediate_parent ? gg_get_address_of(immediate_parent) : null_pointer_node ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "occurs_lower", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->occurs.bounds.lower) ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "occurs_upper"); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->occurs.bounds.upper) ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "attr", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->attr) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "type", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->type) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "level", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->level) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "digits", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->data.digits) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "rdigits", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->data.rdigits) ); + next_field = TREE_CHAIN(next_field); + + DECL_INITIAL(new_var_decl) = constr; + } + +static void +psa_global(cbl_field_t *new_var) + { + char *mname = cobol_name_mangler(new_var->name); + char ach[2*sizeof(cbl_name_t)]; + sprintf(ach, "__gg__%s", mname); + free(mname); + + if( getenv("SHOW_GLOBAL_VARIABLES") ) + { + char ach_type[32]; + strcpy(ach_type, cbl_field_type_str(new_var->type)); + + fprintf(stderr, "struct cblc_field_t %s = {\n", ach); + fprintf(stderr, " .data = NULL ,\n" ); + fprintf(stderr, " .capacity = %d ,\n", new_var->data.capacity ); + fprintf(stderr, " .offset = %ld ,\n" , new_var->offset ); + fprintf(stderr, " .name = \"%s\" ,\n" , new_var->name ); + fprintf(stderr, " .picture = \"%s\" ,\n" , new_var->data.picture ? new_var->data.picture : "" ); + if( new_var->data.initial || new_var->type == FldPointer ) + { + fprintf(stderr, " .initial = \"%s\" ,\n" , new_var->data.picture ? new_var->data.picture : "" ); + } + else + { + fprintf(stderr, " .initial = NULL ,\n" ); + } + fprintf(stderr, " .parent = NULL,\n" ); + fprintf(stderr, " .depending_on = NULL ,\n" ); + fprintf(stderr, " .depends_on = NULL ,\n" ); + fprintf(stderr, " .occurs_lower = 0 ,\n" ); + fprintf(stderr, " .occurs_upper = 0 ,\n" ); + fprintf(stderr, " .attr = 0x%lx ,\n" , new_var->attr ); + fprintf(stderr, " .type = %s ,\n" , ach_type); + fprintf(stderr, " .level = %d ,\n" , new_var->level ); + fprintf(stderr, " .digits = %d ,\n" , new_var->data.digits ); + fprintf(stderr, " .rdigits = %d ,\n" , new_var->data.rdigits ); + fprintf(stderr, " };\n"); + } + + if( strcmp(new_var->name, "_VERY_TRUE") == 0 ) + { + new_var->var_decl_node = boolean_true_node; + return; + } + if( strcmp(new_var->name, "_VERY_FALSE") == 0 ) + { + new_var->var_decl_node = boolean_false_node; + return; + } + + // global variables already have a cblc_field_t defined in constants.cc + + strcpy(ach, "__gg__"); + strcat(ach, new_var->name); + for(size_t i=0; i<strlen(ach); i++) + { + ach[i] = _tolower(ach[i]); + if(ach[i] == '-') + { + ach[i] = '_'; + } + } + + if( strcmp(new_var->name, "RETURN-CODE") == 0 ) + { + strcpy(ach, "__gg___11_return_code6"); + } + + if( strcmp(new_var->name, "UPSI-0") == 0 ) + { + strcpy(ach, "__gg___6_upsi_04"); + } + + new_var->var_decl_node = gg_declare_variable(cblc_field_type_node, ach, NULL, vs_external_reference); + + // global variables already have a .data area defined. We can find that + // variable from the new_var->name. It's lower-case, with hyphens + // converted to underscores + strcpy(ach, "__gg__data_"); + strcat(ach, new_var->name); + for(size_t i=0; i<strlen(ach); i++) + { + ach[i] = _tolower(ach[i]); + if(ach[i] == '-') + { + ach[i] = '_'; + } + } + new_var->data_decl_node = gg_declare_variable(UCHAR, ach, NULL, vs_external_reference); + } + +static tree +psa_new_var_decl(cbl_field_t *new_var, const char *external_record_base) + { + // This routine creates the VAR_DECL for the cblc_field_t that we are about + // to statically create. + tree new_var_decl; + + if( *external_record_base ) + { + char ach[257]; + strcpy(ach, "_"); + strcat(ach, external_record_base); + strcat(ach, "_ra"); // For "Record Area" + new_var_decl = gg_define_variable( cblc_field_type_node, + ach, + vs_external); + SET_DECL_MODE(new_var_decl, BLKmode); + } + else + { + size_t our_index = new_var->our_index; + + // During the early stages of implementing cbl_field_t::our_index, there + // were execution paths in parse.y and parser.cc that resulted in our_index + // not being set. I hereby try to use field_index() to find the index + // of this field to resolve those. I note that field_index does a linear + // search of the symbols[] table to find that index. That's why I don't + // use it routinely; it results in O(N^squared) computational complexity + // to do a linear search of the symbol table for each symbol + + if( !our_index + && new_var->type != FldLiteralN + && !(new_var->attr & intermediate_e)) + { + our_index = field_index(new_var); + if( our_index == (size_t)-1 ) + { + // Hmm. Couldn't find it. Seems odd. + our_index = 0; + } + } + + char base_name[257]; + char id_string[32] = ""; + + if( new_var->attr & external_e ) + { + // For external variables, just stick with the original name + sprintf(base_name, "%s_cblc_field", new_var->name); + } + else + { + if( our_index + && new_var->parent + && symbol_at(new_var->parent)->type == SymField ) + { + // We have a parent that is a field + sprintf(id_string, ".%ld_%ld", our_index, new_var->parent); + } + else + { + // The parent is zero, so it'll be implied: + sprintf(id_string, ".%ld", our_index); + } + + if(strcasecmp(new_var->name, "filler") == 0) + { + // Multiple "fillers" can have the same parent, so we use filler_count + // to distinguish them. We also prepend an underscore, so that + // the user can't trip us up by creating their *own* cobol variable + // named "FILLER-1" + static int filler_count = 1; + sprintf(base_name, "_filler_%d", filler_count++); + } + else if( strlen(new_var->name) == 0 ) + { + // This can happen. + static int empty_count = 1; + sprintf(base_name, + "_%s_%d", + cbl_field_type_str(new_var->type), + empty_count++); + } + else if( new_var->attr & intermediate_e ) + { + static int inter_count = 1; + sprintf(base_name, + "_%s_%s_%d", + "intermediate", + new_var->name, + inter_count++); + } + else + { + strcpy(base_name, new_var->name); + } + strcat(base_name, id_string); + } + + if( new_var->attr & external_e ) + { + //fprintf(stderr, "external_e base name is %s\n", base_name); + new_var_decl = gg_define_variable( cblc_field_type_node, + base_name, + vs_external); + SET_DECL_MODE(new_var_decl, BLKmode); + } + else if( new_var->attr & (intermediate_e) + && new_var->type != FldLiteralA + && new_var->type != FldLiteralN ) + { +// new_var_decl = gg_define_variable( cblc_field_type_node, +// base_name, +// vs_static); + new_var_decl = gg_define_variable( cblc_field_type_node, + base_name, + vs_stack); + SET_DECL_MODE(new_var_decl, BLKmode); + } + else + { + new_var_decl = gg_define_variable( cblc_field_type_node, + base_name, + vs_static); + SET_DECL_MODE(new_var_decl, BLKmode); + } + } + return new_var_decl; + } + +#if 1 +static void +psa_FldLiteralA(struct cbl_field_t *field ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", field) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + // We are constructing a completely static constant structure. We know the + // capacity. We'll create it from the data.initial. The cblc_field_t:data + // will be an ASCII/EBCDIC copy of the .initial data. The .initial will be + // left as ASCII. The var_decl_node will be an ordinary cblc_field_t, which + // means that at this point in time, a FldLiteralA can be used anywhere a + // FldGroup or FldAlphanumeric can be used. We are counting on the parser + // not allowing a FldLiteralA to be a left-hand-side variable. + + // First make room + static size_t buffer_size = 1024; + static char *buffer = (char *)xmalloc(buffer_size); + if( buffer_size < field->data.capacity+1 ) + { + buffer_size = field->data.capacity+1; + buffer = (char *)xrealloc(buffer, buffer_size); + } + + cbl_figconst_t figconst = cbl_figconst_of( field->data.initial ); + gcc_assert(figconst == normal_value_e); + + if( internal_codeset_is_ebcdic() ) + { + for( size_t i=0; i<field->data.capacity; i++ ) + { + buffer[i] = ascii_to_internal(field->data.initial[i]); + } + } + else + { + memcpy(buffer, field->data.initial, field->data.capacity); + } + buffer[field->data.capacity] = '\0'; + + // We have the original nul-terminated text at data.initial. We have a + // copy of it in buffer[] in the internal codeset. + + // We will reuse a single static structure for each string + static std::unordered_map<std::string, int> seen_before; + std::string field_string(buffer); + std::unordered_map<std::string, int>::const_iterator it = + seen_before.find(field_string); + + static const char name_base[] = "_literal_a_"; + + if( it != seen_before.end() ) + { + // We've seen that string before. + int nvar = it->second; + char ach[32]; + sprintf(ach, "%s%d", name_base, nvar); + field->var_decl_node = gg_declare_variable(cblc_field_type_node, + ach, + NULL, + vs_file_static); + } + else + { + // We have not seen that string before + static int nvar = 1; + seen_before[field_string] = nvar; + + char ach[32]; + sprintf(ach, "%s%d", name_base, nvar); + field->var_decl_node = gg_define_variable( cblc_field_type_node, + ach, + vs_file_static); + actually_create_the_static_field( + field, + build_string_literal(field->data.capacity+1, + buffer), + field->data.capacity+1, + field->data.initial, + NULL_TREE, + field->var_decl_node); + nvar += 1; + } + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Finished") + TRACE1_END + } + } +#endif + +void +parser_local_add(struct cbl_field_t *new_var ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", new_var); + SHOW_PARSE_END + } + + IF( member(new_var->var_decl_node, "data"), + ne_op, + gg_cast(UCHAR_P, null_pointer_node) ) + { + gg_call(VOID, + "__gg__push_local_variable", + gg_get_address_of(new_var->var_decl_node), + NULL_TREE); + } + ELSE + ENDIF + + if( new_var->level == LEVEL01 || new_var->level == LEVEL77) + { + // We need to allocate memory on the stack for this variable + tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity); + tree data_decl_node = gg_define_variable( array_type, + NULL, + vs_stack); + gg_assign( member(new_var->var_decl_node, "data"), + gg_get_address_of(data_decl_node) ); + } + cbl_refer_t wrapper; + wrapper.field = new_var; + initialize_variable_internal(wrapper); + } + +void +parser_field_attr_set( cbl_field_t *tgt, cbl_field_attr_t attr, bool on_off ) + { + if( on_off ) + { + gg_assign(member(tgt, "attr"), + gg_bitwise_or(member(tgt, "attr"), + build_int_cst_type(SIZE_T, attr))); + } + else + { + gg_assign(member(tgt, "attr"), + gg_bitwise_and(member(tgt, "attr"), + build_int_cst_type(SIZE_T, ~attr))); + } + } + +void +parser_symbol_add(struct cbl_field_t *new_var ) + { + Analyze(); + SHOW_PARSE + { + do + { + fprintf(stderr, "( %d ) %s():", CURRENT_LINE_NUMBER, __func__); + } + while(0); + + fprintf(stderr, " %2.2d %s<%s> off:%zd " + "msiz:%d cap:%d dig:%d rdig:%d attr:0x%lx loc:%p", + new_var->level, + new_var->name, + cbl_field_type_str(new_var->type), + new_var->offset, + new_var->data.memsize, + new_var->data.capacity, + new_var->data.digits, + new_var->data.rdigits, + new_var->attr, + new_var); + + if( is_table(new_var) ) + { + fprintf(stderr," OCCURS:%zd", new_var->occurs.ntimes()); + } + cbl_field_t *parent = parent_of(new_var); + if( parent ) + { + fprintf(stderr, + " parent:(%zd)%s", + new_var->parent, + parent->name); + } + else + { + // Parent isn't a field + size_t parent_index = new_var->parent; + if( parent_index ) + { + symbol_elem_t *e = symbol_at(parent_index); + if( e->type == SymFile ) + { + fprintf(stderr, + " parent_file:(%zd)%s", + new_var->parent, + e->elem.file.name); + if( e->elem.file.attr & external_e ) + { + fprintf(stderr, " (flagged external)"); + } + } + } + } + + if( symbol_redefines(new_var) ) + { + fprintf(stderr, + " redefines:(%p)%s", + symbol_redefines(new_var), + symbol_redefines(new_var)->name); + } + + SHOW_PARSE_END + } + + if( new_var->level == 1 && new_var->occurs.bounds.upper ) + { + if( new_var->data.memsize < new_var->data.capacity * new_var->occurs.bounds.upper ) + { + cbl_internal_error("LEVEL 01 (%s) OCCURS " + "has insufficient data.memsize", new_var->name); + } + } + + if( new_var->var_decl_node ) + { + if( new_var->type != FldConditional ) + { + // There is a possibility when re-using variables that a temporary that + // was created at compile time might not have a data pointer at run time. + if( new_var->attr & (intermediate_e) ) + { + IF( member(new_var->var_decl_node, "allocated"), + lt_op, + member(new_var->var_decl_node, "capacity") ) + { + gg_free(member(new_var, "data")); + gg_assign(member(new_var, "data"), + gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity))); + gg_assign(member(new_var, "allocated"), + build_int_cst_type(SIZE_T, new_var->data.capacity)); + } + ELSE + { + } + ENDIF + } + } + else + { + gg_assign(new_var->var_decl_node, boolean_false_node); + } + + goto done; + } + + if( !(new_var->attr & initialized_e) ) + { + cbl_field_type_t incoming_type = new_var->type; + + if( is_register_field(new_var) ) + { + psa_global(new_var); + goto done; + } + + if( new_var->type == FldBlob ) + { + psa_FldBlob(new_var); + goto done; + } + + if( new_var->type == FldLiteralA ) + { + new_var->data.picture = ""; + psa_FldLiteralA(new_var); + goto done; + } + + size_t length_of_initial_string = 0; + const char *new_initial = NULL; + + // gg_printf("parser_symbol_add %s\n", build_string_literal( strlen(new_var->name)+1, new_var->name), NULL_TREE); + + // If we are dealing with an alphanumeric, and it is not hex_encoded, we + // want to convert to single-byte-encoding (if it happens to be UTF-8) and + // to EBCDIC, if EBCDIC is in force: + + // Make sure we have a new variable to work with. + if( !new_var ) + { + cbl_internal_error("parser_symbol_add() was called with a NULL new_var\n"); + } + + TRACE1 + { + TRACE1_HEADER + if( new_var->level ) + { + gg_fprintf( trace_handle, + 1, + "%2.2d ", + build_int_cst_type(INT, new_var->level)); + } + TRACE1_TEXT(new_var->name) + TRACE1_TEXT_ABC(" (", cbl_field_type_str(new_var->type) ,")") + if( new_var->type == FldLiteralN) + { + gg_fprintf( trace_handle, + 1, " [%ld]", + build_int_cst_type(LONG, + *(const long *)new_var->data.initial)); + } + TRACE1_END + } + + if( is_table(new_var) && new_var->data.capacity == 0) + { + cbl_internal_error( + "%s(): %2.2d %s is a table, but it improperly has a capacity of zero", + __func__, + new_var->level, + new_var->name); + } + + cbl_field_t *ancestor = NULL; + tree immediate_parent = NULL_TREE; + + if( new_var->parent > 0 ) + { + symbol_elem_t *parent = symbol_at(new_var->parent); + gcc_assert(parent); + if( parent->type == SymField ) + { + ancestor = cbl_field_of(parent); + immediate_parent = ancestor->var_decl_node; + } + } + + if( ancestor == NULL ) + { + // This is a last ditch effort for handling SAME AREA. Although + // symbol_redefines should work for REDEFINES, LEVEL66, and SAME AREA, I + // decided to leave the existing code alone and added this in when SAME AREA + // was added in. + ancestor = symbol_redefines(new_var); + if( ancestor ) + { + immediate_parent = ancestor->var_decl_node; + + // This obscure test was put in to find problems caused by SAME AREA, + // which at one point would cause a parent to be erroneously seen after + // the child. + assert(ancestor->our_index < new_var->our_index); + } + } + + if( ancestor == new_var ) + { + cbl_internal_error("parser_symbol_add(): %s is its own ancestor", + new_var->name); + } + + if( !ancestor && (new_var->level > LEVEL01 && new_var->level <= LEVEL49 ) ) + { + cbl_internal_error("parser_symbol_add(): %2.2d %s has null ancestor", + new_var->level, + new_var->name); + } + + // new_var's var_decl_node should be NULL at this point + if( new_var->var_decl_node ) + { + cbl_internal_error( "parser_symbol_add( %s ) improperly has a non-null " + "var_decl_node\n", + new_var->name); + } + + switch( new_var->type ) + { + static int counter=1; + char ach[2*sizeof(cbl_name_t)]; + case FldConditional: + // FldConditional corresponds to a C "bool". But we don't carry + // a runtime copy of a structure for the variable; instead, + // var_decl_node becomes a boolean_type_node that is used directly. + sprintf(ach, "_%sconditional_%d", new_var->name, counter++); + new_var->var_decl_node = gg_define_variable(BOOL, ach, vs_static); + goto done; + break; + + default: + break; + } + + if( new_var->type == FldNumericBinary + || new_var->type == FldNumericBin5 ) + { + switch( new_var->data.capacity ) + { + case 1: + case 2: + case 4: + case 8: + case 16: + break; + default: + fprintf(stderr, + "%s is type %s and has capacity %u\n", + new_var->name, + cbl_field_type_str(new_var->type), + new_var->data.capacity); + gcc_unreachable(); + break; + } + } + + size_t level_88_string_size = 0; + char *level_88_string = NULL; + if( ancestor ) + { + level_88_string = get_level_88_domain(ancestor->data.capacity, new_var, level_88_string_size); + } + + if( !new_var->data.picture ) + { + // When picture is NULL, we have to keep testing for NULLness at runtime + // Force it to be a zero-length string here, so that we don't need to + // worry about it. + new_var->data.picture = ""; + } + + if( new_var->type == FldNumericEdited && (new_var->attr & scaled_e) ) + { + char *pic = xstrdup(new_var->data.picture); // duplicate the const char * + remove_p_from_picture(pic); + new_var->data.picture = pic; + } + + if( new_var->type == FldClass && new_var->level != 88 ) + { + new_var->data.initial = get_class_condition_string(new_var); + } + + if( new_var->type == FldLiteralA ) + { + length_of_initial_string = new_var->data.capacity; + } + else if( new_var->data.initial && new_var->data.initial[0] != '\0' ) + { + if( new_var->type == FldClass ) + { + length_of_initial_string = strlen(new_var->data.initial)+1; + } + else if( new_var->type == FldNumericDisplay ) + { + length_of_initial_string = strlen(new_var->data.initial)+1; + } + else + { + // This is an ordinary string + // fprintf(stderr, ">>>>>>> parser_symbol_add %s %s \n", cbl_field_type_str(new_var->type), new_var->name); + // fprintf(stderr, " %d %d\n", (int)strlen(new_var->data.initial), (int)new_var->data.capacity); + //length_of_initial_string = strlen(new_var->data.initial) + 1; + length_of_initial_string = new_var->data.capacity + 1; + } + } + else + { + // We have something that doesn't have a data.initial pointer + length_of_initial_string = 0; + } + + // GDB needs to know the data hierarchy. We do that by including our_index + // and parent index in the variable name: + + size_t our_index = new_var->our_index; + + // During the early stages of implementing cbl_field_t::our_index, there + // were execution paths in parse.y and parser.cc that resulted in our_index + // not being set. I hereby try to use field_index() to find the index + // of this field to resolve those. I note that field_index does a linear + // search of the symbols[] table to find that index. That's why I don't + // use it routinely; it results in O(N^squared) computational complexity + // to do a linear search of the symbol table for each symbol + + if( !our_index + && new_var->type != FldLiteralN + && !(new_var->attr & intermediate_e)) + { + our_index = field_index(new_var); + if( our_index == (size_t)-1 ) + { + // Hmm. Couldn't find it. Seems odd. + our_index = 0; + } + } + + // When we create the cblc_field_t structure, we need a data pointer + // for "data". In the case of a variable that has no parent, we + // have to allocate storage. In the case of a variable that has a parent, + // we calculate data as the pointer to our parent's data plus our + // offset. + + // declare and define the structure. This code *must* match + // the C structure declared in libgcobol.c. Towards that end, the + // variables are declared in descending order of size in order to + // make the packing match up. + + // This uses a single structure type_decl template for creating each structure + + char external_record_base[2*sizeof(cbl_name_t)] = ""; + + if( new_var->parent > 0 ) + { + symbol_elem_t *parent = symbol_at(new_var->parent); + gcc_assert(parent); + if( parent->type == SymField ) + { + ancestor = cbl_field_of(parent); + immediate_parent = ancestor->var_decl_node; + } + else if( parent->type == SymFile ) + { + if( parent->elem.file.attr & external_e ) + { + // The parent of new_var is a SymFile with the external_e attribute + // Therefore, we have to establish new_var as an external with a + // predictable name + strcpy(external_record_base, parent->elem.file.name); + } + } + } + + tree new_var_decl = psa_new_var_decl(new_var, external_record_base); + + if( new_var->type == FldNumericEdited ) + { + // Decide if a NumericEdited can hold negative numbers: + size_t len = strlen( new_var->data.picture); + + new_var->attr &= ~signable_e; + if( strchr(new_var->data.picture, '+') ) + { + new_var->attr |= signable_e; + } + else if( strchr(new_var->data.picture, '-') ) + { + new_var->attr |= signable_e; + } + else if( len > 2 ) + { + char ch1 = _toupper(new_var->data.picture[len-2]); + char ch2 = _toupper(new_var->data.picture[len-1]); + if( (ch1 == 'D' && ch2 == 'B') + || (ch1 == 'C' && ch2 == 'R') ) + { + new_var->attr |= signable_e; + } + } + } + + /* + * Burn after reading. (Delete comment after implementing.) + * + * As of Tue Apr 4 10:29:35 2023, we support 01 CONSTANT numeric values as follows: + * 1. FldNumericBin5 + * 2. always constant_e, also potentially global_e + * 3. compile-time value in cbl_field_data_t::value + * 4. cbl_field_data_t::capacity is 0 because it requires no working storage + */ + + if( new_var->data.capacity == 0 + && new_var->level != 88 + && new_var->type != FldClass + && new_var->type != FldLiteralN + && new_var->type != FldLiteralA ) + { + cbl_internal_error( "%s(): %2.2d %s<%s> improperly has a data.capacity of zero", + __func__, + new_var->level, + new_var->name, + cbl_field_type_str(new_var->type)); + } + + new_var->var_decl_node = new_var_decl; + + if( level_88_string ) + { + new_var->data.initial = level_88_string; + length_of_initial_string = level_88_string_size; + } + + tree data_area = null_pointer_node; + + if( *external_record_base ) + { + char achDataName[256]; + if( *external_record_base ) + { + sprintf(achDataName, "__%s_vardata", external_record_base); + } + tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_external); + data_area = gg_get_address_of(new_var->data_decl_node); + goto actual_allocate; + } + + if( ancestor && new_var->level != 0 ) + { + // This variable has an ancestor, so we share its already-allocated data + // area + new_var->data_decl_node = ancestor->data_decl_node; + } + else + { + // We have no ancestor, so data_decl_node must be allocated. Note that + // LEVEL00 variables might have ancestors (INDEXED BY variables, for + // example), but they need data allocated. + + if( new_var->type == FldLiteralN ) + { + // A numeric literal gets special handling: + psa_FldLiteralN(new_var); + data_area = gg_get_address_of(new_var->data_decl_node); + } + else + { + // Create a static array of UCHAR, and make that the data_decl_node + // size_t bytes_to_allocate = new_var->data.memsize ? + // new_var->data.memsize : new_var->data.capacity; + size_t bytes_to_allocate = std::max(new_var->data.memsize, + new_var->data.capacity); + + // A FldClass actually doesn't need any bytes, because the only important + // thing about it is the .initial field. We will allocate a single byte, + // just to keep run-time pointers from being NULL + if( (new_var->type == FldClass && bytes_to_allocate == 0) + || (new_var->type == FldLiteralA && bytes_to_allocate == 0) ) + { + bytes_to_allocate = 1; + } + + if( !bytes_to_allocate ) + { + fprintf(stderr, + "bytes_to_allocate is zero for %s (symbol number %ld)\n", + new_var->name, + new_var->our_index); + gcc_assert(bytes_to_allocate); + } + + if( new_var->type == FldIndex && new_var->level == 0 ) + { + // Do nothing, because the OCCURS INDEXED BY variable needs data + // allocated. This leaves bytes_to_allcate at its value. + } + else + { + if( new_var->attr & based_e + || new_var->attr & linkage_e + || new_var->attr & local_e ) + { + // BASED variables get their data through ALLOCATE or SET + // LINKAGE variables get their data from the caller + // LOCAL variables get their data dynamically. + bytes_to_allocate = 0; + } + } + + if( bytes_to_allocate ) + { + if( new_var->attr & (intermediate_e) + && new_var->type != FldLiteralN + && new_var->type != FldLiteralA ) + { + // We'll malloc() data in initialize_variable + data_area = null_pointer_node; + } + else + { + // We need a unique name for the allocated data for this COBOL variable: + char achDataName[256]; + if( new_var->attr & external_e ) + { + sprintf(achDataName, "%s", new_var->name); + } + else if( new_var->name[0] == '_' ) + { + // Avoid doubling up on leading underscore + sprintf(achDataName, + "%s_data_%lu", + new_var->name, + sv_data_name_counter++); + } + else + { + sprintf(achDataName, + "_%s_data_%lu", + new_var->name, + sv_data_name_counter++); + } + + if( new_var->attr & external_e ) + { + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_external); + data_area = gg_get_address_of(new_var->data_decl_node); + } + else + { + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_static); + data_area = gg_get_address_of(new_var->data_decl_node); + } + } + } + } + } + + if( new_var->data.initial ) + { + new_initial = initial_from_float128(new_var, new_var->data.value); + } + if( new_initial ) + { + switch(new_var->type) + { + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + length_of_initial_string = new_var->data.capacity+1; + break; + + default: + length_of_initial_string = new_var->data.capacity; + break; + } + } + else + { + new_initial = new_var->data.initial; + if( !new_initial ) + { + if( length_of_initial_string ) + { + gcc_unreachable(); + } + } + else + { + if( new_var->type == FldLiteralN ) + { + // We need to convert this string to the internal character set + // char *buffer = NULL; + // size_t buffer_size = 0; + // raw_to_internal(&buffer, + // &buffer_size, + // new_var->data.initial, + // strlen(new_var->data.initial)); + // new_initial = bufer; + // length_of_initial_string = strlen(new_var->data.initial)+1; + } + } + } + + actual_allocate: + // if( level_88_string ) + // { + // actually_create_the_static_field( new_var, + // data_area, + // level_88_string_size, + // level_88_string, + // immediate_parent, + // new_var_decl); + // } + // else + { + actually_create_the_static_field( new_var, + data_area, + length_of_initial_string, + new_initial, + immediate_parent, + new_var_decl); + } + + if( level_88_string ) + { + free(level_88_string); + } + + if( !(new_var->attr & ( linkage_e | based_e)) ) + { + static const bool explicitly = false; + static const bool just_once = true; + initialize_variable_internal( new_var, + explicitly, + just_once); + } + + if( new_var->type != incoming_type ) + { + fprintf(stderr, "Type mismatch in parser_symbol_add()\n"); + gcc_unreachable(); + } + new_var->attr |= initialized_e; + } + else + { + fprintf(stderr, "parser_symbol_add() skipping %s", new_var->name); + } + done: + return; + } diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h new file mode 100644 index 0000000..2c135e8 --- /dev/null +++ b/gcc/cobol/genapi.h @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _GENAPI_H_ +#define _GENAPI_H_ + +#define DISPLAY_ADVANCE true +#define DISPLAY_NO_ADVANCE false + +typedef enum + { + refer_dest, + refer_source, + } refer_type_t; + +void parser_display_internal( tree file_descriptor, + cbl_refer_t refer, + bool advance=DISPLAY_NO_ADVANCE); + +void parser_first_statement( int lineno ); + +void parser_enter_file(const char *filename); +void parser_leave_file(); +void parser_division( cbl_division_t division, + cbl_field_t *ret, size_t narg, cbl_ffi_arg_t args[] ); +void parser_enter_program(const char *funcname, bool is_function, int *retval); +void parser_leave_program(); + +void parser_accept( cbl_refer_t refer, special_name_t special_e); +void parser_accept_exception( cbl_label_t *name ); +void parser_accept_exception_end( cbl_label_t *name ); + +void parser_accept_envar( cbl_refer_t refer, cbl_refer_t envar, + cbl_label_t *error, cbl_label_t *not_error ); +void parser_set_envar( cbl_refer_t envar, cbl_refer_t refer ); + +void parser_accept_command_line( cbl_refer_t tgt, cbl_refer_t src, + cbl_label_t *error, cbl_label_t *not_error ); +void parser_accept_command_line_count( cbl_refer_t tgt ); + +void parser_accept_date_yymmdd( cbl_field_t *tgt ); +void parser_accept_date_yyyymmdd( cbl_field_t *tgt ); +void parser_accept_date_yyddd( cbl_field_t *tgt ); +void parser_accept_date_yyyyddd( cbl_field_t *tgt ); +void parser_accept_date_dow( cbl_field_t *tgt ); +void parser_accept_date_hhmmssff( cbl_field_t *tgt ); + +void +parser_alphabet( cbl_alphabet_t& alphabet ); +void +parser_alphabet_use( cbl_alphabet_t& alphabet ); + +void +parser_allocate( cbl_refer_t size_or_based, cbl_refer_t returning, bool initialized ); +void +parser_free( size_t n, cbl_refer_t refers[] ); + +void +parser_add( size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void parser_arith_error( cbl_label_t *name ); +void parser_arith_error_end( cbl_label_t *name ); + +void +parser_subtract(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void +parser_multiply(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void +parser_divide(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_refer_t remainder, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void +parser_add( struct cbl_refer_t tgt, + struct cbl_refer_t a, struct cbl_refer_t b, + enum cbl_round_t = truncation_e ); + +void +parser_subtract( struct cbl_refer_t tgt, + struct cbl_refer_t a, struct cbl_refer_t b, + enum cbl_round_t = truncation_e ); + +void +parser_multiply( struct cbl_refer_t tgt, + struct cbl_refer_t a, struct cbl_refer_t b, + enum cbl_round_t = truncation_e ); + +void +parser_divide( struct cbl_refer_t quotient, + struct cbl_refer_t divisor, + struct cbl_refer_t dividend, + enum cbl_round_t = truncation_e, + struct cbl_refer_t remainder = cbl_refer_t()); + +// void +// parser_exponentiation( cbl_refer_t cref, +// cbl_refer_t aref, +// cbl_refer_t bref, +// cbl_round_t rounded = truncation_e ); + +void +parser_relop( struct cbl_field_t *tgt, + struct cbl_refer_t a, enum relop_t, struct cbl_refer_t b ); + +void +parser_relop_long(struct cbl_field_t *tgt, + long a, enum relop_t, struct cbl_refer_t b ); + +void +parser_logop( struct cbl_field_t *tgt, + struct cbl_field_t *a, enum logop_t, struct cbl_field_t *b ); + +void +parser_setop( struct cbl_field_t *tgt, + struct cbl_field_t *a, enum setop_t, struct cbl_field_t *b ); + +void +parser_bitop( struct cbl_field_t *tgt, + struct cbl_field_t *a, enum bitop_t, size_t B ); + +void +parser_bitwise_op(struct cbl_field_t *tgt, + struct cbl_field_t *a, + enum bitop_t op, + size_t bitmask ); + +void +parser_classify( struct cbl_field_t *tgt, + struct cbl_refer_t srca, enum classify_t type ); + +void +parser_op( struct cbl_refer_t cref, + struct cbl_refer_t aref, int op, struct cbl_refer_t bref, + struct cbl_label_t *op_error); + +cbl_field_t +determine_intermediate_type( const cbl_refer_t& aref, + int op, + const cbl_refer_t& bref ); + +void +parser_if( struct cbl_field_t *yn ); // value is 1 or 0 +void +parser_else(void); +void +parser_fi(void); + + +void +parser_enter_paragraph( struct cbl_label_t *label ); +void +parser_leave_paragraph( cbl_label_t *label ); + +void +parser_enter_section( struct cbl_label_t *label ); +void +parser_leave_section( struct cbl_label_t *label ); + +void +parser_perform( struct cbl_label_t *label, bool suppress_nexting=false ); + +void +parser_perform_times( struct cbl_label_t *label, cbl_refer_t count ); + +void +parser_perform_start( struct cbl_perform_tgt_t *tgt ); + +void +parser_perform_conditional( struct cbl_perform_tgt_t *tgt ); + +void +parser_perform_conditional_end( struct cbl_perform_tgt_t *tgt ); + +/* + * To perform once (not a loop) N is NULL because the user didn't provide a count. + * tgt->to is NULL if the PERFORM statement has no THRU phrase. + * For an in-line loop body, tgt->from.type == LblLoop, and tgt->to is NULL. + */ +void +parser_perform( struct cbl_perform_tgt_t *tgt, struct cbl_refer_t N ); + +/* + * A simple UNTIL loop uses 1 varys element. For VARY loops, the + * VARY/AFTER phrases appear in varys in the same order as in the + * COBOL text. + */ + +// Either parser_perform_until() or parser_perform_inline_times() must appear +// after a parser_perform_start() +void +parser_perform_until( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t nvary, + struct cbl_perform_vary_t *varys ); + +void +parser_perform_inline_times(struct cbl_perform_tgt_t *tgt, + struct cbl_refer_t count ); + +void +parser_see_stop_run( struct cbl_refer_t exit_status, const char name[] ); + +void +parser_program_hierarchy( const struct cbl_prog_hier_t& hier ); +void +parser_end_program(const char *name=NULL); + +void parser_sleep(cbl_refer_t seconds); + +void parser_exit( cbl_refer_t refer, ec_type_t = ec_none_e ); +void parser_exit_section(void); +void parser_exit_paragraph(void); +void parser_exit_perform( struct cbl_perform_tgt_t *tgt, bool cycle ); +void parser_exit_program(void); // exits back to COBOL only, else continue + +void +parser_display( const struct cbl_special_name_t *upon, + struct cbl_refer_t args[], size_t n, + bool advance = DISPLAY_ADVANCE ); + +void parser_display_field(cbl_field_t *fld); + +void parser_display_literal(const char *literal, + bool advance = DISPLAY_ADVANCE); + +void +parser_assign( size_t nC, cbl_num_result_t *C, + struct cbl_refer_t from, + cbl_label_t *on_error, + cbl_label_t *not_error, + cbl_label_t *compute_error ); + +void parser_move(struct cbl_refer_t to, + struct cbl_refer_t from, + cbl_round_t rounded=truncation_e, + bool skip_fill_from = false); + +void parser_move( size_t ntgt, cbl_refer_t *tgts, + cbl_refer_t src, cbl_round_t rounded=truncation_e ); + +void parser_initialize_table( size_t ntgt, cbl_refer_t src, + size_t nspan, const cbl_bytespan_t spans[], + size_t table, // symbol table index + size_t ntbl, const cbl_subtable_t tbls[] ); + +void parser_set_pointers( size_t ntgt, cbl_refer_t *tgts, cbl_refer_t src ); + +void +parser_symbol_add(struct cbl_field_t *field); + +void +parser_initialize(struct cbl_refer_t refer, bool like_parser_symbol_add=false); + +void +parser_initialize_programs(size_t nprog, struct cbl_refer_t *progs); + +void +parser_label_label( struct cbl_label_t *label ); + +void +parser_label_goto( struct cbl_label_t *label ); + +void +parser_goto( cbl_refer_t value, size_t narg, cbl_label_t * const labels[] ); + +void +parser_alter( cbl_perform_tgt_t *tgt ); + +void +parser_set_conditional88( struct cbl_refer_t tgt, bool which_way ); +void +parser_set_numeric(struct cbl_field_t *tgt, ssize_t value); + +void +parser_field_attr_set( cbl_field_t *tgt, cbl_field_attr_t attr, bool on_off = true ); + +void +parser_file_add(struct cbl_file_t *file); + +void +parser_file_open( struct cbl_file_t *file, int mode_char ); +void +parser_file_open( size_t n, struct cbl_file_t *files[], int mode_char ); + +void +parser_file_close( struct cbl_file_t *file, file_close_how_t how = file_close_no_how_e); + +void +parser_file_read( struct cbl_file_t *file, + struct cbl_refer_t buffer, + int where ); + +void +parser_file_start( struct cbl_file_t *file, relop_t op, int flk, + cbl_refer_t = cbl_refer_t() ); + +/* + * Write *field* to *file*. *after* is a bool where false + * means BEFORE. *nlines* is the number of lines, frequently + * FldLiteralN. To indicate PAGE, nlines is the literal "PAGE", with + * quoted_e off. + * + * According to the 2014 standard, the lack of an ADVANCING clause implies + * AFTER ADVANCING 1 LINE. *nlines* is be zero to write a line without + * prepending or appending newlines. See section 14.9.47.1 paragraph 22) + * + * At present, we don't have enough information to implement PAGE + * correctly, because we don't know the page size (in lines) of the + * output device. Rather than doing nothing, we issue a 0x0C form feed + * character. + */ +void +parser_file_write( cbl_file_t *file, + cbl_field_t *source, + bool after, + cbl_refer_t& nlines, + bool sequentially); + +void +parser_file_rewrite( cbl_file_t *file, cbl_field_t *field, + bool sequentially ); + +void +parser_file_delete( cbl_file_t *file, bool sequentially ); + +#if condition_lists +struct cbl_conditional_t { + cbl_field_t *tgt; + cbl_refer_t& lhs; + unsigned int op; + cbl_refer_t& rhs; +}; +#endif + +void +parser_lsearch_start( cbl_label_t *name, + cbl_field_t *table, + cbl_field_t *index, + cbl_field_t *varying ); + +void parser_lsearch_conditional(cbl_label_t * name); +void parser_bsearch_conditional(cbl_label_t * name); + +void parser_lsearch_when( cbl_label_t *name, cbl_field_t *conditional ); +void +parser_bsearch_when(cbl_label_t *name, + cbl_refer_t key, + cbl_refer_t sarg, + bool ascending); + +void parser_lsearch_end( cbl_label_t *name ); +void parser_bsearch_end( cbl_label_t *name ); + +void +parser_bsearch_start( cbl_label_t *name, cbl_field_t *tgt ); + +void +parser_sort(cbl_refer_t table, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkey, + cbl_key_t *keys ); +void +parser_file_sort( cbl_file_t *file, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkey, + cbl_key_t *keys, + size_t ninput, + cbl_file_t **inputs, + size_t noutput, + cbl_file_t **outputs, + cbl_perform_tgt_t *in_proc, + cbl_perform_tgt_t *out_proc ); +void +parser_file_merge( cbl_file_t *file, + cbl_alphabet_t *alphabet, + size_t nkey, + cbl_key_t *keys, + size_t ninput, + cbl_file_t **inputs, + size_t noutput, + cbl_file_t **outputs, + cbl_perform_tgt_t *out_proc ); + +void +parser_release( cbl_field_t *record_area ); + +void +parser_exception_file( cbl_field_t *tgt, cbl_file_t* file = NULL ); + +void +parser_module_name( cbl_field_t *tgt, module_type_t type ); + +void +parser_intrinsic_numval_c( cbl_field_t *f, + cbl_refer_t& input, + bool locale, + cbl_refer_t& currency, + bool anycases, + bool test_numval_c = false); + +void +parser_intrinsic_subst( cbl_field_t *f, + cbl_refer_t& ref1, + size_t argc, + cbl_substitute_t * argv ); + +void +parser_intrinsic_callv( cbl_field_t *f, + const char name[], + size_t argc, + cbl_refer_t * argv ); + +void +parser_intrinsic_call_0( cbl_field_t *tgt, + const char name[] ); +void +parser_intrinsic_call_1( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1 ); +void +parser_intrinsic_call_2( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2 ); +void +parser_intrinsic_call_3( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3 ); +void +parser_intrinsic_call_4( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3, + cbl_refer_t& ref4 ); + +void +parser_string_overflow( cbl_label_t *name ); +void +parser_string_overflow_end( cbl_label_t *name ); + +void +parser_string( cbl_refer_t tgt, + cbl_refer_t pointer, + size_t nsource, + cbl_string_src_t *sources, + cbl_label_t *overflow, + cbl_label_t *not_overflow ); + +void +parser_unstring( cbl_refer_t src, + size_t ndelimited, + cbl_refer_t *delimiteds, + // into + size_t noutput, + cbl_refer_t *outputs, + cbl_refer_t *delimiters, + cbl_refer_t *counts, + cbl_refer_t pointer, + cbl_refer_t tally, + cbl_label_t *overflow, + cbl_label_t *not_overflow ); + +void parser_return_start( cbl_file_t *file, cbl_refer_t into ); +void parser_return_atend( cbl_file_t *file ); +void parser_return_notatend( cbl_file_t *file ); +void parser_return_finish( cbl_file_t *file ); + +void parser_exception_prepare( const cbl_name_t statement_name, + const cbl_enabled_exceptions_array_t *enabled ); + +//void parser_exception_condition( cbl_field_t *ec ); + +struct cbl_exception_file; +struct cbl_exception_files_t; + +void parser_exception_raise(ec_type_t ec); + +void parser_call_exception( cbl_label_t *name ); +void parser_call_exception_end( cbl_label_t *name ); + +//void parser_stash_exceptions(const cbl_enabled_exceptions_array_t *enabled); + +void parser_match_exception(cbl_field_t *index, + cbl_field_t *blob); +void parser_check_fatal_exception(); +void parser_clear_exception(); + +void parser_call_targets_dump(); +size_t parser_call_target_update( size_t caller, + const char extant[], + const char mangled_tgt[] ); + +void parser_file_stash( struct cbl_file_t *file ); + +void parser_call( cbl_refer_t name, + cbl_refer_t returning, + size_t narg, cbl_ffi_arg_t args[], + cbl_label_t *except, + cbl_label_t *not_except, + bool is_function); + +void parser_entry_activate( size_t iprog, const cbl_label_t *declarative ); + +void parser_entry( cbl_field_t *name, + size_t narg = 0, cbl_ffi_arg_t args[] = NULL); + +bool is_ascending_key(cbl_refer_t key); + +void register_main_switch(const char *main_string); + +tree parser_cast_long(tree N); +void parser_print_long(tree N); +void parser_print_long(const char *fmt, tree N); +void parser_print_long(long N); +void parser_print_long(const char *fmt, long N); // fmt needs to have a %ls in it +void parser_print_string(const char *ach); +void parser_print_string(const char *fmt, const char *ach); // fmt needs to have a %s in it +void parser_set_statement(const char *statement); + +char *initial_from_float128(cbl_field_t *field, _Float128 value); + +void parser_set_handled(ec_type_t ec_handled); +void parser_set_file_number(int file_number); +void parser_exception_clear(); + +void parser_init_list_size(int count_of_variables); +void parser_init_list_element(cbl_field_t *field); +void parser_init_list(); + +tree file_static_variable(tree type, const char *name); + +void parser_statement_begin(); + +#endif diff --git a/gcc/cobol/gengen.cc b/gcc/cobol/gengen.cc new file mode 100644 index 0000000..c39af0b --- /dev/null +++ b/gcc/cobol/gengen.cc @@ -0,0 +1,3462 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* The compilation process consists of + + 1) lexing + 2) parsing + 3) generation of the GENERIC abstract syntax tree + 4) reduction + 5) generation of machine code + + For your sins, you have wandered into the code that accepts information from + the parser about what the COBOL source code wants done. + + Specifically, the routines in this module, which run at compile time, generate + the GENERIC tags that describe the equivalent of the COBOL. They are rathernnn + low level routines, ultimately used for pretty much everything. Specifically, + they run at compile-time, and they generate the GENERIC tags that control what + ultimately happens at run-time. + + It *is* confusing. + + I'll try to collect things in a logical way, and name them in a logical way, + and I'll try to comment them well enough so that you have some hope of + understanding what the heck is going on. + + There is some information in the GCC internals document, but it was written by + people who live and breathe this stuff, and they don't remember what it was like + to know nothing. + + I suspect that those who have tried and failed to create GCC front ends have foundered because + they just couldn't figure out what it was they needed to do. I certainly floundered + for several days before I hit on the means to figure it out. I created the + rjd_print_tree() routine, which spits out a text listing of all the nodes + connected to the specified starting node. (Keep in mind that the GENERIC graph + is cyclic, and consequently there is no real ordering, except that the starting + node you specify is NodeNumber0. rjd_print_tree follows all links, but it prints + out each unique node exactly once.) + + I then built into GCC a call to rjd_print_tree right at the point where the GENERIC tree + is complete and about to be reduced. + + And that gave me the ability to create simple C programs and see the resulting GENERIC + tree. It took a while to sort out what I was seeing, but ultimately things started + to make sense. The inherent difficulty may start to become clear when you realize that + the program + + void foo() + { + } + + is implemented by a GENERIC tree with fifty-six nodes. + + I can't try to write a whole manual here. But hopefully there will be enough examples + throughout the code for you to learn how to do things on a highish level, and you can + look at the low -level routines to see how it is accomplished. + + That said, I will try to comment things well enough to be meaningful at least to me + when I run across them at some time in the future. Because I fear that whatever + I do here, the world will little note, and *I* will not long remember, what it was! + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-iterator.h" +#include "stringpool.h" +#include "cgraph.h" +#include "toplev.h" +#include "function.h" +#include "fold-const.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "gengen.h" + +// We are limiting the programmer to functions with 512 or fewer arguments. +// Don't like it? Cry me a river. +static const int ARG_LIMIT = 512; + +static int sv_current_line_number; + +// These are globally useful constants +tree char_nodes[256]; + +tree pvoid_type_node; +tree integer_minusone_node; +tree integer_two_node; +tree integer_eight_node; +tree size_t_zero_node; +tree int128_zero_node; +tree int128_five_node; +tree int128_ten_node; +tree char_ptr_type_node; +tree uchar_ptr_type_node; +tree wchar_ptr_type_node; +tree long_double_ten_node; +tree sizeof_size_t; +tree sizeof_pointer; + +tree bool_true_node; +tree bool_false_node; + +// This is the global translation unit structure; it contains everything needed +// to compile one file that you might otherwise be tempted to instantiate as +// global variables: + +struct cbl_translation_unit_t gg_trans_unit; + +void +gg_build_translation_unit(const char *filename) + { + // The translation_unit_decl gets declared once for each processing source + // input file. It serves as an anchor for each function. And the + // block referred to by its "initial" member is the anchor for any + // variables whose scope is file. + + gg_trans_unit.trans_unit_decl + = build_translation_unit_decl(get_identifier(filename)); + + gg_trans_unit.filename = filename; + + tree tree_block = make_node(BLOCK); + BLOCK_SUPERCONTEXT(tree_block) + = gg_trans_unit.trans_unit_decl; + TREE_USED(tree_block) = 1; + DECL_INITIAL(gg_trans_unit.trans_unit_decl) = tree_block; + } + +// Explanation of context. There is a plate of spaghetti that represents +// a chain of contexts. + +// The deconstructed dinner: The function_decl "initial" points to a block +// The block points to the first of a chained set of var_decl, one for each +// variable in the block. The function "saved_tree" entry points to a +// bind_expr. The bind_expr vars member points to the same chain of var_decl. +// The bind_expr block member points to the block. And the bind_expr body +// member points to the statement_list for the context. + +// Those four tags constitute the context. To push the context, a new block +// is chained to the first blocks SUBCHAIN member. A new bind_expr is created +// and put on the statement_list of the enclosing block. And a new list of +// var_decls is set up for the new block and the new bind_expr. + +// And that's how subcontexts are made. + +static void +gg_chain_onto_block_vars(tree block, tree var) + { + // In order to use a variable in a context, the var_decl has to go + // onto the chain that starts with the "vars" entry of a block + + // Upon discovering that chainon has O(N-squared) complexity because it walks + // the entire chain looking for the final member, Dubner put in this map. + static std::unordered_map<tree, tree>blocks; + if( !BLOCK_VARS(block) ) + { + // This is the first variable: + BLOCK_VARS(block) = var; + blocks[block] = var; + } + else + { + //chainon(BLOCK_VARS(block), var); + // What follows is the quicker equivalent of calling chainon() + TREE_CHAIN(blocks[block]) = var; + blocks[block] = var; + } + } + +void +gg_append_var_decl(tree var_decl) + { + // The var_decl has to be chained onto the appropriate block. + + if( SCOPE_FILE_SCOPE_P(DECL_CONTEXT(var_decl)) ) + { + tree context = gg_trans_unit.trans_unit_decl; + tree block = DECL_INITIAL(context); + + gg_chain_onto_block_vars(block, var_decl); + + rest_of_decl_compilation (var_decl, true, false); + + // With global variables, it is probably necessary to do something with + // wrapup_global_declarations. At this writing, I have not yet + // investigated that. The advice from gcc@gcc.gnu.org came from + // David Malcolm: + /* + You might find libgccjit's gcc/jit/jit-playback.cc helpful for this, as + it tends to contain minimal code to build trees (generally + simplified/reverse-engineered from the C frontend). + + playback::context::global_new_decl makes the VAR_DECL node, and such + trees are added to the jit playback::context's m_globals. + In playback::context::replay, we have: + + / * Finalize globals. See how FORTRAN 95 does it in gfc_be_parse_file() + for a simple reference. * / + FOR_EACH_VEC_ELT (m_globals, i, global) + rest_of_decl_compilation (global, true, true); + + wrapup_global_declarations (m_globals.address(), m_globals.length()); + */ + + // Stash this var_decl in a map so it can be found elsewhere: + //fprintf(stderr, "Stashing %s\n", IDENTIFIER_POINTER(DECL_NAME(var_decl))); + gg_trans_unit.trans_unit_var_decls + [IDENTIFIER_POINTER(DECL_NAME(var_decl))] = var_decl; + } + else + { + // For function-level variables, we use a stack of blocks to keep track + // of which block is active for the current context: + + // fprintf(stderr, "%s(): %30s Function Scope\n", __func__, id_name); + tree bind_expr = current_function->bind_expr_stack.back(); + tree block = BIND_EXPR_BLOCK(bind_expr); + + gg_chain_onto_block_vars(block, var_decl); + + // If saved_tree.bind_expr.vars is null, then var_decl is the very + // first variable in the block, and it must be set in bind_expr as well + if( !BIND_EXPR_VARS(bind_expr) ) + { + BIND_EXPR_VARS(bind_expr) = var_decl; + } + } + } + +location_t +location_from_lineno() + { + location_t loc; + loc = linemap_line_start(line_table, sv_current_line_number, 0); + return loc; + } + +void +gg_append_statement(tree stmt) + { + // Likewise, we have a stack of statement_lists, with the current one + // at the back. (The statement_list stack can get deeper than the block + // stack, because you can create a separate statement list for the insides + // of, say, a WHILE statement without creating a whole context for it) + + // This statement list thing looks innocent enough, but it is the general + // way of actually having a GENERIC tree generate executing code. What goes + // onto a statement list is an expression. A = B is implemented with a + // modify_expr + + // Actually instantiating a variable requires a var_expr + + // A subroutine call is effected by putting a call_expr onto the statement + // list. + + // It's not the only way; you can have a modify_expr that takes a var_decl + // as a destination, and uses a call_expr as a source. This requires that + // the type of the var_decl be the same as the type of the function being + // called. + + // And so on. Just keep in mind that you have types, and declarations, and + // expressions, among other things. + + // When trying to figure out location_t, take a look at + // ./libcpp/include/line-map.h + // ./libcpp/location-example.txt + + gcc_assert( gg_trans_unit.function_stack.size() ); + + TREE_SIDE_EFFECTS(stmt) = 1; // If an expression has no side effects, + // // it won't generate code. + TREE_SIDE_EFFECTS(current_function->statement_list_stack.back()) = 1; + append_to_statement_list( stmt, &(current_function->statement_list_stack.back()) ); + } + +tree +gg_float(tree floating_type, tree integer_var) + { + // I don't know why, but this fails if 'var' is an INT128 + return build1(FLOAT_EXPR, floating_type, integer_var); + } + +tree +gg_trunc(tree integer_type, tree floating_var) + { + /* Conversion of real to fixed point by truncation. */ + return build1(FIX_TRUNC_EXPR, integer_type, floating_var); + } + +tree +gg_cast(tree type, tree var) + { + return fold_convert(type, var); + } + +static bool saw_pointer; + +static +tree +adjust_for_type(tree type) + { + tree retval; + + switch( TREE_CODE(type) ) + { + case POINTER_TYPE: + saw_pointer = true; + retval = adjust_for_type(TREE_TYPE(type)); + break; + + case COMPONENT_REF: + case ADDR_EXPR: + case ARRAY_TYPE: + case VAR_DECL: + case FUNCTION_TYPE: + retval = adjust_for_type(TREE_TYPE(type)); + break; + case RECORD_TYPE: + default: + retval = type; + break; + } + + return retval; + } + +static +char * +show_type(tree type) + { + if( !type ) + { + cbl_internal_error("The given type is not NULL, and that's just not fair"); + } + + if( DECL_P(type) ) + { + type = TREE_TYPE(type); + } + if( !TYPE_P(type) ) + { + cbl_internal_error("The given type is not a DECL or a TYPE"); + } + + static char ach[1024]; + switch( TREE_CODE(type) ) + { + case VOID_TYPE: + sprintf(ach, "VOID"); + break; + + case BOOLEAN_TYPE: + sprintf(ach, "BOOL"); + break; + + case RECORD_TYPE: + sprintf(ach, "RECORD"); + break; + + case REAL_TYPE: + sprintf(ach, + "%3ld-bit REAL", + TREE_INT_CST_LOW(TYPE_SIZE(type))); + break; + + case INTEGER_TYPE: + sprintf(ach, + "%3ld-bit %s INT", + TREE_INT_CST_LOW(TYPE_SIZE(type)), + (TYPE_UNSIGNED(type) ? "unsigned" : " signed")); + break; + + case FUNCTION_TYPE: + sprintf(ach, "FUNCTION"); +// sprintf(ach, +// "%3ld-bit %s INT", +// TREE_INT_CST_LOW(TYPE_SIZE(type)), +// (TYPE_UNSIGNED(type) ? "unsigned" : " signed")); + break; + + default: + cbl_internal_error("Unknown type %d", TREE_CODE(type)); + } + + return ach; + } + +void +gg_assign(tree dest, const tree source) + { + // This does the equivalent of a C/C++ "dest = source". When X1 is set, it + // does some checking for conditions that can result in inefficient code, so + // that is useful during development when even an astute programmer might + // need an assist with keeping variable types straight. + + // This routine also provides for the possibility that the assignment is + // for a source that is a function invocation, as in + // "dest = function_call()" + + saw_pointer = false; + tree dest_type = adjust_for_type(TREE_TYPE(dest)); + saw_pointer = false; + tree source_type = adjust_for_type(TREE_TYPE(source)); + bool p2 = saw_pointer; + + bool okay = dest_type == source_type; + + if( !okay ) + { + if( TREE_CODE(dest_type) == INTEGER_TYPE + && TREE_CODE(source_type) == INTEGER_TYPE + && TREE_INT_CST_LOW(TYPE_SIZE(dest_type)) == TREE_INT_CST_LOW(TYPE_SIZE(source_type)) + && TYPE_UNSIGNED(dest_type) == TYPE_UNSIGNED(source_type) ) + { + okay = true; + } + } + + if( okay ) + { + tree stmt = build2_loc( location_from_lineno(), + MODIFY_EXPR, + TREE_TYPE(dest), + dest, + source); + gg_append_statement(stmt); + } + else + { + // We are doing an assignment where the left- and right-hand types are not + // the same. This is a compilation-time error, since we want the caller to + // have sorted the types out explicitly. If we don't throw an error here, + // the gimple reduction will do so. Better to do it here, when we know + // where we are. + dbgmsg("Inefficient assignment"); + if(DECL_P(dest) && DECL_NAME(dest)) + { + dbgmsg(" Destination is %s", IDENTIFIER_POINTER(DECL_NAME(dest))); + } + dbgmsg(" dest type is %s%s", show_type(dest_type), p2 ? "_P" : ""); + if(DECL_P(source) && DECL_NAME(source)) + { + dbgmsg(" Source is %s", IDENTIFIER_POINTER(DECL_NAME(source))); + } + dbgmsg(" source type is %s%s", show_type(source_type), p2 ? "_P" : ""); + gcc_unreachable(); + } + } + +tree +gg_find_field_in_struct(const tree base, const char *field_name) + { + // Finds and returns the field_decl for the named member. 'base' can be + // a structure or a pointer to a structure. + tree type = TREE_TYPE(base); + tree rectype; + if( POINTER_TYPE_P (type) ) + { + tree pointer_type = TREE_TYPE(base); + rectype = TREE_TYPE(pointer_type); + } + else + { + // Assuming a struct (or union), pick up the record_type + rectype = TREE_TYPE(base); + } + + tree id_of_field = get_identifier(field_name); + + tree field_decl = NULL_TREE; + + tree next_value = TYPE_FIELDS(rectype); + + // Look through the chain of fields for a match to ours. This is, in the + // limit, an O(N^2) computational burden. But structures usually small, so we + // probably don't have to figure out how to make it faster. + while( next_value ) + { + if( DECL_NAME(next_value) == id_of_field ) + { + field_decl = next_value; + break; + } + next_value = TREE_CHAIN(next_value); + } + + if( !field_decl ) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### Somebody asked for the field %s.%s, which doesn't exist", + IDENTIFIER_POINTER(DECL_NAME(base)), + field_name); + gcc_unreachable(); + } + + return field_decl; + } + +static tree +gg_start_building_a_union(const char *type_name, tree type_context) + { + // type_context is current_function->function_decl for union local + // to a function. + + // It is translation_unit_decl for unions common to all functions + + // We want to return the type_decl for an empty union + + // First, create the record_type whose values will eventually + // be the chain of of the struct's fields: + + tree uniontype = make_node(UNION_TYPE); + TYPE_CONTEXT(uniontype) = type_context; + TYPE_SIZE_UNIT(uniontype) = integer_zero_node; + TYPE_SIZE(uniontype) = integer_zero_node; + TYPE_NAME(uniontype) = get_identifier(type_name); + + TYPE_MODE_RAW(uniontype) = TYPE_MODE (intTI_type_node); + + // We need a type_decl for the record_type: + tree typedecl = make_node(TYPE_DECL); + + // The type of the type_decl is the record_type: + TREE_TYPE(typedecl) = uniontype; + + SET_TYPE_ALIGN(uniontype, 16); + + // The chain element of the record_type points back to the type_decl: + TREE_CHAIN(uniontype) = typedecl; + + return typedecl; + } + +static tree +gg_start_building_a_struct(const char *type_name, tree type_context) + { + // type_context is current_function->function_decl for structures local + // to a function. + + // It is translation_unit_decl for structures common to all functions + + // We want to return the type_decl for an empty struct + + // First, create the record_type whose values will eventually + // be the chain of of the struct's fields: + + tree recordtype = make_node(RECORD_TYPE); + TYPE_CONTEXT(recordtype) = type_context; + TYPE_SIZE_UNIT(recordtype) = integer_zero_node; + TYPE_SIZE(recordtype) = integer_zero_node; + TYPE_NAME(recordtype) = get_identifier(type_name); + + TYPE_MODE_RAW(recordtype) = BLKmode; + + // We need a type_decl for the record_type: + tree typedecl = make_node(TYPE_DECL); + + // The type of the type_decl is the record_type: + TREE_TYPE(typedecl) = recordtype; + + SET_TYPE_ALIGN(recordtype, 8); + + // The chain element of the record_type points back to the type_decl: + TREE_CHAIN(recordtype) = typedecl; + + return typedecl; + } + +static void +gg_add_field_to_structure(const tree type_of_field, const char *name_of_field, tree struct_type_decl) + { + // We're given the struct_type_decl. + // Append the new field to that type_decl's record_type's chain: + tree struct_record_type = TREE_TYPE(struct_type_decl); + + bool is_union = TREE_CODE((struct_record_type)) == UNION_TYPE; + + tree id_of_field = get_identifier (name_of_field); + + // Create the new field: + tree new_field_decl = build_decl( location_from_lineno(), + FIELD_DECL, + id_of_field, + type_of_field); + + // Establish the machine mode for the field_decl: + SET_DECL_MODE(new_field_decl, TYPE_MODE(type_of_field)); + + // Establish the context of the new field as being the record_type + DECL_CONTEXT (new_field_decl) = struct_record_type; + + // Establish the size of the new field as being the same as its prototype: + DECL_SIZE(new_field_decl) = TYPE_SIZE(type_of_field); // This is in bits + DECL_SIZE_UNIT(new_field_decl) = TYPE_SIZE_UNIT(type_of_field); // This is in bytes + + // We need to establish the offset and bit offset of the new node. + // Empirically, this seems to be done on 16-bit boundaries, with DECL_FIELD_OFFSET + // in units of N*16 bytes, and FIELD_BIT_OFFSET being offsets in bits from the DECL_FIELD_OFFSET + + // We calculate our desired offset in bits: + + // Pick up the current size, in bytes, of the record_type: + long offset_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(struct_record_type)); + + static const int MAGIC_NUMBER_SIXTEEN = 16 ; + static const int BITS_IN_A_BYTE = 8 ; + + // We know the offset_in_bytes, which is the size, of the structure with + // its current members. + + //long type_size = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type_of_field)); + long type_align_in_bits = TYPE_ALIGN(type_of_field); + long type_align_in_bytes = type_align_in_bits/BITS_IN_A_BYTE; + + // As per the Amd64 ABI, we need to set the structure's type alignment to be + // that of most strictly aligned component: + // This is the current restriction: + long struct_align_in_bits = TYPE_ALIGN(TREE_TYPE(struct_type_decl)); + if( type_align_in_bits > struct_align_in_bits ) + { + // The new one is the new champion + SET_TYPE_ALIGN(TREE_TYPE(struct_type_decl), type_align_in_bits ); + } + + // We know struct_type_decl is a record_type, so we can sneak through this comparison + if( type_of_field == TREE_TYPE(struct_type_decl) ) + { + printf(" It is a record_type\n"); + } + + // Bump up the offset until we are aligned: + while( offset_in_bytes % type_align_in_bytes) + { + offset_in_bytes += 1; + } + + if( is_union ) + { + // Turn that into the bytes/bits offsets of the new field: + DECL_FIELD_OFFSET(new_field_decl) = build_int_cst_type (SIZE_T, 0); + DECL_FIELD_BIT_OFFSET(new_field_decl) = build_int_cst_type (bitsizetype, 0); + + // The size of a union is the size of its largest member: + offset_in_bytes = std::max(offset_in_bytes, (long)TREE_INT_CST_LOW(DECL_SIZE_UNIT(new_field_decl))); + } + else + { + // Turn that into the bytes/bits offsets of the new field: + long field_offset = (offset_in_bytes/MAGIC_NUMBER_SIXTEEN)*MAGIC_NUMBER_SIXTEEN; + long field_bit_offset = (offset_in_bytes - field_offset) * BITS_IN_A_BYTE; + DECL_FIELD_OFFSET(new_field_decl) = build_int_cst_type (SIZE_T, field_offset);; + DECL_FIELD_BIT_OFFSET(new_field_decl) = build_int_cst_type (bitsizetype, field_bit_offset); + + // This was done empirically to make our generated code match that of a C program + SET_DECL_OFFSET_ALIGN(new_field_decl, 128); + + // And now we need to update the size of the record type: + offset_in_bytes += TREE_INT_CST_LOW(DECL_SIZE_UNIT(new_field_decl)); + } + + TYPE_SIZE_UNIT(struct_record_type) = build_int_cst_type (SIZE_T, offset_in_bytes); // In bytes + TYPE_SIZE(struct_record_type) = build_int_cst_type (bitsizetype, offset_in_bytes*BITS_IN_A_BYTE); // In bits + + if( !TYPE_FIELDS(struct_record_type) ) + { + // This is the first variable of the chain: + TYPE_FIELDS(struct_record_type) = new_field_decl; + } + else + { + // We need to tack the new one onto an already existing chain: + chainon(TYPE_FIELDS(struct_record_type), new_field_decl); + } + } + +void +gg_get_struct_type_decl(tree struct_type_decl, int count, va_list params) + { + while( count-- ) + { + tree field_type = va_arg(params, tree); + const char *name = va_arg(params, const char *); + gg_add_field_to_structure(field_type, name, struct_type_decl); + } + // Note: On 2022-02-18 I removed the call to gg_append_var_decl, which + // chains the type_decl on the function block. I don't remember why I + // thought it was necessary. It makes no difference for COBOL compilations. + // + // But I must have copied it from a C compilation example. + // + // I removed it so that I could create type_decls outside of a function. + // I know not what the long-term implications might be. + // + // You have been served notice. + // + // struct_type_decl is the type_decl for our structure. We need to + // append it to the list of variables in order to use it: + // The following function call is misnamed. It can take struct type_decls + //gg_append_var_decl(struct_type_decl); + } + +void +gg_get_union_type_decl(tree union_type_decl, int count, va_list params) + { + while( count-- ) + { + tree field_type = va_arg(params, tree); + const char *name = va_arg(params, const char *); + gg_add_field_to_structure(field_type, name, union_type_decl); + } + } + +tree +gg_get_local_struct_type_decl(const char *type_name, int count, ...) + { + tree struct_type_decl = gg_start_building_a_struct(type_name, current_function->function_decl); + + va_list params; + va_start(params, count); + + gg_get_struct_type_decl(struct_type_decl, count, params); + + va_end(params); + + // To use the struct_type_decl, you'll need to execute + // the following to turn it into a var_decl: + // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + // var_name, + // vs_static); + return struct_type_decl; + } + +tree +gg_get_filelevel_struct_type_decl(const char *type_name, int count, ...) + { + tree struct_type_decl = gg_start_building_a_struct(type_name, gg_trans_unit.trans_unit_decl); + + va_list params; + va_start(params, count); + + gg_get_struct_type_decl(struct_type_decl, count, params); + + va_end(params); + + // To use the struct_type_decl, you'll need to execute + // the following to turn it into a var_decl: + // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + // var_name, + // vs_static); + return struct_type_decl; + } + +tree +gg_get_filelevel_union_type_decl(const char *type_name, int count, ...) + { + tree struct_type_decl = gg_start_building_a_union(type_name, gg_trans_unit.trans_unit_decl); + + va_list params; + va_start(params, count); + + gg_get_union_type_decl(struct_type_decl, count, params); + + va_end(params); + + // To use the struct_type_decl, you'll need to execute + // the following to turn it into a var_decl: + // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + // var_name, + // vs_static); + return struct_type_decl; + } + +tree +gg_define_local_struct(const char *type_name, const char * var_name, int count, ...) + { + // Builds a structure, declares it as a static variable in the current function, + // and returns the var_decl for it. + tree struct_type_decl = gg_start_building_a_struct(type_name, current_function->function_decl); + + va_list params; + va_start(params, count); + + gg_get_struct_type_decl(struct_type_decl, count, params); + + va_end(params); + // We now have a complete struct_type_decl, whose TREE_TYPE is the + // the type we need when declaring it. + + // And with that done, we can actually define the storage: + tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + var_name, + vs_static); + return var_decl; + } + +tree +gg_struct_field_ref(const tree base, const char *field) + { + tree retval; + + tree type = TREE_TYPE(base); + if( POINTER_TYPE_P (type) ) + { + tree pointer_type = TREE_TYPE(base); + tree base_pointer_type = TREE_TYPE(pointer_type); + // We need a COMPONENT_REF which is an INDIRECT_REF to a FIELD_DECL + tree field_decl = gg_find_field_in_struct(base, field); + tree indirect_ref = build1(INDIRECT_REF, base_pointer_type, base); + retval = build3(COMPONENT_REF, + TREE_TYPE(field_decl), + indirect_ref, + field_decl, + NULL_TREE); + } + else + { + // It's not a pointer, so presumably it's a structure + tree field_decl = gg_find_field_in_struct(base, field); + retval = build3(COMPONENT_REF, + TREE_TYPE(field_decl), + base, + field_decl, + NULL_TREE); + } + return retval; + } + +tree +gg_assign_to_structure(tree var_decl_struct, const char *field, const tree source) + { + // The C equivalent: "struct.field = source" + tree component_ref = gg_struct_field_ref(var_decl_struct,field); + gg_assign(component_ref,source); + return component_ref; + } + +tree +gg_assign_to_structure(tree var_decl_struct, const char *field, int N) + { + // The C equivalent: "struct.field = N" + tree component_ref = gg_struct_field_ref(var_decl_struct,field); + gg_assign(component_ref,build_int_cst(integer_type_node, N)); + return component_ref; + } + +static tree +gg_create_assembler_name(const char *cobol_name) + { + char *psz = cobol_name_mangler(cobol_name); + tree retval = get_identifier(psz); + free(psz); + return retval; + } + +static char * +gg_unique_in_function(const char *var_name, gg_variable_scope_t vs_scope) + { + char *retval = (char *)xmalloc(strlen(var_name)+32); + if( (vs_scope == vs_stack || vs_scope == vs_static) ) + { + sprintf(retval, "%s.%ld", var_name, current_function->program_id_number); + } + else + { + strcpy(retval, var_name); + } + return retval; + } + +tree +gg_declare_variable(tree type_decl, + const char *name, + tree initial_value, + gg_variable_scope_t vs_scope, + bool *already_defined) + { + // The C/C++ language provides the concept of a *declaration*, which is a + // prototype for a variable or function. "extern int global_var" is a + // declaration. Declarations let the compiler know what kind of variable it + // is looking for so that it can know what to do with it when it is found. + // + // A *definition* causes the assembler to actually create data storage for + // the specified var_decl. + // + // Be it hereby known that the various attributes associated with a var_decl, + // things like TREE_PUBLIC and TREE_STATIC and TREE_CONST seem to line up with + // their meanings in the C language. But I haven't investigated it enough to + // be completely sure about that. A hard look at gcc/tree.h is on my list of + // homework assignments. In the meantime, I continue to learn by compiling + // C programs with the fdump-generic-nodes option, and copying them as + // necessary to accomplish specific tasks. + // + // Specifically, this routine creates and returns a VAR_DECL, which is the + // prototype. + // + // The gg_define_variable() routines take a VAR_DECL and create a DECL_EXPR + // node from it. When that DECL_EXPR is appended to the statement list, it + // causes the storage to be allocated. + + // It is routine to let the compiler assign names to stack variables. The + // assembly code doesn't use names for variables on the stack; they are + // referenced by offsets to the base pointer. But static variables have to + // have names, and there are places in my code generation -- Lord only knows + // why -- where I didn't give the variables explicit names. We remedy that + // here: + + static std::map<std::string, tree>seen; + + tree var_name = NULL_TREE; + tree var_decl; + // Assume that for an external reference we know what we want: + char *unique_name = NULL; + if( name ) + { + // We were provided a name + unique_name = gg_unique_in_function(name, vs_scope); + var_name = get_identifier(unique_name); + std::map<std::string, tree>::const_iterator it = seen.find(unique_name); + if( it != seen.end() ) + { + // We've seen this one before + var_decl = it->second; + if( already_defined ) + { + *already_defined = true; + } + } + else + { + var_decl = build_decl(UNKNOWN_LOCATION, + VAR_DECL, + var_name, + type_decl); + } + } + else + { + // We were not provided a name, so we have to create one. + if( vs_scope == vs_static ) + { + // static variables have to have names: + static int counter = 1; + char ach[32]; + sprintf(ach, "__unnamed_static_variable_%d", counter++); + var_name = get_identifier(ach); + } + var_decl = build_decl(UNKNOWN_LOCATION, + VAR_DECL, + var_name, + type_decl); + } + switch(vs_scope) + { + case vs_stack: + // This is a stack variable + DECL_CONTEXT(var_decl) = current_function->function_decl; + break; + case vs_static: + // This is a function-level static variable + DECL_CONTEXT(var_decl) = current_function->function_decl; + TREE_STATIC(var_decl) = 1; + break; + case vs_file_static: + // File static variables have translation_unit_scope. I have chosen to + // provide access to them through a map; see gg_trans_unit_var_decl(); + // TREE_STATIC seems to imply const. + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + TREE_STATIC(var_decl) = 1; + break; + case vs_file: + // File variables have translation_unit_scope. + // When TREE_STATIC is on, they seem to get put into the .text section + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + break; + case vs_external: + // This is for defining variables with global scope + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + TREE_USED(var_decl) = 1; + TREE_STATIC(var_decl) = 1; + TREE_PUBLIC(var_decl) = 1; + seen[unique_name] = var_decl; + break; + case vs_external_reference: + // This is for referencing variables defined elsewhere + // TODO: Figure out why this is working. For accessing "stderr", it + // doesn't matter if TREE_PUBLIC is on, but TREE_STATIC has to be on. This + // does *not* match what is seen when compiling a C program that accesses + // "stderr". + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + TREE_USED(var_decl) = 1; + TREE_STATIC(var_decl) = 1; + TREE_PUBLIC(var_decl) = 1; + break; + } + DECL_INITIAL(var_decl) = initial_value; + if( unique_name ) + { + free(unique_name); + } + return var_decl; + } + +tree +gg_define_from_declaration(tree var_decl) + { + // Append the var_decl to either the chain for the current function or for + // the translation_unit, depending on the var_decl's context: + gg_append_var_decl(var_decl); + + if( !SCOPE_FILE_SCOPE_P(DECL_CONTEXT(var_decl)) ) + { + // Having made sure the chain of variable declarations is nicely started, + // it's time to actually define the storage with a decl_expression: + tree stmt = build1_loc (location_from_lineno(), + DECL_EXPR, + TREE_TYPE(var_decl), + var_decl); + gg_append_statement(stmt); + } + + // And we are done. That variable is now available for computation. + return var_decl; + } + +tree +gg_define_variable(tree type_decl) + { + tree var_decl = gg_declare_variable(type_decl); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable(tree type_decl, tree initial_value) + { + tree var_decl = gg_declare_variable(type_decl, + NULL, + gg_cast(type_decl, initial_value), + vs_stack); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable(tree type_decl, gg_variable_scope_t vs_scope) + { + tree var_decl = gg_declare_variable(type_decl, NULL, NULL_TREE, vs_scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable( tree type_decl, + const char *var_name, + gg_variable_scope_t vs_scope, + tree initial_value) + { + tree var_decl = gg_declare_variable(type_decl, var_name, initial_value, vs_scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable(tree type_decl, const char *name, gg_variable_scope_t vs_scope) + { + bool already_defined = false; + tree var_decl = gg_declare_variable(type_decl, name, NULL_TREE, vs_scope, &already_defined); + if( !already_defined ) + { + gg_define_from_declaration(var_decl); + } + return var_decl; + } + +tree +gg_define_bool() + { + tree var_decl = gg_declare_variable(BOOL); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char() + { + // The nearest C equivalent: "char name;", but this one is given a + // compiler-assigned name. + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + tree var_decl = gg_declare_variable(CHAR); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char(const char *variable_name) + { + // The C equivalent: "char name;" + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + tree var_decl = gg_declare_variable(CHAR, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char(const char *variable_name, tree ch) + { + tree var_decl = gg_declare_variable(CHAR, variable_name, ch); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char(const char *variable_name, int ch) + { + return gg_define_char(variable_name, char_nodes[ch&0xFF]); + } + +tree +gg_define_uchar() + { + // The C equivalent: "char name;" + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + return gg_define_variable(UCHAR); + } + +tree +gg_define_uchar(const char *variable_name) + { + // The C equivalent: "char name;" + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + return gg_define_variable(UCHAR, variable_name); + } + +tree +gg_define_uchar(const char *variable_name, tree ch) + { + tree var_decl = gg_declare_variable(UCHAR, variable_name, ch); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar(const char *variable_name, int ch) + { + return gg_define_char(variable_name, char_nodes[ch&0xFF]); + } + +tree +gg_define_int() + { + tree var_decl = gg_declare_variable(INT); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(int N) + { + tree var_decl = gg_declare_variable(INT, NULL, build_int_cst_type(INT, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(const char *variable_name) + { + tree var_decl = gg_declare_variable(INT, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(const char *variable_name, tree N) + { + tree var_decl = gg_declare_variable(INT, variable_name, N); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(const char *variable_name, int N) + { + tree var_decl = gg_declare_variable(INT, variable_name, build_int_cst_type(INT, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t() + { + tree var_decl = gg_declare_variable(SIZE_T); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(const char *variable_name) + { + tree var_decl = gg_declare_variable(SIZE_T, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(tree N) + { + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, N); + return retval; + } + +tree +gg_define_size_t(size_t N) + { + tree var_decl = gg_declare_variable(SIZE_T, NULL, build_int_cst_type(SIZE_T, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(const char *variable_name, tree N) + { + tree var_decl = gg_declare_variable(SIZE_T, variable_name, N); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(const char *variable_name, size_t N) + { + tree var_decl = gg_declare_variable(SIZE_T, variable_name, build_int_cst_type(SIZE_T, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int128() + { + // The C equivalent: "INT128 <compiler_name>;" + return gg_define_variable(INT128); + } + +tree +gg_define_int128(const char *variable_name) + { + // The C equivalent: "INT128 name;" + return gg_define_variable(INT128, variable_name); + } + +tree +gg_define_int128(const char *variable_name, tree N) + { + // The C equivalent: "INT128 name = N" + tree var_decl = gg_declare_variable(INT128, variable_name, N); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int128(const char *variable_name, int N) + { + // The C equivalent: "INT128 name = N" + tree var_decl = gg_define_int128(variable_name, build_int_cst_type(INT128, N)); + return var_decl; + } + +tree +gg_define_char_star() + { + // The C equivalent: "char *name;" + return gg_define_variable(CHAR_P); + } + +tree +gg_define_char_star(const char *variable_name) + { + return gg_define_variable(CHAR_P, variable_name); + } + +tree +gg_define_char_star(const char *variable_name, gg_variable_scope_t scope) + { + tree var_decl = gg_declare_variable(CHAR_P, variable_name, NULL_TREE, scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char_star(tree var) + { + tree var_decl = gg_declare_variable(CHAR_P, NULL, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char_star(const char *variable_name, tree var) + { + tree var_decl = gg_declare_variable(CHAR_P, variable_name, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star() + { + tree var_decl = gg_declare_variable(UCHAR_P); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(const char *variable_name) + { + tree var_decl = gg_declare_variable(UCHAR_P, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(const char *variable_name, gg_variable_scope_t scope) + { + tree var_decl = gg_declare_variable(UCHAR_P, variable_name, NULL_TREE, scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(tree var) + { + tree var_decl = gg_declare_variable(UCHAR_P, NULL, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(const char *variable_name, tree var) + { + tree var_decl = gg_declare_variable(UCHAR_P, variable_name, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star() + { + tree var_decl = gg_declare_variable(VOID_P); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star(const char *variable_name) + { + tree var_decl = gg_declare_variable(VOID_P, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star(const char *variable_name, tree var) + { + tree var_decl = gg_declare_variable(VOID_P, variable_name, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star(const char *variable_name, gg_variable_scope_t scope) + { + tree var_decl = gg_declare_variable(VOID_P, variable_name, NULL_TREE, scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_longdouble() + { + tree var_decl = gg_declare_variable(LONGDOUBLE); + gg_define_from_declaration(var_decl); + return var_decl; + } + +extern tree +gg_define_array(tree type_decl, size_t size) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type); + } + +extern tree +gg_define_array(tree type_decl, const char *name, size_t size) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type, name); + } + +extern tree +gg_define_array(tree type_decl, size_t size, gg_variable_scope_t scope) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type, scope); + } + +extern tree +gg_define_array(tree type_decl, const char *name, size_t size, gg_variable_scope_t scope) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type, name, scope); + } + +tree +gg_get_address_of(const tree var_decl) + { + // Returns an ADDR_EXPR which points to var_decl. + // The C equivalent is &variable + // We need to be able to use this guy's address directly: + + // In order to do that, this fellow's "addressable" bit has to be on, otherwise + // the GIMPLE reducer creates a temporary variable, sets its value to var_decl's, + // and returns the pointer to the temp. I suppose this has something to do with + // pass by reference and pass by value, but it makes my head hurt, and, frankly, + // I'll take the dangerous road. + + TREE_ADDRESSABLE(var_decl) = 1; + TREE_USED(var_decl) = 1; + return build1( ADDR_EXPR, + build_pointer_type (TREE_TYPE(var_decl)), + var_decl); + } + +tree +gg_get_indirect_reference(tree pointer, tree offset) + { + // The C equivalent: auto pointer[offset]; + + // the returned indirect reference has the same type as + // what pointer points to. If pointer is a char *, then the returned + // value has type char. If pointer is an int *, then the returned + // value has type int. + + // We also want the offset to operate the same way it does in C, so we + // are going to find the size of the objects the pointer points to, and + // multiply the offset by that size: + + tree pointer_type = TREE_TYPE(pointer); + tree element_type = TREE_TYPE(pointer_type); + + tree indirect_reference; + if( offset ) + { + // We can now start building our little shrub: + tree distance = build2( MULT_EXPR, + SIZE_T, + gg_cast(sizetype, offset), + TYPE_SIZE_UNIT(element_type)); + + // Next, we build the pointer_plus_expr: + tree pointer_plus_expr = build2(POINTER_PLUS_EXPR, + pointer_type, + pointer, + distance); + + // With that in hand, we can build the indirect_reference: + indirect_reference = build1(INDIRECT_REF, element_type, pointer_plus_expr); + } + else + { + indirect_reference = build1(INDIRECT_REF, element_type, pointer); + } + + return indirect_reference; + } + +tree +gg_indirect(tree pointer, tree byte_offset) + { + // Unlike gg_get_indirect_reference, which multiplies the offset by the + // size of the type pointed to by pointer, this routine simply adds the offset + // to the pointer. + tree pointer_type = TREE_TYPE(pointer); + tree element_type = TREE_TYPE(pointer_type); + + tree retval; + if( byte_offset == NULL_TREE ) + { + retval = build1(INDIRECT_REF, element_type, pointer); + } + else + { + tree pointer_plus_expr = build2(POINTER_PLUS_EXPR, + pointer_type, + pointer, + gg_cast(SIZE_T, byte_offset)); + retval = build1(INDIRECT_REF, element_type, pointer_plus_expr); + } + + return retval; + } + +tree +gg_array_value(tree pointer, tree offset) + { + // We arrange the function so that it can work on either an ARRAY_TYPE + // or a pointer type + tree pointer_type = TREE_TYPE(pointer); + tree element_type = TREE_TYPE(pointer_type); + if(POINTER_TYPE_P(pointer_type)) + { + // It is a pointer + tree retval = gg_get_indirect_reference(pointer, offset); + return retval; + } + else + { + return build4(ARRAY_REF, + element_type, + pointer, + offset, + NULL_TREE, + NULL_TREE); + } + } + +tree +gg_array_value(tree pointer, int N) + { + return gg_array_value(pointer, build_int_cst(INT, N)); + } + +void +gg_increment(tree var) + { + tree var_type = TREE_TYPE(var); + gg_assign(var, gg_add(var, build_int_cst_type(var_type, 1))); + } + +void +gg_decrement(tree var) + { + tree var_type = TREE_TYPE(var); + gg_assign(var, + gg_cast(var_type, + gg_subtract(var, + build_int_cst_type(var_type, 1)))); + } + +tree +gg_negate(tree var) + { + return build1(NEGATE_EXPR, TREE_TYPE(var), var); + } + +tree +gg_bitwise_not(tree var) + { + return build1(BIT_NOT_EXPR, TREE_TYPE(var), var); + } + +tree +gg_abs(tree var) + { + return build1(ABS_EXPR, TREE_TYPE(var), var); + } + +static tree +gg_get_larger_type(tree A, tree B) + { + tree larger = TREE_TYPE(B); + if( TREE_INT_CST_LOW(TYPE_SIZE(TREE_TYPE(A))) + > TREE_INT_CST_LOW(TYPE_SIZE(TREE_TYPE(B))) ) + { + larger = TREE_TYPE(A); + } + return larger; + } + +tree +gg_add(tree addend1, tree addend2) + { + tree retval; + if( POINTER_TYPE_P(TREE_TYPE(addend1)) ) + { + // operand1 is a pointer. + // Make this work like C pointer arithmetic. We'll find the + // size of the things that pointer points to, and multiply accordingly + tree pointer_type = TREE_TYPE(addend1); + tree pointer_type_type = TREE_TYPE(pointer_type); + tree bytes_per_element = TYPE_SIZE_UNIT(pointer_type_type); + + tree op2 = gg_cast(SIZE_T, gg_multiply(addend2, bytes_per_element)); + retval = build2(POINTER_PLUS_EXPR, + TREE_TYPE(addend1), + addend1, + op2); + } + else + { + // Ordinary addition. Scale both operands to match the larger + // type of the two operands. + tree larger_type = gg_get_larger_type(addend1, addend2); + retval = build2( PLUS_EXPR, + larger_type, + gg_cast(larger_type, addend1), + gg_cast(larger_type, addend2)); + } + return retval; + } + +tree +gg_subtract(tree A, tree B) + { + // We are doing A - B, instead. + + if( POINTER_TYPE_P(TREE_TYPE(A)) && INTEGRAL_TYPE_P(TREE_TYPE(B)) ) + { + // We are subtracting an integer from a pointer. That's handled + // in gg_add, by converting the integer, possibly signed, to + // an unsigned huge number. + return gg_add(A, gg_negate(B)); + } + + if( POINTER_TYPE_P(TREE_TYPE(A)) && POINTER_TYPE_P(TREE_TYPE(A)) ) + { + // We are subtracting two pointers, yielding a signed size_t + return build2(POINTER_DIFF_EXPR, SSIZE_T, A, B); + } + + // This is an ordinary subtraction. Scale everything to the larger_type + // of the two operands. + tree larger_type = gg_get_larger_type(A, B); + tree stmt = build2( MINUS_EXPR, + larger_type, + gg_cast(larger_type, A), + gg_cast(larger_type, B) ); + return stmt; + } + +tree +gg_multiply(tree A, tree B) + { + // We will return the product of A and B, adjusting to + // whichever is larger: + tree larger_type = gg_get_larger_type(A, B); + return build2( MULT_EXPR, larger_type, gg_cast(larger_type, A), gg_cast(larger_type, B) ); + } + +tree +gg_real_divide(tree A, tree B) + { + // This floating point division: + tree larger_type = gg_get_larger_type(A, B); + return build2( RDIV_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_divide(tree A, tree B) + { + // This is the equivalent of C integer divide + tree larger_type = gg_get_larger_type(A, B); + return build2( TRUNC_DIV_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_mod(tree A, tree B) + { + // This is the equivalent of C A % B + tree larger_type = gg_get_larger_type(A, B); + return build2( TRUNC_MOD_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_lshift(tree A, tree B) + { + // Equivalent of A << B; + return build2( LSHIFT_EXPR, TREE_TYPE(A), A, B ); + } + +tree +gg_rshift(tree A, tree B) + { + // Equivalent of A >> B; + return build2( RSHIFT_EXPR, TREE_TYPE(A), A, B ); + } + +tree +gg_bitwise_or(tree A, tree B) + { + // This is C equivalent to A | B + tree larger_type = gg_get_larger_type(A, B); + return build2( BIT_IOR_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_bitwise_xor(tree A, tree B) + { + // This is C equivalent to A ^ B + tree larger_type = gg_get_larger_type(A, B); + return build2( BIT_XOR_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_bitwise_and(tree A, tree B) + { + // This is C equivalent to A & B + tree larger_type = gg_get_larger_type(A, B); + return build2( BIT_AND_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_build_relational_expression(tree operand_a, + enum relop_t op, + tree operand_b) + { + tree_code compare = EQ_EXPR; // Assuage the compiler + switch(op) + { + case eq_op: + compare = EQ_EXPR; + break; + case ne_op: + compare = NE_EXPR; + break; + case lt_op: + compare = LT_EXPR; + break; + case gt_op: + compare = GT_EXPR; + break; + case ge_op: + compare = GE_EXPR; + break; + case le_op: + compare = LE_EXPR; + break; + } + tree relational_expression = build2_loc(location_from_lineno(), + compare, + boolean_type_node, + operand_a, + operand_b); + return relational_expression; + } + +tree +gg_build_logical_expression(tree operand_a, + enum logop_t op, + tree operand_b) + { + tree logical_expression = NULL_TREE; + tree_code logical_op; + switch(op) + { + case and_op: + logical_op = TRUTH_ANDIF_EXPR; + logical_expression = build2(logical_op, + boolean_type_node, + operand_a, + operand_b); + break; + + case or_op: + logical_op = TRUTH_ORIF_EXPR; + logical_expression = build2(logical_op, + boolean_type_node, + operand_a, + operand_b); + break; + + case not_op: + logical_op = TRUTH_NOT_EXPR; + logical_expression = build1(logical_op, + boolean_type_node, + operand_b); + break; + + case xor_op: + logical_op = TRUTH_XOR_EXPR; + logical_expression = build2(logical_op, + boolean_type_node, + operand_a, + operand_b); + break; + + case xnor_op: + case true_op: + case false_op: + // This is handled elsewhere + break; + } + return logical_expression; + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr, tree *label_addr, const char *name) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + gg_create_assembler_name(name), + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + *label_addr = gg_get_address_of(label_decl); + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr, tree *label_addr) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + *label_addr = gg_get_address_of(label_decl); + } + +void +gg_create_goto_pair(tree *goto_expr, + tree *label_expr, + tree *label_addr, + tree *label_decl) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + *label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(*label_decl) = current_function->function_decl; + TREE_USED(*label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, *label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, *label_decl); + *label_addr = gg_get_address_of(*label_decl); + } + +void +gg_goto_label_decl(tree label_decl) + { + tree goto_expr = build1_loc( location_from_lineno(), + GOTO_EXPR, + void_type_node, + label_decl); + gg_append_statement(goto_expr); + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr, const char *name) + { + // We are going to create a pair of named expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + gg_create_assembler_name(name), + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + } + +// Used for implementing SECTIONS and PARAGRAPHS. When you have a +// void *pointer = &&label, gg_goto is the same as +// goto *pointer +void +gg_goto(tree var_decl_pointer) + { + tree go_to = build1_loc(location_from_lineno(), + GOTO_EXPR, + void_type_node, + var_decl_pointer); + gg_append_statement(go_to); + } + +void +gg_while( tree operand_a, + enum relop_t op, + tree operand_b) + { + /* + See demonstration_while_if for the canonical demonstration + + You use it like this: + + WHILE + .... + WEND + + We do the C construct: + + while( a OP b ) + { + <block> + } + + like this: + + goto test + top: + <block> + test: + if( a OP b) + goto top + else + goto leave: + leave: + + */ + + tree goto_top; + tree label_top; + + tree goto_test; + tree label_test; + + tree goto_leave; + tree label_leave; + + gg_create_goto_pair(&goto_top, &label_top); + gg_create_goto_pair(&goto_test, &label_test); + gg_create_goto_pair(&goto_leave, &label_leave); + + tree statement_block = make_node(STATEMENT_LIST); + TREE_TYPE(statement_block) = void_type_node; + + // During development, I tried appending a statement_list to a statement_list, + // intending it to be collected together that way. But it was too smart for me; + // it just unwound the second list and tacked it onto the end of the first. + + // So I used a BIND_EXPR to collect them together. This isn't a new context, so I don't + // point operand[0] at a string of vars, nor operand[2] at a block. + tree bind_expr = build3( BIND_EXPR, + void_type_node, + NULL_TREE, + statement_block, + NULL_TREE); + + // With the pairs created and the bind_expr sorted out, we can now put + // together our while construction: + + gg_append_statement(goto_test); + gg_append_statement(label_top); + gg_append_statement(bind_expr); + gg_append_statement(label_test); + IF( operand_a, op, operand_b ) + gg_append_statement(goto_top); + ELSE + gg_append_statement(goto_leave); + ENDIF + gg_append_statement(label_leave); + + // And here's the statement_list for the programmer to fill + // and end with a WEND + current_function->statement_list_stack.push_back(statement_block); + } + +void +gg_create_true_false_statement_lists(tree relational_expression) + { + // Create the two statement_lists for ifness, one for true and + // the other for false. Put them on the stack, ready for the first + // pop on ELSE and the second pop on ENDIF: + + tree if_true_statement_list = make_node(STATEMENT_LIST); + TREE_TYPE(if_true_statement_list) = void_type_node; + tree if_false_statement_list = make_node(STATEMENT_LIST); + TREE_TYPE(if_false_statement_list) = void_type_node; + + tree conditional = build3( COND_EXPR, + boolean_type_node, + relational_expression, + if_true_statement_list, + if_false_statement_list); + + // We need to put our conditional onto the current_stack: + gg_append_statement(conditional); + + // And with that done, we can push the FALSE and TRUE blocks + // onto the stack in the correct order: + current_function->statement_list_stack.push_back(if_false_statement_list); + current_function->statement_list_stack.push_back(if_true_statement_list); + } + +void +gg_if( tree operand_a, + enum relop_t op, + tree operand_b) + { + /* Listen up, troops. Here's how you use this constructor. + + You use it like this: + + IF( this, LT, that) + .... + ELSE + .... + ENDIF + + You *must* have all three: IF ELSE ENDIF, if you don't, the + current_function->statement_list_stack gets all higgledepiggledy + + It is the C equivalent of + + if( a OP b ) + { + <if_true_statement_list> + } + else + { + <if_false_statement_list> + } + + This routine pushes the false_statement_list onto current_function->statement_list_stack, + followed by the true_statement_list. + + You then generate statements for the TRUE block + You then pop the current_function->statement_list_stack. + Then you do the same for the FALSE block + You then pop the current_function->statement_list_stack again. + + For the sake of readability, we define ELSE and ENDIF to do + that popping. + + I don't plan on explaining this everywhere it's used. + + See demonstration_while_if for the canonical demonstration + */ + + if( TREE_TYPE(operand_a) != TREE_TYPE(operand_b) ) + { + fprintf(stderr, "%s(): a and b have different TREE_TYPES\n", __func__); + gcc_unreachable(); + } + + // Build the relational expression: + tree relational_expression = + gg_build_relational_expression(operand_a, + op, + operand_b); + + // And with that in hand, create the two statement lists, one for + // true and one for false, and set up the stacks: + gg_create_true_false_statement_lists(relational_expression); + } + +tree +gg_get_function_address(tree return_type, const char *funcname) + { + // This routine finds a function by name. It calls build_fn_decl + // with an empty array of varargs. I haven't investigated all the + // possibilities, but this returns an address expression for a function + // that can be built with any argument[s]. + + // There is no compile-time checking; if you specify disaster, then + // disaster will be what you get. + tree fndecl_type = build_varargs_function_type_array (return_type, + 0, + NULL); + tree function_decl = build_fn_decl (funcname, fndecl_type); + DECL_EXTERNAL (function_decl) = 1; + + tree retval = build1(ADDR_EXPR, build_pointer_type (fndecl_type), function_decl); + + return retval; + } + +void +gg_printf(const char *format_string, ...) + { + // This allows you to use fprintf(stderr, ...) with a format string + // and a list of arguments ending with a NULL + + // Use this for conveniently adding print statements into the generated + // code, for run-time print-statement debugging. gg_write is used for + // actual program code. + + // Note that the return value from the printf() call is *not* available + // to the caller. + + int nargs = 0; + tree args[ARG_LIMIT]; + + // Because this routine is intended for debugging, we are sending the + // text to STDERR + + // Because we don't actually use stderr ourselves, we just pick it up as a + // VOID_P and pass it along to fprintf() + tree t_stderr = gg_declare_variable(VOID_P, "stderr", + NULL_TREE, + vs_external_reference); + + gg_push_context(); + + args[nargs++] = t_stderr; + args[nargs++] = build_string_literal(strlen(format_string)+1, format_string); + + va_list ap; + va_start(ap, format_string); + tree arg = va_arg(ap, tree); + while(arg) + { + if(nargs >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + if( TREE_CODE(arg) >= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_printf() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + args[nargs++] = arg; + arg = va_arg(ap, tree); + } + va_end (ap); + + static tree function = NULL_TREE; + if( !function ) + { + function = gg_get_function_address(INT, "fprintf"); + } + + tree stmt = build_call_array_loc (location_from_lineno(), + INT, + function, + nargs, + args); + gg_append_statement(stmt); + + gg_pop_context(); + } + +tree +gg_fprintf(tree fd, int nargs, const char *format_string, ...) + { + tree retval = gg_define_int(); + gg_push_context(); + tree buffer = gg_define_char_star(); + gg_assign(buffer, gg_cast(CHAR_P, gg_malloc(1024))); + + tree args[ARG_LIMIT]; + + // Set up a call to sprintf: + int argc = 0; + args[argc++] = buffer; + args[argc++] = build_string_literal(strlen(format_string)+1, format_string); + + va_list ap; + va_start(ap, format_string); + tree arg = va_arg(ap, tree); + int narg = 0; + while(narg++ < nargs) + { + if(argc >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + args[argc++] = arg; + arg = va_arg(ap, tree); + } + va_end (ap); + + static tree function = NULL_TREE; + + if( !function ) + { + function = gg_get_function_address(INT, "sprintf"); + } + + tree stmt = build_call_array_loc (location_from_lineno(), + INT, + function, + argc, + args); + gg_assign(retval, stmt); + gg_write(fd, buffer, gg_strlen(buffer)); + + gg_free(buffer); + gg_pop_context(); + return retval; + } + +tree +gg_read(tree fd, tree buf, tree count) + { + // The C equivalent: "read(fd, buf, count)" + + // Because the caller might need the ssize_t return value, this routine + // returns the statement_decl for the call. It is used this way: + + // tree num_chars = gg_define_int("_num_chars"); + // gg_assign(num_chars, gg_read(fd, buf, count)); + + return gg_call_expr(SSIZE_T, + "read", + fd, + buf, + count, + NULL_TREE); + } + +void +gg_write(tree fd, tree buf, tree count) + { + gg_call(SSIZE_T, + "write", + fd, + buf, + count, + NULL_TREE); + } + +void +gg_memset(tree dest, const tree value, tree size) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMSET), + 3, + dest, + value, + size); + gg_append_statement(the_call); + } + +tree +gg_memchr(tree buf, tree ch, tree length) + { + tree the_call = fold_convert( + pvoid_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMCHR), + 3, + buf, + ch, + length)); + return the_call; + } + +/* Built-in call to memcpy() */ + +void +gg_memcpy(tree dest, const tree src, tree size) + { + tree the_call = build_call_expr_loc( + location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMCPY), + 3, + dest, + src, + size); + gg_append_statement(the_call); + } + +/* Built-in call to memmove() */ + +void +gg_memmove(tree dest, const tree src, tree size) + { + tree the_call = build_call_expr_loc( + location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMMOVE), + 3, + dest, + src, + size); + gg_append_statement(the_call); + } + +tree +gg_memdup(tree data, tree length) + { + // Duplicates data; gg_free should eventually be called + tree retval = gg_define_char_star(); + gg_assign(retval, gg_malloc(length)); + gg_memcpy(retval, data, length); + return retval; + } + +tree +gg_memdup(tree data, size_t length) + { + // Duplicates data; gg_free should eventually be called + tree retval = gg_define_char_star(); + gg_assign(retval, gg_malloc(length)); + gg_memcpy(retval, data, build_int_cst_type(SIZE_T, length)); + return retval; + } + +void +gg_strcpy(tree dest, tree src) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRCPY), + 2, + dest, + src); + gg_append_statement(the_call); + } + +tree +gg_strcmp(tree A, tree B) + { + tree the_call = fold_convert( + integer_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRCMP), + 2, + A, + B)); + return the_call; + } + +tree +gg_open(tree char_star_A, tree int_B) + { + return gg_call_expr(INT, + "open", + char_star_A, + int_B, + NULL_TREE); + } + +tree +gg_close(tree int_A) + { + return gg_call_expr(INT, + "close", + int_A, + NULL_TREE); + } + +tree +gg_strncmp(tree char_star_A, tree char_star_B, tree size_t_N) + { + tree the_call = fold_convert( + integer_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRNCMP), + 3, + char_star_A, + char_star_B, + size_t_N)); + return the_call; + } + +void +gg_return(tree operand) + { + tree stmt; + + if( !gg_trans_unit.function_stack.size() ) + { + // I put this in to cope with the problem of two END PROGRAM statements, which + // should be a syntax error but, as of 2021-02-24, is ignored by GnuCOBOL and + // by our parser. + return ; + } + + // We have to pop ourselves off of the module_name_stack: + gg_call(VOID, + "__gg__module_name_pop", + NULL_TREE); + + if( !operand || !DECL_RESULT(current_function->function_decl) ) + { + // When there is no operand, or if the function result is void, then + // we just generate a return_expr. + stmt = build1_loc(location_from_lineno(), RETURN_EXPR, void_type_node, NULL_TREE); + } + else + { + // Life is a wee bit more complicated, because we want to return the operand + tree function_type = TREE_TYPE(DECL_RESULT(current_function->function_decl)); + tree modify = build2( MODIFY_EXPR, + function_type, + DECL_RESULT(current_function->function_decl), + gg_cast(function_type, operand)); + stmt = build1_loc(location_from_lineno(), RETURN_EXPR, void_type_node, modify); + } + gg_append_statement(stmt); + } + +void +chain_parameter_to_function(tree function_decl, const tree param_type, const char *name) + { + tree parm = build_decl (location_from_lineno(), + PARM_DECL, + get_identifier (name), + param_type); + DECL_CONTEXT(parm) = function_decl; + TREE_USED(parm) = 1; + DECL_INITIAL(parm) = param_type; + + if( DECL_ARGUMENTS(function_decl) ) + { + chainon(DECL_ARGUMENTS(function_decl),parm); + } + else + { + DECL_ARGUMENTS(function_decl) = parm; + } + } + +void +gg_modify_function_type(tree function_decl, tree return_type) + { + tree fndecl_type = build_varargs_function_type_array( return_type, + 0, // No parameters yet + NULL); // And, hence, no types + TREE_TYPE(function_decl) = fndecl_type; + tree resdecl = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, return_type); + DECL_CONTEXT (resdecl) = function_decl; + DECL_RESULT (function_decl) = resdecl; + } + +tree +gg_define_function_with_no_parameters(tree return_type, + const char *funcname, + const char *unmangled_name) + { + // This routine builds a function_decl, puts it on the stack, and + // gives it a context. + + // At this time we don't know how many parameters this function expects, so + // we set things up and we'll tack on the parameters later. + + // Create the FUNCTION_TYPE for that array: + // int nparams = 1; + // tree types[1] = {VOID_P}; + // const char *names[1] = {"_p1"}; + + // tree fndecl_type = build_varargs_function_type_array( return_type, + // nparams, + // types); + + tree fndecl_type = build_varargs_function_type_array( return_type, + 0, // No parameters yet + NULL); // And, hence, no types + + // Create the FUNCTION_DECL for that FUNCTION_TYPE + tree function_decl = build_fn_decl (funcname, fndecl_type); + + // Some of this stuff is magical, and is based on compiling C programs + // and just mimicking the results. + TREE_ADDRESSABLE(function_decl) = 1; + TREE_STATIC(function_decl) = 1; + DECL_EXTERNAL (function_decl) = 0; + DECL_PRESERVE_P (function_decl) = 0; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT(function_decl) = 1; + DECL_ARTIFICIAL(function_decl) = 0; + TREE_NOTHROW(function_decl) = 0; + TREE_USED(function_decl) = 1; + + // This code makes COBOL nested programs actual visible on the + // source code "trans_unit_decl" level, but with non-public "static" + // visibility. + if( gg_trans_unit.function_stack.size() == 0 ) + { + // gg_trans_unit.function_stack is empty, so our context is + // the compilation module, and we need to be public: + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 1; + } + else + { + // The stack has something in it, so we are building a nested function. + // Make the current function our context + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 0; + + // Append this function to the list of functions and variables + // associated with the computation module. + gg_append_var_decl(function_decl); + } + + // Establish the RESULT_DECL for the function: + tree resdecl = build_decl (location_from_lineno(), RESULT_DECL, NULL_TREE, return_type); + DECL_CONTEXT (resdecl) = function_decl; + DECL_RESULT (function_decl) = resdecl; + + // The function_decl has a .function member, a pointer to struct_function. + // This is quietly, almost invisibly, extremely important. You need to + // call this routine after DECL_RESULT has been established: + + allocate_struct_function(function_decl, false); + + struct gg_function_t new_function = {}; + new_function.context_count = 0; + new_function.function_decl = function_decl; + new_function.our_name = IDENTIFIER_POINTER(DECL_NAME(function_decl)); + new_function.our_unmangled_name = xstrdup(unmangled_name); + new_function.function_address = gg_get_function_address(VOID, new_function.our_name); + + // Each program on the stack gets a unique identifier. This is used, for + // example, to make sure that static variables have unique names. + static size_t program_id = 0; + new_function.program_id_number = program_id++; + + // With everything established, put this function_decl on the stack + gg_trans_unit.function_stack.push_back(new_function); + + // All we need is a context, and we are ready to go: + gg_push_context(); + return function_decl; + } + +void +gg_tack_on_function_parameters(tree function_decl, ...) + { + int nparams = 0; + + tree types[ARG_LIMIT]; + const char *names[ARG_LIMIT]; + + va_list params; + va_start(params, function_decl); + for(;;) + { + tree var_type = va_arg(params, tree); + if( !var_type ) + { + break; + } + + if( TREE_CODE(var_type) >= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_define_function() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + const char *name = va_arg(params, const char *); + + types[nparams] = var_type; + names[nparams] = name; + nparams += 1; + if(nparams > ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### %d parameters? Really? Are you insane?",ARG_LIMIT+1); + gcc_unreachable(); + } + } + va_end(params); + + // Chain the names onto the variables list: + for(int i=0; i<nparams; i++) + { + chain_parameter_to_function(function_decl, types[i], names[i]); + } + } + +void +gg_define_function(tree return_type, const char *funcname, ...) + { + // This routine builds a function_decl, puts it on the stack, and + // gives it a context. + + // After the funcname, we expect the formal parameters: pairs of types/names + // terminated by a NULL_TREE + + int nparams = 0; + + tree types[ARG_LIMIT]; + const char *names[ARG_LIMIT]; + + va_list params; + va_start(params,funcname); + for(;;) + { + tree var_type = va_arg(params, tree); + if( !var_type ) + { + break; + } + + if( TREE_CODE(var_type) >= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_define_function() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + const char *name = va_arg(params, const char *); + + types[nparams] = var_type; + names[nparams] = name; + nparams += 1; + if(nparams > ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### %d parameters? Really? Are you insane?", + ARG_LIMIT+1); + gcc_unreachable(); + } + } + va_end(params); + + // Create the FUNCTION_TYPE for that array: + tree fndecl_type = build_varargs_function_type_array( return_type, + nparams, + types); + + // Create the FUNCTION_DECL for that FUNCTION_TYPE + tree function_decl = build_fn_decl (funcname, fndecl_type); + + // Some of this stuff is magical, and is based on compiling C programs + // and just mimicking the results. + TREE_ADDRESSABLE(function_decl) = 1; + TREE_STATIC(function_decl) = 1; + DECL_EXTERNAL (function_decl) = 0; + DECL_PRESERVE_P (function_decl) = 0; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT(function_decl) = 1; + DECL_ARTIFICIAL(function_decl) = 0; + TREE_NOTHROW(function_decl) = 0; + TREE_USED(function_decl) = 1; + + // This code makes COBOL nested programs actual visible on the + // source code "trans_unit_decl" level, but with non-public "static" + // visibility. + if( gg_trans_unit.function_stack.size() == 0 ) + { + // gg_trans_unit.function_stack is empty, so our context is + // the compilation module, and we need to be public: + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 1; + } + else + { + // The stack has something in it, so we are building a nested function. + // Make the current function our context + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + + // We need to make it public, because otherwise COBOL CALL "func" + // won't be able to find it, because dlopen/dlsym won't find it. + TREE_PUBLIC(function_decl) = 0; + + // Append this function to the list of functions and variables + // associated with the computation module. + gg_append_var_decl(function_decl); + } + + // Chain the names onto the variables list: + for(int i=0; i<nparams; i++) + { + chain_parameter_to_function(function_decl, types[i], names[i]); + } + + // Establish the RESULT_DECL for the function: + tree resdecl = build_decl (location_from_lineno(), RESULT_DECL, NULL_TREE, return_type); + DECL_CONTEXT (resdecl) = function_decl; + DECL_RESULT (function_decl) = resdecl; + + // The function_decl has a .function member, a pointer to struct_function. + // This is quietly, almost invisibly, extremely important. You need to + // call this routine after DECL_RESULT has been established: + + allocate_struct_function(function_decl, false); + + struct gg_function_t new_function = {}; + new_function.context_count = 0; + new_function.function_decl = function_decl; + + // Each program on the stack gets a unique identifier. This is used, for + // example, to make sure that static variables have unique names. + static size_t program_id = 0; + new_function.program_id_number = program_id++; + + // With everything established, put this function_decl on the stack + gg_trans_unit.function_stack.push_back(new_function); + + // All we need is a context, and we are ready to go: + gg_push_context(); + } + +tree +gg_get_function_decl(tree return_type, const char *funcname, ...) + { + // This very similar routine creates and returns the function_decl + + // It was designed for implementing nested functions, in particular + // in cases of forward references. Thus, you need to have the function_decl + // in order to create the call_expr, even though you don't yet have a body, + // and you aren't ready to create it at this time. + + int nparams = 0; + + tree types[ARG_LIMIT]; + const char *names[ARG_LIMIT]; + + va_list params; + va_start(params,funcname); + for(;;) + { + tree var_type = va_arg(params, tree); + if( !var_type ) + { + break; + } + + if( TREE_CODE(var_type) >= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_define_function() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + const char *name = va_arg(params, const char *); + + types[nparams] = var_type; + names[nparams] = name; + nparams += 1; + if(nparams > ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### %d parameters? Really? Are you insane?", + ARG_LIMIT+1); + gcc_unreachable(); + } + } + va_end(params); + + // Create the FUNCTION_TYPE for that array: + tree fndecl_type = build_varargs_function_type_array( return_type, + nparams, + types); + + // Create the FUNCTION_DECL for that FUNCTION_TYPE + tree function_decl = build_fn_decl (funcname, fndecl_type); + + // Some of this stuff is magical, and is based on compiling C programs + // and just mimicking the results. + TREE_ADDRESSABLE(function_decl) = 1; + TREE_STATIC(function_decl) = 1; + DECL_EXTERNAL (function_decl) = 0; + DECL_PRESERVE_P (function_decl) = 0; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT(function_decl) = 1; + DECL_ARTIFICIAL(function_decl) = 0; + TREE_NOTHROW(function_decl) = 0; + TREE_USED(function_decl) = 1; + + if( gg_trans_unit.function_stack.size() == 0 ) + { + // gg_trans_unit.function_stack is empty, so our context is + // the compilation module, and we need to be public: + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 1; + } + else + { + // The stack has something in it, so we are building a nested function. + // Make the current function our context + DECL_CONTEXT (function_decl) = current_function->function_decl; + TREE_PUBLIC(function_decl) = 0; + DECL_STATIC_CHAIN(function_decl) = 1; + } + + // Chain the names onto the variables list: + for(int i=0; i<nparams; i++) + { + chain_parameter_to_function(function_decl, types[i], names[i]); + } + + // Establish the RESULT_DECL for the function: + tree resdecl = build_decl (location_from_lineno(), RESULT_DECL, NULL_TREE, return_type); + DECL_CONTEXT (resdecl) = function_decl; + DECL_RESULT (function_decl) = resdecl; + + // The function_decl has a .function member, a pointer to struct_function. + // This is quietly, almost invisibly, extremely important. You need to + // call this routine after DECL_RESULT has been established: + allocate_struct_function(function_decl, false); + + // It will be the caller's responsibility to push this function_decl onto + // the stack at the appropriate time, and create the appropriate context. + return function_decl; + } + +void +gg_finalize_function() + { + // Unless it has already been handled: + if( !gg_trans_unit.function_stack.size() ) + { + return ; + } + + // Finish off the context + gg_pop_context(); + + if( gg_trans_unit.function_stack.back().is_truly_nested ) + { + // This code is for true nested functions. + + ///////// DANGER, WILL ROBINSON! + ///////// This is all well and good. It does not, however, work. + ///////// I tried to implement it because I had a Brilliant Idea for + ///////// building COBOL paragraphs in a way that would easily allow + ///////// the GDB "NEXT" command to step over a PERFORM <paragraph>. + ///////// But, alas, I realized that it was just not going to work. + ///////// + ///////// Pity. + ///////// + ///////// But at that point, I was here, and I am leaving this uncooked + ///////// code in case I someday want to return to it. If it becomes + ///////// your job, rather than mine, I encourage you to write a C + ///////// program that uses the GNU extensions that allow true nested + ///////// functions, and reverse engineer the "finish_function" + ///////// function, and get it working. + ///////// + ///////// Good luck. Bob Dubner, 2022-08-13 + + // Because this is a nested function, let's make sure that it actually + // has a function that it is nested within + gcc_assert(gg_trans_unit.function_stack.size() > 1 ); + + /* Genericize before inlining. Delay genericizing nested functions + until their parent function is genericized. Since finalizing + requires GENERIC, delay that as well. */ + + // This is the comment in gcc/c/c-decl.c: + + /* Register this function with cgraph just far enough to get it + added to our parent's nested function list. Handy, since the + C front end doesn't have such a list. */ + + static cgraph_node *node = cgraph_node::get_create (current_function->function_decl); + gcc_assert(node); + + } + else + { + // This makes the function visible on the source code module level. + cgraph_node::finalize_function (current_function->function_decl, true); + } + + if( gg_trans_unit.function_stack.back().context_count ) + { + cbl_internal_error("Residual context count!"); + } + + gg_trans_unit.function_stack.pop_back(); + } + +void +gg_push_context() + { + // Sit back, relax, prepare to be amazed. + + // functions need a context in which they build variables and whatnot. + // they also need to be able to create subcontexts. + + // Functions have an DECL_INITIAL member that points to the first block. The + // first block has a BLOCK_VARS member that points to the first of a chain + // of var_decl entries. The first block has a BLOCK_SUBBLOCKS member that + // points to the block of the first subcontext. + + // Functions have a DECL_SAVED_TREE member that points to the first bind_expr + // That first bind_expr has a BIND_EXPR_BLOCK that points back to the first block + // has a BIND_EXPR_VARS that points back to the first block's first var_decl + // has a BIND_EXPR_BODY that points to the first statement_list + + // Each subsequent context gets a new block that is chained to the prior block through BLOCK_SUBBLOCKS + // Each subsequent context gets a new bind_expr which gets added to the parent context's statement list + + // Yes, it's confusing. Have a nice lie-down. + + // Here's what we need for this recipe: + + // We need a block: + tree block = make_node(BLOCK); + TREE_USED(block) = 1; + BLOCK_SUPERCONTEXT(block) = current_function->function_decl; + + // We need a statement list: + tree statement_list = alloc_stmt_list(); + + // We need a bind_expr: + tree bind_expr = build3(BIND_EXPR, + void_type_node, + NULL_TREE, // There are no vars yet. + statement_list, + block); + TREE_SIDE_EFFECTS(bind_expr) = 1; + + // At this point, we might be creating the initial context for a function, + // or we might be creating a sub-context. + + if( !DECL_INITIAL(current_function->function_decl) ) + { + // We are creating the initial context of the function: + DECL_INITIAL(current_function->function_decl) = block; + DECL_SAVED_TREE(current_function->function_decl) = bind_expr; + + // To avoid an N-squared time complexity when chaining blocks, we save the + // current end of the chain of blocks: + current_function->current_block = block; + } + else + { + // We are in the subtext business: + + // We need to tack on our new block to the end of the + // chain of existing blocks: + tree cblock = current_function->current_block; + BLOCK_SUBBLOCKS(cblock) = block; + current_function->current_block = block; + + // And we need to put our new bind_expr onto the end of the + // current active statement list: + gg_append_statement(bind_expr); + } + + // And now we make our statement_list and bind_expr the active ones: + current_function->statement_list_stack.push_back(statement_list); + current_function->bind_expr_stack.push_back(bind_expr); + + // And the new context is ready to rock and roll + gg_trans_unit.function_stack.back().context_count += 1; + } + +void +gg_pop_context() + { + // Backing out is much easier: + current_function->bind_expr_stack.pop_back(); + current_function->statement_list_stack.pop_back(); + + gg_trans_unit.function_stack.back().context_count -= 1; + } + +static +std::unordered_map<std::string, tree> fndecl_from_name; + +static +tree +function_decl_from_name(tree return_type, + const char *function_name, + int nargs, + tree arg_types[]) + { + tree fndecl; + std::unordered_map<std::string, tree>::const_iterator it = + fndecl_from_name.find(function_name); + if( it != fndecl_from_name.end() ) + { + fndecl = it->second; + } + else + { + tree fntype = build_function_type_array(return_type, nargs, arg_types); + fndecl = build_fn_decl (function_name, fntype); + fndecl_from_name[function_name] = fndecl; + } + return fndecl; + } + +tree +gg_call_expr(tree return_type, const char *function_name, ...) + { + // Generalized caller. Params are terminated with NULL_TREE + + // Use this routine to call function_name when you need the return value. + // Typically you will do something like + + // tree call_expr = gg_call_expr(...); + // gg_assign( dest, call_expr ); + + // Note that everyt time call_expr is laid down, the function will be called, + // so you probably don't want to do things like + // gg_assign( dest1, call_expr ); + // gg_assign( dest2, call_expr ); + + int nargs = 0; + static tree arg_types[ARG_LIMIT+1]; + static tree args[ARG_LIMIT+1]; + + va_list ap; + va_start(ap, function_name); + for(;;) + { + if(nargs >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + tree arg = va_arg(ap, tree); + + if( !arg ) + { + break; + } + + arg_types[nargs] = TREE_TYPE(arg); + args[nargs] = arg; + nargs += 1; + } + arg_types[nargs] = NULL_TREE; + args[nargs] = NULL_TREE; + va_end (ap); + + tree function_decl = function_decl_from_name( return_type, + function_name, + nargs, + arg_types); + DECL_EXTERNAL (function_decl) = 1; + tree the_func_addr = build1(ADDR_EXPR, + build_pointer_type (TREE_TYPE(function_decl)), + function_decl); + tree the_call = build_call_array_loc(location_from_lineno(), + return_type, + the_func_addr, + nargs, + args); + // This routine returns the call_expr; the caller will have to deal with it + // as described up above + return the_call; + } + +void +gg_call(tree return_type, const char *function_name, ...) + { + // Generalized caller. function_name is followed by a NULL_TREE-terminated + // list of formal parameters. + + // Use this routine when you don't care about the return value, and + // you want the subroutine to be invoked. + + int nargs = 0; + static tree arg_types[ARG_LIMIT+1]; + static tree args[ARG_LIMIT+1]; + + va_list ap; + va_start(ap, function_name); + for(;;) + { + if(nargs >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + tree arg = va_arg(ap, tree); + + if( !arg ) + { + break; + } + + arg_types[nargs] = TREE_TYPE(arg); + args[nargs] = arg; + nargs += 1; + } + arg_types[nargs] = NULL_TREE; + args[nargs] = NULL_TREE; + va_end (ap); + + tree function_decl = function_decl_from_name( return_type, + function_name, + nargs, + arg_types); + DECL_EXTERNAL (function_decl) = 1; + tree the_func_addr = build1(ADDR_EXPR, + build_pointer_type (TREE_TYPE(function_decl)), + function_decl); + tree the_call = build_call_array_loc(location_from_lineno(), + return_type, + the_func_addr, + nargs, + args); + // This simply executes the_call; any return value is ignored + gg_append_statement(the_call); + } + +tree +gg_call_expr_list(tree return_type, tree function_name, int param_count, tree args[]) + { + // Generalized caller. param_count is the count of params in the arg[]] + + // Use this routine when you need the return value. Typically you + // will do something like + + // tree call_expr_Plist = gg_call_expr_list(...); + // gg_append_statement(call_expr); + + // Note that every time call_expr is invoked, the routine will run again. + + // Avoid that with something like + // gg_assign( dest, gg_call_expr_list(...) ); + + tree the_call = build_call_array_loc(location_from_lineno(), + return_type, + function_name, + param_count, + args); + // This routine returns the call_expr; the caller will have to deal with it + // as described up above + return the_call; + } + +tree +gg_create_bind_expr() + { + // In support of things like PERFORM paragraph, we need to create + // blocks of statements that can be executed. + + // This will be a naked bind_expr, like we use for WHILE construction. + // It's not defining a context, so it has no variable list, nor does + // it point to a block. + + tree statement_block = make_node(STATEMENT_LIST); + TREE_TYPE(statement_block) = void_type_node; + tree bind_expr = build3( BIND_EXPR, + void_type_node, + NULL_TREE, + statement_block, + NULL_TREE); + + return bind_expr; + } + +void +gg_exit(tree exit_code) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_EXIT), + 1, + exit_code); + gg_append_statement(the_call); + } + +void +gg_abort() + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_ABORT), + 0); + gg_append_statement(the_call); + } + +tree +gg_strlen(tree psz) + { + tree the_call = fold_convert( + size_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRLEN), + 1, + psz)); + return the_call; + } + +tree +gg_strdup(tree psz) + { + tree the_call = fold_convert( + build_pointer_type(char_type_node), + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRDUP), + 1, + psz)); + return the_call; + } + +/* built_in call to malloc() */ + +tree +gg_malloc(tree size) + { + tree the_call = fold_convert( + pvoid_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MALLOC), + 1, + size)); + return the_call; + } + +tree +gg_realloc(tree base, tree size) + { + tree the_call = fold_convert( + pvoid_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_REALLOC), + 2, + base, + size)); + return the_call; + } + +tree +gg_realloc(tree base, size_t size) + { + return gg_realloc(base, build_int_cst_type(SIZE_T, size)); + } + +tree +gg_malloc(size_t size) + { + return gg_malloc(build_int_cst_type(SIZE_T, size)); + } + +void +gg_free(tree pointer) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_FREE), + 1, + pointer); + gg_append_statement(the_call); + } + +void +gg_record_statement_list_start() + { + // We need a statement list: + tree statement_list = alloc_stmt_list(); + current_function->statement_list_stack.push_back(statement_list); + } + +tree +gg_record_statement_list_finish() + { + tree retval = current_function->statement_list_stack.back(); + current_function->statement_list_stack.pop_back(); + return retval; + } + +size_t +gg_sizeof(tree node) + { + size_t size_in_bytes; + if( DECL_P(node) ) + { + size_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(TREE_TYPE(node))); + } + else + { + gcc_assert(TYPE_P(node)); + size_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(node)); + } + return size_in_bytes; + } + +tree +gg_array_of_size_t( size_t N, size_t *values) + { + tree retval = gg_define_variable(build_pointer_type(SIZE_T)); + gg_assign(retval, gg_cast(build_pointer_type(SIZE_T), gg_malloc( build_int_cst_type(SIZE_T, N * sizeof(size_t))))); + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(retval, i), build_int_cst_type(SIZE_T, values[i])); + } + return retval; + } + +tree +gg_array_of_bytes( size_t N, unsigned char *values) + { + tree retval = gg_define_variable(build_pointer_type(UCHAR)); + gg_assign(retval, gg_cast(build_pointer_type(UCHAR), gg_malloc( build_int_cst_type(UCHAR, N * sizeof(unsigned char))))); + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(retval, i), build_int_cst_type(UCHAR, values[i])); + } + return retval; + } + +tree +gg_string_literal(const char *string) + { + /* This is a message in a bottle. + + A genapi.cc program calling + + gg_call(VOID, + "puts", + build_string_literal(strlen(ach)+1, ach), + NULL_TREE); + + ten thousand times compiles about ten percent slower than a C program + calling + + puts(ach); + + ten thousand times. + + Trapping through the C front end reveals that they do not call + build_string_literal(). They instead use build_string() in a way that + I gave up trying to figure out that produces, apparently, more efficient + GENERIC. + + Their GENERIC: call_expr -> nop_expr -> addr_expr -> string_cst + + My GENERIC: call_expr -> addr_expr -> array_ref -> string_cst + + I tried for an hour to duplicate the C stuff, but made no headway. + + This comment is a reminder to myself to investigate this, someday, because + I eventually want that ten percent. + */ + + return build_string_literal(strlen(string)+1, string); + } + +void +gg_set_current_line_number(int line_number) + { + sv_current_line_number = line_number; + } + +int +gg_get_current_line_number() + { + return sv_current_line_number; + } + +tree +gg_trans_unit_var_decl(const char *var_name) + { + std::unordered_map<std::string, tree>::const_iterator it = + gg_trans_unit.trans_unit_var_decls.find(var_name); + if( it != gg_trans_unit.trans_unit_var_decls.end() ) + { + return it->second; + } + return NULL_TREE; + } + +void +gg_insert_into_assembler(const char *format, ...) + { + // This routine inserts text directly into the assembly language stream. + + // Note that if for some reason your text has to have a '%' character, it + // needs to be doubled in the GENERIC tag. And that means if it is in the + // 'format' variable, it needs to be quadrupled. + + // Create the string to be inserted: + char ach[256]; + va_list ap; + va_start(ap, format); + vsnprintf(ach, sizeof(ach), format, ap); + va_end(ap); + + // Create the required generic tag + tree asm_expr = build5_loc( location_from_lineno(), + ASM_EXPR, + VOID, + build_string(strlen(ach), ach), + NULL_TREE, + NULL_TREE, + NULL_TREE, + NULL_TREE); + //SET_EXPR_LOCATION (asm_expr, UNKNOWN_LOCATION); + + // And insert it as a statement + gg_append_statement(asm_expr); + } diff --git a/gcc/cobol/gengen.h b/gcc/cobol/gengen.h new file mode 100644 index 0000000..8c1bc8d --- /dev/null +++ b/gcc/cobol/gengen.h @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __GENGEN_H +#define __GENGEN_H + +// Note how the definitions of IF and WHILE lets you use them as +// IF(a,b,c) and WHILE(a,b,c) with no semicolon. +// And, yes, I see that ELSE, ENDIF, and WEND are all the same. Sometimes +// looks *are* important, and the multiple definitions make things easier +// to understand. + +#define IF(a,b,c) gg_if((a),(b),(c)); +#define ELSE current_function->statement_list_stack.pop_back(); +#define ENDIF current_function->statement_list_stack.pop_back(); +#define WHILE(a,b,c) gg_while((a),(b),(c)); +#define WEND current_function->statement_list_stack.pop_back(); + +// mnemonics for variable types: + +#define VOID void_type_node +#define BOOL boolean_type_node +#define CHAR char_type_node +#define SCHAR signed_char_type_node +#define UCHAR unsigned_char_type_node +#define SHORT short_integer_type_node +#define USHORT short_unsigned_type_node +#define WCHAR short_unsigned_type_node +#define INT integer_type_node +#define INT_P build_pointer_type(integer_type_node) +#define UINT unsigned_type_node +#define LONG long_integer_type_node +#define ULONG long_unsigned_type_node +#define LONGLONG long_long_integer_type_node +#define ULONGLONG long_long_unsigned_type_node +#define SIZE_T size_type_node +#define SIZE_T_P (build_pointer_type(SIZE_T)) +#define SSIZE_T ptrdiff_type_node +#define INT128 intTI_type_node +#define UINT128 unsigned_intTI_type_node +#define FLOAT float32_type_node +#define DOUBLE float64_type_node +#define LONGDOUBLE long_double_type_node +#define FLOAT128 float128_type_node +#define VOID_P ptr_type_node +#define VOID_P_P (build_pointer_type(VOID_P)) +#define CHAR_P char_ptr_type_node +#define UCHAR_P uchar_ptr_type_node +#define WCHAR_P wchar_ptr_type_node +#define FILE_P fileptr_type_node + +#define SIZE128 (16) // In bytes + +/* Explanatory note for vs_file_static variables + + In a C program, you can have this variable declaration outside of a + function: + + static const int intvar = 12321; + + It will be visible to any function that follows. After several days of + experimentation and research, I found I was unable to duplicate this + behavior in the GCOBOL code generator. I simply wasn't able to reverse + engineer whatever magical incantations are necessary to declare and define] + variables on the translation unit level rather than on the function level. + + Having reached the point where the structural integrity of my desk was being + threatened by the repeated percussive strikes from my forehead, I turned my + attention to an equivalent workaround. + + On the assembly language level, there is no fundamental way of making a + variable visible to only a specific function. So, to distinguish between + two non-global variables named "fred" in two different functions, the C + compiler appends a dot and a number, with the "number" being different for + the two functions. + + The GCOBOL compiler has been doing just that. So, to implement a + vs_file_static variable, I treat it just like a vs_static variable, but + without appending a differentiator. + + */ + +enum gg_variable_scope_t { + vs_stack, + vs_static, + vs_file_static, // static variable of file scope + vs_external, // Creates a PUBLIC STATIC variable of file scope + vs_external_reference, // References the previous + vs_file, // variable of file scope, without static +}; + +struct gg_function_t + { + // Nomenclature Alert: The "function" in gg_function_t was chosen + // originally because a PROGRAM-ID is implemented as a C-style "function", + // and there are numerous tree variables that refer to "functions". + // Eventually the COBOL compiler grew to handle not just COBOL PROGRAM-ID + // "programs", but also user-defined COBOL FUNCTION-ID "functions". This + // inevitably is confusing. Sorry about that. + + // This structure contains state variables for a single function. + + const char *our_unmangled_name; // This is the original name + const char *our_name; // This is our mangled name + tree function_address; + size_t our_symbol_table_index; + + // The function_decl is fundamental to many, many things + tree function_decl; + + // We keep track of the end of the chain of blocks: + tree current_block; + + // Every function has a context, wherein temporary variables get created + // and whose names won't collide with the names in other function. + + // But it is often necessary to create subcontexts, which inherit names from + // its parent function, but can reuse names, and create new ones, without + // collisions. Each context gets its own bind_expr, each bind_expr points + // to its own block. So, to create subcontexts, we need to know which + // bind_expr we add variable declarations to. + std::vector<tree> bind_expr_stack; + + // Every function has a statement list. But there can be statements + // that consist of statement lists. This happens when building IF + // statements (TRUE gets its own list, as does FALSE) and WHILE statements + // (where the execution block is a statement list. This stack enables that + // to happen cleanly, so the programmer doesn't have to be concerned about + // which list is being built. + + // Note that the gg_statement_list_stack can grow larger than the + // current_function->bind_expr_stack stack, because + // there are times -- like inside of WHILE() and IF constructs -- where we + // push onto the statement_list_stack and even create new bind_expr nodes, + // but don't need a full new context. But every new context gets a new + // statement list, and when + // current_function->bind_expr_stack is popped, + // statement_list_stack is popped, too. + std::vector<tree> statement_list_stack; + + // COBOL sections and paragraphs are handled identically; it's the context + // that makes them different: PROGRAMS contain SECTIONS, and SECTIONS + // contain paragraphs. I call both SECTIONS and PARAGRAPHS "procs" + + // At any given moment, there is one "current section" and one "current + // paragraph". + struct cbl_proc_t *current_section; + struct cbl_proc_t *current_paragraph; + + tree void_star_temp; // At the end of every paragraph and section, we + // // we need a variable "void *temp" to hold a + // // label for one instruction. Rather than clutter + // // up the code with temporaries, we use this one + // // instance instead. + + tree first_time_through; + + tree skip_init_goto; + tree skip_init_label; + + // We use context_count to detect a mismatch between gg_push_context() and + // gg_pop_context calls, which should be equal at the point gimplify is + // invoked: + int context_count; + + // When a function is called, it comes with zero to N parameters on the + // stack. We treat it as variadic; see parser_division(PROCEDURE) to see + // how we pick up the N values on the stack: + tree formal_parameters; + + // When parser_division(PROCEDURE) is called, it provides a cbl_field_t + // *returning parameter. We stash it here; it's used during parser_exit() + // to provide the data for the program's return value. + cbl_field_t *returning; // This one is on the stack, like a LOCAL-STORAGE + + size_t program_id_number; // Used to give static variables + // // a unique .<n> suffix + + // There are two types of nesting. COBOL nesting is implemented in a + // logical way: All programs are siblings, with the context being the source + // code module. The nested aspect is not reflected in the GENERIC tree. + + // Truly nested functions are implemented within the generic tree; the + // nested function is completely inside the outer function. This was + // implemented to support paragraphs as callable entities. + bool is_truly_nested; + + // This variable, which appears on the stack, contains the exit_address + // for the terminating proc of a PERFORM A or PERFORM A THROUGH B + tree perform_exit_address; + + // This variable is a pointer to the first declarative section of this + // program-id/function. It's used in when creating the linked list of + // declaratives, because the last declarative of a nested function links + // back to the first declarative of its immediate parent. + tree first_declarative_section; + + // is_function is true when this structure is describing a COBOL FUNCTION-ID + // and is false for a PROGRAM-ID + bool is_function; + + // This integer is initially set to one when this function is called by + // our generated main(). It gets incremented by 1 when the routine is + // re-entered: main() -> us -> B -> us + // When processing EXIT PROGRAM, if the counter is greater then 1, it is + // decremented and a return is created. When the counter is 1, the + // EXIT program is treated as a CONTINUE. + tree called_by_main_counter; + }; + +struct cbl_translation_unit_t + { + // GCC calls a source file a "translation unit". This structure contains + // all of the information needed by and for a translation unit. There + // probably should be one, and only one, of these instantiated by the COBOL + // front end. + + // Every function in this code module gets this translation_unit_decl + // as its context. This node is built in parse_enter_file() + tree trans_unit_decl; + + // This is the filename of this trans_unit_decl + const char *filename; + + // This is the stack of function_decls in this translation unit; each + // call to parser_enter_program() pushes onto this stack; each call to + // parser_end_program() pops it. + std::vector<struct gg_function_t> function_stack; + + // This is where we keep var_decls because of my inability to figure out how + // to tell the compiler to create data definitions for translation_unit_decl + // variables: + std::unordered_map<std::string, tree> trans_unit_var_decls; + }; + +extern struct cbl_translation_unit_t gg_trans_unit; + +#define current_function (&gg_trans_unit.function_stack.back()) + +extern GTY(()) tree char_nodes[256] ; +extern GTY(()) tree pvoid_type_node ; +extern GTY(()) tree integer_minusone_node; +extern GTY(()) tree integer_two_node ; +extern GTY(()) tree integer_eight_node ; +extern GTY(()) tree size_t_zero_node ; +extern GTY(()) tree int128_zero_node ; +extern GTY(()) tree int128_five_node ; +extern GTY(()) tree int128_ten_node ; +extern GTY(()) tree bool_true_node ; +extern GTY(()) tree bool_false_node ; +extern GTY(()) tree char_ptr_type_node ; +extern GTY(()) tree uchar_ptr_type_node ; +extern GTY(()) tree wchar_ptr_type_node ; +extern GTY(()) tree long_double_ten_node ; +extern GTY(()) tree sizeof_size_t ; +extern GTY(()) tree sizeof_pointer ; + +// These routines happen when beginning to process a new file, which is also +// known, in GCC, as a "translation unit" +extern void gg_build_translation_unit(const char *filename); + +// For an expression type to actually be implemented in the target +// runtime binary, it has to find its way onto a statement list. (Or be used +// as the second operand of a modify_expr, and so on.) +extern void gg_append_statement(tree stmt); +//// extern void gg_insert_statement(struct tree_stmt_iterator *tsi, tree stmt); + +// For variables: +extern void gg_append_var_decl(tree var); + +// type cast +extern tree gg_float(tree float_type, tree integer_var); +extern tree gg_trunc(tree integer_type, tree float_var); +extern tree gg_cast(tree type, tree var); + +// Assignment, that is to say, A = B +extern void gg_assign(tree dest, const tree source); + +// struct creation and field access +// Create struct, and access a field in a struct +extern tree gg_get_local_struct_type_decl(const char *type_name, int count, ...); +extern tree gg_get_filelevel_struct_type_decl(const char *type_name, int count, ...); +extern tree gg_get_filelevel_union_type_decl(const char *type_name, int count, ...); +extern tree gg_define_local_struct(const char *type_name, const char * var_name, int count ,...); +extern tree gg_find_field_in_struct(const tree var_decl, const char *field_name); +extern tree gg_struct_field_ref(const tree struct_decl, const char *field); +extern tree gg_assign_to_structure(tree var_decl_struct, const char *field, const tree source); +extern tree gg_assign_to_structure(tree var_decl_struct, const char *field, int N); + +// Generalized variable declareres. This don't create storage +extern tree gg_declare_variable(tree type_decl, + const char *name=NULL, + tree initial_value=NULL_TREE, + gg_variable_scope_t vs_scope=vs_stack, + bool *already_defined = NULL); +extern tree gg_define_from_declaration(tree var_decl); + +// Generalized variable definers. These create storage +extern tree gg_define_variable(tree type_decl); +extern tree gg_define_variable(tree type_decl, tree initial_value); +extern tree gg_define_variable(tree type_decl, gg_variable_scope_t vs_scope); +extern tree gg_define_variable(tree type_decl, + const char *name, + gg_variable_scope_t vs_scope=vs_stack); +extern tree gg_define_variable(tree type_decl, + const char *name, + gg_variable_scope_t vs_scope, + tree initial_value); +// Utility definers: +extern tree gg_define_bool(); +extern tree gg_define_char(); +extern tree gg_define_char(const char *variable_name); +extern tree gg_define_char(const char *variable_name, tree ch); +extern tree gg_define_char(const char *variable_name, int ch); + +extern tree gg_define_uchar(); +extern tree gg_define_uchar(const char *variable_name); +extern tree gg_define_uchar(const char *variable_name, tree ch); +extern tree gg_define_uchar(const char *variable_name, int ch); + +extern tree gg_define_int(); +extern tree gg_define_int(int N); +extern tree gg_define_int(const char *variable_name); +extern tree gg_define_int(const char *variable_name, tree N); +extern tree gg_define_int(const char *variable_name, int N); + +extern tree gg_define_size_t(); +extern tree gg_define_size_t(const char *variable_name); +extern tree gg_define_size_t(const char *variable_name, tree N); +extern tree gg_define_size_t(const char *variable_name, size_t N); +extern tree gg_define_size_t(tree N); +extern tree gg_define_size_t(size_t N); + +extern tree gg_define_int128(); +extern tree gg_define_int128(const char *variable_name); +extern tree gg_define_int128(const char *variable_name, tree N); +extern tree gg_define_int128(const char *variable_name, int N); + +extern tree gg_define_longdouble(); + +extern tree gg_define_void_star(); +extern tree gg_define_void_star(tree var); +extern tree gg_define_void_star(const char *variable_name); +extern tree gg_define_void_star(const char *variable_name, tree var); +extern tree gg_define_void_star(const char *variable_name, gg_variable_scope_t scope); + +extern tree gg_define_char_star(); +extern tree gg_define_char_star(tree var); +extern tree gg_define_char_star(const char *variable_name); +extern tree gg_define_char_star(const char *variable_name, tree var); +extern tree gg_define_char_star(const char *variable_name, gg_variable_scope_t scope); + +extern tree gg_define_uchar_star(); +extern tree gg_define_uchar_star(const char *variable_name); +extern tree gg_define_uchar_star(const char *variable_name, gg_variable_scope_t scope); +extern tree gg_define_uchar_star(tree var); +extern tree gg_define_uchar_star(const char *variable_name, tree var); + +// address_of operator; equivalent of C "&buffer" +extern tree gg_get_address_of(const tree var_decl); + +// Array creation and access: +extern tree gg_define_array(tree type_decl, size_t size); +extern tree gg_define_array(tree type_decl, const char *name, size_t size); +extern tree gg_define_array(tree type_decl, size_t size, gg_variable_scope_t scope); +extern tree gg_define_array(tree type_decl, const char *name, size_t size, gg_variable_scope_t scope); + +extern tree gg_array_value(tree pointer, tree offset=NULL_TREE); +extern tree gg_array_value(tree pointer, int N); + +// Here are some unary operations +extern void gg_increment(tree var); +extern void gg_decrement(tree var); +extern tree gg_negate(tree var); // Two's complement negation +extern tree gg_bitwise_not(tree var); // Bitwise inversion +extern tree gg_abs(tree var); // Absolute value + +// And some binary operations: + +extern tree gg_add(tree addend1, tree addend2); +extern tree gg_subtract(tree A, tree B); +extern tree gg_multiply(tree A, tree B); +extern tree gg_real_divide(tree A, tree B); // Floating point division +extern tree gg_divide(tree A, tree B); // Integer division +extern tree gg_mod(tree A, tree B); +extern tree gg_lshift(tree A, tree B); +extern tree gg_rshift(tree A, tree B); +extern tree gg_bitwise_or(tree A, tree B); +extern tree gg_bitwise_xor(tree A, tree B); +extern tree gg_bitwise_and(tree A, tree B); + +// Conditionals: Use the IF() and WHILE() macros, which generated +// code that calls these functions. Calling them yourself is +// probably a bad idea because there are stacks that have to be +// kept in the right states. + +extern tree gg_build_relational_expression( tree operand_a, + enum relop_t op, + tree operand_b); +extern tree gg_build_logical_expression(tree operand_a, + enum logop_t op, + tree operand_b); + +extern void gg_create_true_false_statement_lists(tree relational_expression); +extern void gg_while(tree operand_a, enum relop_t op, tree operand_b); +extern void gg_if( tree operand_a, enum relop_t op, tree operand_b); + +// Are are some system functions that can be useful. gg_printf is +// particularly useful for generating run-time messages. Actual run-time +// code is built using write(), because it allows for file descriptors and +// doesn't require null-terminated strings. + +extern tree gg_get_function_address(tree return_type, const char *funcname); +extern void gg_printf(const char *format_string, ...); +extern tree gg_fprintf(tree fd, int nargs, const char *format_string, ...); +extern tree gg_read(tree fd, tree buf, tree count); +extern void gg_write(tree fd, tree buf, tree count); +extern void gg_memset(tree dest, const tree value, tree size); +extern tree gg_memchr(tree s, tree c, tree n); +extern void gg_memcpy(tree dest, const tree src, tree size); +extern void gg_memmove(tree dest, const tree src, tree size); +extern tree gg_memdup(tree data, tree length); +extern tree gg_memdup(tree data, size_t length); +extern void gg_strcpy(tree char_star_A, tree char_star_B); +extern tree gg_strdup(tree char_star_A); +extern tree gg_strcmp(tree char_star_A, tree char_star_B); +extern tree gg_strncmp(tree char_star_A, tree char_star_B, tree size_t_N); + +// Flow control inside a function +extern void gg_return(tree operand = NULL_TREE); + +// These routines are the preample and postamble that bracket everything else +extern void gg_define_function(tree return_type, const char *funcname, ...); +extern tree gg_define_function_with_no_parameters(tree return_type, + const char *funcname, + const char *unmangled_name); +extern void chain_parameter_to_function( tree function_decl, + const tree param_type, + const char *name); + +extern void gg_finalize_function(); +extern void gg_push_context(); +extern void gg_pop_context(); + +// These are a generalized call constructor. The first for when you just want +// the function called, because you don't care about the return value. The others +// are for when you do need the return value. +extern tree gg_call_expr_list(tree return_type, tree function_name, int param_count, tree[]); + +// The following is a garden-variety call, with known return type and known +// but in the case where the return value is unimportant. +extern void gg_call (tree return_type, const char *function_name, ...); +extern tree gg_call_expr(tree return_type, const char *function_name, ...); + +// Returns a simple entangled goto/comefrom pair. Used for things like +// IF/ELSE/ENDIF and WHILE/WEND +void gg_create_goto_pair(tree *goto_expr, tree *label_expr); +void gg_create_goto_pair(tree *goto_expr, tree *label_expr, const char *name); + +// This more complex version is used for implementing SECTIONS and PARAGRAPHS. +void gg_create_goto_pair( tree *goto_expr, + tree *label_expr, + tree *label_addr, + const char *name); +void gg_create_goto_pair( tree *goto_expr, + tree *label_expr, + tree *label_addr); +void gg_create_goto_pair( tree *goto_expr, + tree *label_expr, + tree *label_addr, + tree *label_decl); +void gg_goto_label_decl(tree label_decl); + +// Used for implementing SECTIONS and PARAGRAPHS. When you have a +// void *pointer = &&label, gg_goto is the same as +// goto *pointer +void gg_goto(tree pointer); + +void gg_record_statement_list_start(); +tree gg_record_statement_list_finish(); + +// These routines are in support of PERFORM PARAGRAPH +extern tree gg_get_function_decl(tree return_type, const char *funcname, ...); + +// Used to call system exit() +extern void gg_exit(tree exit_code); +extern void gg_abort(); + +extern tree gg_malloc(tree length); +extern tree gg_malloc(size_t length); +extern tree gg_realloc(tree base, tree length); +extern tree gg_realloc(tree base, size_t length); +extern void gg_free(tree pointer); +extern tree gg_strlen(tree psz); +extern size_t gg_sizeof(tree decl_node); + +extern tree gg_array_of_field_pointers( size_t N, + cbl_field_t **fields ); +extern tree gg_array_of_size_t( size_t N, size_t *values); +extern tree gg_array_of_bytes( size_t N, unsigned char *values); +extern tree gg_indirect(tree pointer, tree byte_offset = NULL_TREE); +extern tree gg_string_literal(const char *string); + +#define CURRENT_LINE_NUMBER (cobol_location().first_line) +location_t location_from_lineno(); + +// When set to true, use UNKNOWN_LOCATION instead of CURRENT_LINE_NUMBER +extern void gg_set_current_line_number(int line_number); +extern int gg_get_current_line_number(); + +extern tree gg_trans_unit_var_decl(const char *var_name); + +tree gg_open(tree char_star_A, tree int_B); +tree gg_close(tree int_A); +tree gg_get_indirect_reference(tree pointer, tree offset); +void gg_insert_into_assembler(const char *format, ...); +void gg_modify_function_type(tree function_decl, tree return_type); +#endif diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc new file mode 100644 index 0000000..138551b --- /dev/null +++ b/gcc/cobol/genmath.cc @@ -0,0 +1,1730 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "genutil.h" +#include "gengen.h" +#include "structs.h" +#include "gcobolio.h" +#include "libgcobol.h" +#include "show_parse.h" + +void +set_up_on_exception_label(cbl_label_t *arithmetic_label) + { + if( arithmetic_label ) + { + if( !arithmetic_label->structs.arith_error ) + { + arithmetic_label->structs.arith_error + = (cbl_arith_error_t *)xmalloc(sizeof(struct cbl_arith_error_t) ); + // Set up the address pairs for this clause + gg_create_goto_pair(&arithmetic_label->structs.arith_error->over.go_to, + &arithmetic_label->structs.arith_error->over.label); + gg_create_goto_pair(&arithmetic_label->structs.arith_error->into.go_to, + &arithmetic_label->structs.arith_error->into.label); + gg_create_goto_pair(&arithmetic_label->structs.arith_error->bottom.go_to, + &arithmetic_label->structs.arith_error->bottom.label); + } + } + } + +void +set_up_compute_error_label(cbl_label_t *compute_label) + { + if( compute_label ) + { + if( !compute_label->structs.compute_error ) + { + compute_label->structs.compute_error + = (cbl_compute_error_t *) + xmalloc(sizeof(struct cbl_compute_error_t) ); + compute_label->structs.compute_error->compute_error_code + = gg_define_int(0); + } + } + } + +static void +set_up_arithmetic_error_handler(cbl_label_t *error, + cbl_label_t *not_error) + { + Analyze(); + // There might, or might not, be error and/or not_error labels: + set_up_on_exception_label(error); + set_up_on_exception_label(not_error); + } + +static void +arithmetic_operation(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + tree compute_error, // Pointer to int + const char *operation, + cbl_refer_t *remainder = NULL) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT_AB("performing ", operation, "") + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("calling ", operation, "") + TRACE1_END + for(size_t ii=0; ii<nA; ii++) + { + TRACE1_INDENT + gg_fprintf( trace_handle, + 1, "parameter A[%ld]: ", + build_int_cst_type(SIZE_T, ii)); + TRACE1_REFER("", A[ii], ""); + } + for(size_t ii=0; ii<nB; ii++) + { + TRACE1_INDENT + gg_fprintf( trace_handle, + 1, "parameter B[%ld]: ", + build_int_cst_type(SIZE_T, ii)); + TRACE1_REFER("", B[ii], ""); + } + TRACE1_END + } + + // We need to split up cbl_num_result_t into two arrays, one for the refer_t + // and a second for the cbl_round_t enums. + + // Allocate nC+1 in case this is a divide with a REMAINDER + + cbl_refer_t *results = (cbl_refer_t *)xmalloc((nC+1) * sizeof( cbl_refer_t )); + int ncount = 0; + + if( nC+1 <= MIN_FIELD_BLOCK_SIZE ) + { + // We know there is room in our existing buffer + } + else + { + // We might have to allocate more space: + gg_call(VOID, + "__gg__resize_int_p", + gg_get_address_of(var_decl_arithmetic_rounds_size), + gg_get_address_of(var_decl_arithmetic_rounds), + build_int_cst_type(SIZE_T, nC+1), + NULL_TREE); + } + + // We have to take into account the possibility the quotient of the division + // can affect the disposition of the remainder. In particular, some of the + // NIST tests have the construction + + // DIVIDE A BY B GIVING C REMAINDER TABLE(C) + + // Which seems, somehow, unnatural. + + cbl_refer_t temp_remainder; + cbl_field_t temp_field = {}; + + if( remainder ) + { + // We need a duplicate of the remainder, because we have to take into count + // the possibility of a size error in moving the remainder into place + temp_field.type = remainder->field->type; + temp_field.attr = (remainder->field->attr | intermediate_e) & ~initialized_e; + temp_field.level = 1; + temp_field.data.memsize = remainder->field->data.memsize ; + temp_field.data.capacity = remainder->field->data.capacity; + temp_field.data.digits = remainder->field->data.digits ; + temp_field.data.rdigits = remainder->field->data.rdigits ; + temp_field.data.initial = remainder->field->data.initial ; + temp_field.data.picture = remainder->field->data.picture ; + parser_symbol_add(&temp_field); + temp_remainder.field = &temp_field; + + // For division, the optional remainder goes onto the beginning of the + // list + results[ncount++] = temp_remainder; + } + for(size_t i=0; i<nC; i++) + { + results[ncount] = C[i].refer; + gg_assign( gg_array_value(var_decl_arithmetic_rounds, ncount), + build_int_cst_type(INT, C[i].rounded)); + ncount += 1; + } + + // REMAINDER_PRESENT means what it says. + // ON_SIZE_ERROR means that the ON SIZE ERROR phrase is present + + int call_flags = (( error || not_error ) ? ON_SIZE_ERROR : 0) + + (remainder ? REMAINDER_PRESENT : 0); + + gcc_assert(compute_error); + + // Having done all that work, we now need to break out the various different + // arithmetic routines that implement the various possibilities, + + build_array_of_treeplets(1, nA, A); + build_array_of_treeplets(2, nB, B); + build_array_of_treeplets(3, ncount, results); + + gg_call(VOID, + operation, + build_int_cst_type(INT, format), + build_int_cst_type(SIZE_T, nA), + build_int_cst_type(SIZE_T, nB), + build_int_cst_type(SIZE_T, ncount), + var_decl_arithmetic_rounds, + build_int_cst_type(INT, call_flags), + compute_error, + NULL_TREE); + TRACE1 + { + for(size_t ii=0; ii<nC; ii++) + { + break; // Breaks on ADD 1 SUB2 GIVING SUB4 both PIC S9(3) COMP + TRACE1_INDENT + gg_fprintf( trace_handle, + 1, "result: C[%ld]: ", + build_int_cst_type(SIZE_T, ii)); + TRACE1_REFER("", C[ii].refer, ""); + } + TRACE1_END + } + + // We just did an operation. + IF( gg_indirect(compute_error), ne_op, integer_zero_node ) + { + gg_call( VOID, + "__gg__process_compute_error", + gg_indirect(compute_error), + NULL_TREE); + } + ELSE + ENDIF + + if( remainder ) + { + parser_move(*remainder, temp_remainder); + } + + SHOW_PARSE + { + SHOW_PARSE_END + } + + // We need to release all of the refers we allocated: + free(results); + } + +static void +arithmetic_error_handler( cbl_label_t *error, + cbl_label_t *not_error, + tree compute_error) // Pointer to int with bits + { + Analyze(); + if( error ) + { + // We had an ON SIZE ERROR phrase + IF( gg_indirect(compute_error), ne_op, integer_zero_node) + { + // The ON SIZE ERROR imperative takes precedence over exception processing + // So, we set the global exception code to zero. This leaves intact the + // stashed data needed for FUNCTION EXCEPTION-STATUS, but will preclude + // any declarative processing + gg_assign(var_decl_exception_code, integer_zero_node); + + // There was some kind of error, so we execute the ON SIZE ERROR + // imperative + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + + if( not_error ) + { + IF( gg_indirect(compute_error), eq_op, integer_zero_node) + { + // There wasn't a computation error + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + + // With the operation and the two possible GO TOs laid down, it's time + // to create the target labels for exiting the ON [NOT] SIZE ERROR blocks: + if( error ) + { + gg_append_statement( error->structs.arith_error->bottom.label ); + } + if( not_error ) + { + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + } + +static bool +is_somebody_float(size_t nA, cbl_refer_t *A) + { + bool retval = false; + for(size_t i=0; i<nA; i++) + { + if(A[i].field->type == FldFloat) + { + retval = true; + break; + } + } + return retval; + } + +static bool +is_somebody_float(size_t nC, cbl_num_result_t *C) + { + bool retval = false; + for(size_t i=0; i<nC; i++) + { + if(C[i].refer.field->type == FldFloat) + { + retval = true; + break; + } + } + return retval; + } + +static bool +all_results_binary(size_t nC, cbl_num_result_t *C) + { + bool retval = true; + + for(size_t i=0; i<nC; i++) + { + if(C[i].refer.field->data.digits != 0 || C[i].refer.field->type == FldFloat ) + { + retval = false; + break; + } + } + return retval; + } + +static tree +largest_binary_term(size_t nA, cbl_refer_t *A) + { + tree retval = NULL_TREE; + uint32_t max_capacity = 0; + int is_negative = 0; + + for(size_t i=0; i<nA; i++) + { + if( A[i].field->data.rdigits || A[i].field->type == FldFloat ) + { + // We are prepared to work only with integers + retval = NULL_TREE; + break; + } + if( A[i].field->type == FldLiteralN +// || A[i].field->type == FldNumericDisplay + || A[i].field->type == FldNumericBinary + || A[i].field->type == FldNumericBin5 + || A[i].field->type == FldIndex + || A[i].field->type == FldPointer ) + { + // This is an integer type that can be worked with quickly + is_negative |= ( A[i].field->attr & signable_e ); + max_capacity = std::max(max_capacity, A[i].field->data.capacity); + retval = tree_type_from_size(max_capacity, is_negative); + } + else + { + // This is a type we don't care to deal with for fast arithmetic + retval = NULL_TREE; + break; + } + } + return retval; + } + +static bool +fast_add( size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + cbl_arith_format_t format ) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + if( term_type ) + { + // All the terms are things we can work with. + + // We need to calculate the sum of all the A[] terms using term_type as + // the intermediate type: + + tree sum = gg_define_variable(term_type); + tree addend = gg_define_variable(term_type); + get_binary_value( sum, + NULL, + A[0].field, + refer_offset_source(A[0])); + + // Add in the rest of them: + for(size_t i=1; i<nA; i++) + { + get_binary_value( addend, + NULL, + A[i].field, + refer_offset_source(A[i])); + gg_assign(sum, gg_add(sum, addend)); + } + //gg_printf("The intermediate sum is %ld\n", gg_cast(LONG, sum), NULL_TREE); + + // We now either accumulate into C[n] or assign to C[n]: + for(size_t i=0; i<nC; i++ ) + { + tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( format == giving_e ) + { + // We are assigning + gg_assign( gg_indirect(ptr), + gg_cast(dest_type, sum)); + } + else + { + // We are accumulating + gg_assign( gg_indirect(ptr), + gg_add( gg_indirect(ptr), + gg_cast(dest_type, sum))); + } + } + retval = true; + } + + //gg_insert_into_assembler("# DUBNER addition END "); + } + return retval; + } + +static bool +fast_subtract(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + + if( term_type && format == giving_e ) + { + tree term_type_B = largest_binary_term(nB, B); + if( term_type_B ) + { + if(TREE_INT_CST_LOW(TYPE_SIZE(term_type_B)) + > TREE_INT_CST_LOW(TYPE_SIZE(term_type)) ) + { + term_type = term_type_B; + } + } + else + { + term_type = NULL_TREE; + } + } + + if( term_type ) + { + // All the terms are things we can work with. + + // We need to calculate the sum of all the A[] terms using term_type as + // the intermediate type: + + tree sum = gg_define_variable(term_type); + tree addend = gg_define_variable(term_type); + get_binary_value(sum, NULL, A[0].field, refer_offset_dest(A[0])); + + // Add in the rest of them: + for(size_t i=1; i<nA; i++) + { + get_binary_value(sum, NULL, A[i].field, refer_offset_dest(A[i])); + gg_assign(sum, gg_add(sum, addend)); + } + //gg_printf("The intermediate sum is %ld\n", gg_cast(LONG, sum), NULL_TREE); + + if( format == giving_e ) + { + // We now subtract the sum from B[0] + get_binary_value(addend, NULL, B[0].field, refer_offset_dest(B[0])); + gg_assign(sum, gg_subtract(addend, sum)); + } + + // We now either accumulate into C[n] or assign to C[n]: + for(size_t i=0; i<nC; i++ ) + { + tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( format == giving_e ) + { + // We are assigning + gg_assign( gg_indirect(ptr), + gg_cast(dest_type, sum)); + } + else + { + // We are subtracting the sum from C[i] + gg_assign( gg_indirect(ptr), + gg_subtract(gg_indirect(ptr), + gg_cast(dest_type, sum))); + } + } + retval = true; + } + } + return retval; + } + +static bool +fast_multiply(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + + if( term_type && nB ) + { + tree term_type_B = largest_binary_term(nB, B); + if( term_type_B ) + { + if(TREE_INT_CST_LOW(TYPE_SIZE(term_type_B)) + > TREE_INT_CST_LOW(TYPE_SIZE(term_type)) ) + { + term_type = term_type_B; + } + } + else + { + term_type = NULL_TREE; + } + } + + if( term_type ) + { + // All the terms are things we can work with. + + tree valA = gg_define_variable(term_type); + tree valB = gg_define_variable(term_type); + get_binary_value(valA, NULL, A[0].field, refer_offset_dest(A[0])); + + if( nB ) + { + // This is a MULTIPLY Format 2 + get_binary_value(valB, NULL, B[0].field, refer_offset_dest(B[0])); + } + + if(nB) + { + gg_assign(valA, gg_multiply(valA, valB)); + } + + // We now either multiply into C[n] or assign A * B to C[n]: + for(size_t i=0; i<nC; i++ ) + { + tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( nB ) + { + // We put A * B into C + gg_assign(gg_indirect(ptr), gg_cast(dest_type, valA)); + } + else + { + // We multiply C = valA * C + gg_assign(gg_indirect(ptr), + gg_multiply(gg_indirect(ptr), valA)); + } + } + retval = true; + } + + //gg_insert_into_assembler("# DUBNER addition END "); + } + return retval; + } + +static bool +fast_divide(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_refer_t remainder) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + + if( term_type && nB ) + { + tree term_type_B = largest_binary_term(nB, B); + if( term_type_B ) + { + if(TREE_INT_CST_LOW(TYPE_SIZE(term_type_B)) + > TREE_INT_CST_LOW(TYPE_SIZE(term_type)) ) + { + term_type = term_type_B; + } + } + else + { + term_type = NULL_TREE; + } + } + + if( term_type ) + { + // All the terms are things we can work with. + + tree divisor = gg_define_variable(term_type); + tree dividend = gg_define_variable(term_type); + tree quotient = NULL_TREE; + get_binary_value(divisor, NULL, A[0].field, refer_offset_dest(A[0])); + + if( nB ) + { + // This is a MULTIPLY Format 2, where we are dividing A into B and + // assigning that to C + get_binary_value(dividend, NULL, B[0].field, refer_offset_dest(B[0])); + + quotient = gg_define_variable(term_type); + // Yes, in this case the divisor and dividend are switched. Things are + // tough all over. + gg_assign(quotient, gg_divide(divisor, dividend)); + } + + // We now either divide into C[n] or assign dividend/divisor to C[n]: + for(size_t i=0; i<nC; i++ ) + { + tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( nB ) + { + // We put A * B into C + gg_assign(gg_indirect(ptr), gg_cast(dest_type, quotient)); + } + else + { + // We divide the divisor into C + gg_assign(gg_indirect(ptr), + gg_divide(gg_indirect(ptr), divisor)); + } + + // This is where we handle any remainder, keeping in mind that for + // nB != 0, the actual dividend is in the value we have named "divisor". + // + // And, yes, I hate comments like that, too. + + // We calculate the remainder by calculating + // dividend minus quotient * divisor + if( remainder.field ) + { + tree dest_addr = gg_add(member(remainder.field->var_decl_node, "data"), + refer_offset_dest(remainder)); + dest_type = tree_type_from_size(remainder.field->data.capacity, 0); + ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + + gg_assign(gg_indirect(ptr), + gg_cast(dest_type, gg_subtract(divisor, + gg_multiply(quotient, dividend)))); + } + } + retval = true; + } + + //gg_insert_into_assembler("# DUBNER addition END "); + } + return retval; + } + +void +parser_add( size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error_p ) // Cast this to a tree / int * + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + fprintf(stderr, " A[%ld]:", nA); + for(size_t i=0; i<nA; i++) + { + if(i > 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", A[i].field->name); + } + + fprintf(stderr, "%s", format==giving_e? " GIVING" : ""); + + fprintf(stderr, " C[%ld]:", nC); + for(size_t i=0; i<nC; i++) + { + if(i > 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", C[i].refer.field->name); + } + + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + tree compute_error = (tree)compute_error_p; + if( compute_error == NULL ) + { + gg_assign(var_decl_default_compute_error, integer_zero_node); + compute_error = gg_get_address_of(var_decl_default_compute_error); + } + bool handled = false; + + if( fast_add( nC, C, + nA, A, + format) ) + { + handled = true; + } + else + { + bool computation_is_float = is_somebody_float(nA, A) + || is_somebody_float(nC, C); + // We now start deciding which arithmetic routine we are going to use: + if( computation_is_float ) + { + switch( format ) + { + case no_giving_e: + { + // Float format 1 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_float_phase1"); + + // Do phase 2, which accumulates the subtotal into each target location in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__addf1_float_phase2"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case giving_e: + { + // Float format 2 + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_float_phase1"); + + // Do phase 2, which puts the subtotal into each target location in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__float_phase2_assign_to_c"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case corresponding_e: + { + // Float format 3 + gcc_assert(nA == nC); + + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(nC, C, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__addf3"); + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case not_expected_e: + gcc_unreachable(); + break; + } + } + else + { + switch( format ) + { + case no_giving_e: + { + // Fixed format 1 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_fixed_phase1"); + + // Do phase 2, which accumulates the subtotal into each target location in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__addf1_fixed_phase2"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case giving_e: + { + // Fixed format 2 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_fixed_phase1"); + + // Do phase 2, which puts the subtotal into each target location in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__fixed_phase2_assign_to_c"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case corresponding_e: + { + // Fixed format 3 + gcc_assert(nA == nC); + + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(nC, C, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__addf3"); + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case not_expected_e: + gcc_unreachable(); + break; + } + } + } + + assert( handled ); + } + +void +parser_add( cbl_refer_t cref, + cbl_refer_t aref, + cbl_refer_t bref, + cbl_round_t rounded) + { + // This is the simple and innocent C = A + B + cbl_num_result_t C[1]; + C[0].rounded = rounded; + C[0].refer = cref; + + cbl_refer_t A[2]; + A[0] = aref; + A[1] = bref; + + parser_add( 1, C, + 2, A, + giving_e, + NULL, + NULL ); + } + +void +parser_multiply(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error_p ) // This is a pointer to an int + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + if( fast_multiply(nC, C, + nA, A, + nB, B) ) + { + + } + else + { + tree compute_error = (tree)compute_error_p; + + if( compute_error == NULL ) + { + gg_assign(var_decl_default_compute_error, integer_zero_node); + compute_error = gg_get_address_of(var_decl_default_compute_error); + } + + if( nB == 0 ) + { + // This is a FORMAT 1 multiply + + set_up_arithmetic_error_handler(error, + not_error); + // Phase 1 just converts identifier 1 to its intermediate form + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + not_expected_e, + error, + not_error, + compute_error, + "__gg__multiplyf1_phase1"); + + // Phase2 multiplies the intermediate by each destination in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation( 1, &C[i], + 0, NULL, + 0, NULL, + not_expected_e, + error, + not_error, + compute_error, + "__gg__multiplyf1_phase2"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + } + else + { + // This is a FORMAT 2 multiply + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation( nC, C, + nA, A, + nB, B, + not_expected_e, + error, + not_error, + compute_error, + "__gg__multiplyf2"); + arithmetic_error_handler( error, + not_error, + compute_error); + } + } + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("result operand C[0]: ", C[0].refer.field, ""); + TRACE1_END + } + } + +void +parser_divide( size_t nC, cbl_num_result_t *C, // C = A / B + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_refer_t remainder, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error_p ) // This is a pointer to an int + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + if( fast_divide(nC, C, + nA, A, + nB, B, + remainder) ) + { + + } + else + { + tree compute_error = (tree)compute_error_p; + + if( compute_error == NULL ) + { + gg_assign(var_decl_default_compute_error, integer_zero_node); + compute_error = gg_get_address_of(var_decl_default_compute_error); + } + + if( nB == 0 && !remainder.field ) + { + // This is a format 1 division + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(0, NULL, + nA, A, + 0, NULL, + not_expected_e, + NULL, + NULL, + compute_error, + "__gg__multiplyf1_phase1"); + + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + not_expected_e, + error, + not_error, + compute_error, + "__gg__dividef1_phase2"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + } + + if( nB && !remainder.field ) + { + // This is a format 2/3 division + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(nC, C, + 1, A, + 1, B, + not_expected_e, + error, + not_error, + compute_error, + "__gg__dividef23"); + + arithmetic_error_handler( error, + not_error, + compute_error); + } + + if( remainder.field ) + { + // This is a format 4/5 division + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(1, C, + 1, A, + 1, B, + not_expected_e, + error, + not_error, + compute_error, + "__gg__dividef45", + &remainder); + + arithmetic_error_handler( error, + not_error, + compute_error); + } + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + } + +void +parser_multiply(cbl_refer_t cref, + cbl_refer_t aref, + cbl_refer_t bref, + cbl_round_t rounded ) + { + cbl_num_result_t C[1]; + C[0].rounded = rounded; + C[0].refer = cref; + + cbl_refer_t A[1]; + A[0] = aref; + + cbl_refer_t B[1]; + B[0] = bref; + + parser_multiply(1, C, + 1, B, + 1, A, + NULL, + NULL ); + } + +void +parser_divide( cbl_refer_t cref, + cbl_refer_t aref, + cbl_refer_t bref, + cbl_round_t rounded, + cbl_refer_t remainder_ref ) + { + cbl_num_result_t C[1]; + C[0].rounded = rounded; + C[0].refer = cref; + + cbl_refer_t A[1]; + A[0] = aref; + + cbl_refer_t B[1]; + B[0] = bref; + + parser_divide( 1, C, + 1, A, + 1, B, + remainder_ref, + NULL, + NULL ); + } + +void +parser_op( struct cbl_refer_t cref, + struct cbl_refer_t aref, + int op, + struct cbl_refer_t bref, + struct cbl_label_t *compute_error_label) + { + Analyze(); + set_up_compute_error_label(compute_error_label); + + gg_assign(var_decl_default_compute_error, integer_zero_node); + tree compute_error = compute_error_label + ? gg_get_address_of( compute_error_label-> + structs.compute_error-> + compute_error_code) + : gg_get_address_of(var_decl_default_compute_error) ; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_REF(" ", cref) + SHOW_PARSE_TEXT(" = ") + SHOW_PARSE_REF("", aref) + char ach[4] = " "; + ach[1] = op; + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_REF("", bref) + SHOW_PARSE_END + } + + // We have to do the trace in before/after mode; parser_op(a, a, op, a) + // is a legitimate call. + TRACE1 + { + TRACE1_HEADER + char ach[4] = " "; + ach[1] = op; + TRACE1_TEXT_ABC("operation is \"", ach, "\"") + TRACE1_INDENT + TRACE1_REFER("operand A: ", aref, "") + TRACE1_INDENT + TRACE1_REFER("operand B: ", bref, "") + TRACE1_INDENT + TRACE1_TEXT_ABC("result will be ", cref.field->name, "") + TRACE1_END + } + + struct cbl_num_result_t for_call = {}; + for_call.rounded = truncation_e; + for_call.refer = cref; + + switch(op) + { + case '+': + { + cbl_refer_t A[2]; + A[0] = aref; + A[1] = bref; + parser_add( 1, &for_call, + 2, A, + giving_e, + NULL, + NULL, + compute_error ); + break; + } + + case '-': + { + cbl_refer_t A[1]; + cbl_refer_t B[1]; + A[0] = bref; + B[0] = aref; + // Yes, the A-ness and B-ness are not really consistent + parser_subtract(1, &for_call, + 1, A, + 1, B, + giving_e, + NULL, + NULL, + compute_error ); + break; + } + + case '*': + { + cbl_refer_t A[1]; + cbl_refer_t B[1]; + A[0] = bref; + B[0] = aref; + parser_multiply(1, &for_call, + 1, A, + 1, B, + NULL, + NULL, + compute_error ); + break; + } + + case '/': + { + cbl_refer_t A[1]; + cbl_refer_t B[1]; + A[0] = aref; + B[0] = bref; + parser_divide(1, &for_call, + 1, A, + 1, B, + NULL, + NULL, + NULL, + compute_error ); + break; + } + + case '^': + { + arithmetic_operation( 1, &for_call, + 1, &aref, + 1, &bref, + no_giving_e, + NULL, + NULL, + compute_error, + "__gg__pow", + NULL); + break; + } + default: + cbl_internal_error( "parser_op() doesn't know how to " + "evaluate \"%s = %s %c %s\"\n", + cref.field->name, + aref.field->name, + op, + bref.field->name); + break; + } + } + +void +parser_subtract(size_t nC, cbl_num_result_t *C, // C = B - A + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error_p ) // Cast this to a tree / int * + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + fprintf(stderr, " A[%ld]:", nA); + for(size_t i=0; i<nA; i++) + { + if(i > 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", A[i].field->name); + } + + fprintf(stderr, " B[%ld]:", nB); + for(size_t i=0; i<nB; i++) + { + if(i > 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", B[i].field->name); + } + + fprintf(stderr, " C[%ld]:", nC); + for(size_t i=0; i<nC; i++) + { + if(i > 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", C[i].refer.field->name); + } + + SHOW_PARSE_END + } + + // We are going to look for configurations that allow us to do binary + // arithmetic and quickly assign the results: + + // no_giving_e is format 1; giving_e is format 2. + + bool handled = false; + + tree compute_error = (tree)compute_error_p; + if( compute_error == NULL ) + { + gg_assign(var_decl_default_compute_error, integer_zero_node); + compute_error = gg_get_address_of(var_decl_default_compute_error); + } + + if( fast_subtract(nC, C, + nA, A, + nB, B, + format) ) + { + handled = true; + } + else + { + bool computation_is_float = is_somebody_float(nA, A) + || is_somebody_float(nC, C); + + // We now start deciding which arithmetic routine we are going to use: + + if( computation_is_float ) + { + switch( format ) + { + case no_giving_e: + { + // Float format 1 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_float_phase1"); + + // Do phase 2, which subtracts the subtotal from each target in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__subtractf1_float_phase2"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + + break; + } + + case giving_e: + { + // Float SUBTRACT Format 2 + + gcc_assert(nB == 1); + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + nB, B, + format, + error, + not_error, + compute_error, + "__gg__subtractf2_float_phase1"); + + // Do phase 2, which puts the subtotal into each target location in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__fixed_phase2_assign_to_c"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case corresponding_e: + { + // Float format 3 + gcc_assert(nA == nC); + + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(nC, C, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__subtractf3"); + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + + break; + } + + case not_expected_e: + gcc_unreachable(); + break; + } + } + else + { + switch( format ) + { + case no_giving_e: + { + // Fixed format 1 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_fixed_phase1"); + + // Do phase 2, which subtracts the subtotal from each target in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation(1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__subtractf1_fixed_phase2"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + + break; + } + + case giving_e: + { + // Fixed SUBTRACT Format 2 + + gcc_assert(nB == 1); + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + nB, B, + format, + error, + not_error, + compute_error, + "__gg__subtractf2_fixed_phase1"); + + // Do phase 2, which puts the subtotal into each target location in turn + for(size_t i=0; i<nC; i++) + { + arithmetic_operation( 1, &C[i], + 0, NULL, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__fixed_phase2_assign_to_c"); + } + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case corresponding_e: + { + // Fixed format 3 + gcc_assert(nA == nC); + + set_up_arithmetic_error_handler(error, + not_error); + arithmetic_operation(nC, C, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__subtractf3"); + arithmetic_error_handler( error, + not_error, + compute_error); + + handled = true; + break; + } + + case not_expected_e: + gcc_unreachable(); + break; + } + } + } + + if( !handled ) + { + abort(); + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + } + +void +parser_subtract(cbl_refer_t cref, // cref = aref - bref + cbl_refer_t aref, + cbl_refer_t bref, + cbl_round_t rounded ) + { + cbl_num_result_t C[1]; + C[0].rounded = rounded; + C[0].refer = cref; + + cbl_refer_t A[1]; + A[0] = aref; + + cbl_refer_t B[1]; + B[0] = bref; + + parser_subtract(1, C, // Beware: C = A - B, but the order has changed + 1, B, + 1, A, + giving_e, + NULL, + NULL ); + } diff --git a/gcc/cobol/genmath.h b/gcc/cobol/genmath.h new file mode 100644 index 0000000..9fc2fc3 --- /dev/null +++ b/gcc/cobol/genmath.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _GENMATH_H_ +#define _GENMATH_H_ + +void set_up_on_exception_label(cbl_label_t *arithmetic_label); +void set_up_compute_error_label(cbl_label_t *compute_label); + +#endif
\ No newline at end of file diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc new file mode 100644 index 0000000..f7708e8 --- /dev/null +++ b/gcc/cobol/genutil.cc @@ -0,0 +1,2642 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "gengen.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "genutil.h" +#include "structs.h" +#include "gcobolio.h" +#include "libgcobol.h" +#include "charmaps.h" +#include "show_parse.h" +#include "exceptl.h" +#include "exceptg.h" + +bool internal_codeset_is_ebcdic() { return gcobol_feature_internal_ebcdic(); } + +bool exception_location_active = true; +bool skip_exception_processing = true; + +bool suppress_dest_depends = false; + +#define SET_EXCEPTION_CODE(a) do{set_exception_code((a));}while(0); + +std::vector<std::string>current_filename; + +tree var_decl_exception_code; // int __gg__exception_code; +tree var_decl_exception_handled; // int __gg__exception_handled; +tree var_decl_exception_file_number; // int __gg__exception_file_number; +tree var_decl_exception_file_status; // int __gg__exception_file_status; +tree var_decl_exception_file_name; // const char *__gg__exception_file_name; +tree var_decl_exception_statement; // const char *__gg__exception_statement; +tree var_decl_exception_source_file; // const char *__gg__exception_source_file; +tree var_decl_exception_line_number; // int __gg__exception_line_number; +tree var_decl_exception_program_id; // const char *__gg__exception_program_id; +tree var_decl_exception_section; // const char *__gg__exception_section; +tree var_decl_exception_paragraph; // const char *__gg__exception_paragraph; + +tree var_decl_default_compute_error; // int __gg__default_compute_error; +tree var_decl_rdigits; // int __gg__rdigits; +tree var_decl_odo_violation; // int __gg__odo_violation; +tree var_decl_unique_prog_id; // size_t __gg__unique_prog_id; + +tree var_decl_entry_location; // This is for managing ENTRY statements +tree var_decl_exit_address; // This is for implementing pseudo_return_pop + +tree var_decl_call_parameter_signature; // char *__gg__call_parameter_signature +tree var_decl_call_parameter_count; // int __gg__call_parameter_count +tree var_decl_call_parameter_lengths; // size_t *__gg__call_parameter_count + +tree var_decl_return_code; // short __gg__data_return_code + +tree var_decl_arithmetic_rounds_size; // size_t __gg__arithmetic_rounds_size; +tree var_decl_arithmetic_rounds; // int* __gg__arithmetic_rounds; +tree var_decl_fourplet_flags_size; // size_t __gg__fourplet_flags_size; +tree var_decl_fourplet_flags; // int* __gg__fourplet_flags; + +tree var_decl_treeplet_1f; // cblc_field_pp_type_node , "__gg__treeplet_1f" +tree var_decl_treeplet_1o; // SIZE_T_P , "__gg__treeplet_1o" +tree var_decl_treeplet_1s; // SIZE_T_P , "__gg__treeplet_1s" +tree var_decl_treeplet_2f; // cblc_field_pp_type_node , "__gg__treeplet_2f" +tree var_decl_treeplet_2o; // SIZE_T_P , "__gg__treeplet_2o" +tree var_decl_treeplet_2s; // SIZE_T_P , "__gg__treeplet_2s" +tree var_decl_treeplet_3f; // cblc_field_pp_type_node , "__gg__treeplet_3f" +tree var_decl_treeplet_3o; // SIZE_T_P , "__gg__treeplet_3o" +tree var_decl_treeplet_3s; // SIZE_T_P , "__gg__treeplet_3s" +tree var_decl_treeplet_4f; // cblc_field_pp_type_node , "__gg__treeplet_4f" +tree var_decl_treeplet_4o; // SIZE_T_P , "__gg__treeplet_4o" +tree var_decl_treeplet_4s; // SIZE_T_P , "__gg__treeplet_4s" + +// There are times when I need to insert a NOP into the code, mainly to force +// a .loc directive into the assembly language so that the GDB-COBOL debugger +// can show the COBOL source code. This is true, for example, the CONTINUE +// statement which otherwise would produce no assembly language. Since I +// wasn't successful figuring out how to create an actual NOP assembly language +// instruction, I instead gg_assign(var_decl_nop, integer_zero_node) +tree var_decl_nop; // int __gg__nop; +tree var_decl_main_called; // int __gg__main_called; + +int +get_scaled_rdigits(cbl_field_t *field) + { + int retval; + if( !(field->attr & scaled_e) ) + { + // The value is not P-scaled, so we just use the unchanged rdigits value + retval = field->data.rdigits; + } + else + { + if( field->data.rdigits < 0 ) + { + // The PIC string was something like 999PPPP, which means an rdigits value + // of -4. We return zero; somebody else will have the job of multiplying + // the three significant digits by 10^4 to get the magnitude correct. + retval = 0; + } + else + { + // The PIC string was something like PPPP999, which means an rdigits value + // of +4. We return an rdigits value of 4 + 3 = 7, which will mean that + // the three significant digits will be scaled to 0.0000999 + retval = field->data.digits + field->data.rdigits; + } + } + return retval; + } + +int +get_scaled_digits(cbl_field_t *field) + { + int retval; + if( !(field->attr & scaled_e) ) + { + // The value is not P-scaled, so we just use the unchanged rdigits value + retval = field->data.digits; + } + else + { + if( field->data.rdigits < 0 ) + { + // The PIC string was something like 999PPPP, which means an rdigits value + // of -4. digits is 3, reflecting the 9(3). We return seven, reflecting + // that all of the final digits are to the left of the decimal point + retval = field->data.digits - field->data.rdigits; + } + else + { + // The PIC string was something like PPPP999, which means an rdigits value + // of +4. We return and rdigits value of 4 + 3 = 7, which will mean that + // the three significant digits will be scaled to 0.0000999 and all of the + // seven digits are to the left of the decimal point + retval = field->data.digits + field->data.rdigits; + } + } + return retval; + } + +tree +tree_type_from_digits(size_t digits, int signable) + { + tree retval = NULL_TREE; + + if( signable ) + { + if(digits <= 2 ) + { + retval = CHAR; + } + else if (digits <= 4 ) + { + retval = SHORT; + } + else if (digits <= 9 ) + { + retval = INT; + } + else if (digits <= 18 ) + { + retval = LONGLONG; + } + else + { + retval = INT128; + } + } + else + { + if(digits <= 2 ) + { + retval = UCHAR; + } + else if (digits <= 4 ) + { + retval = USHORT; + } + else if (digits <= 9 ) + { + retval = UINT; + } + else if (digits <= 18 ) + { + retval = ULONGLONG; + } + else + { + retval = UINT128; + } + } + return retval; + } + +void +get_integer_value(tree value, + cbl_field_t *field, + tree offset, + bool check_for_fractional_digits) + { + Analyze(); + // Call this routine when you know the result has to be an integer with no + // rdigits. This routine became necessary the first time I saw an + // intermediate value for an array subscript: table((3 + 1) / 2)) + // + // If the field_i has rdigits, and if any of those rdigits are non-zero, we + // return a 1 so that our caller can decide what to do. + + static tree temp = gg_define_variable(INT128, "..giv_temp", vs_file_static); + static tree rdigits = gg_define_variable(INT, "..giv_rdigits", vs_file_static); + + if( field->attr & intermediate_e ) + { + // Get the binary value, which for 99V99 can be 1234, meaning 12.34 + get_binary_value(temp, NULL, field, offset); + + // Pick up the run-time number of rdigits: + gg_assign(rdigits, gg_cast(INT, member(field, "rdigits"))); + + // Scale by the number of rdigits, which turns 12.34 into 12. + // When check_for_fractional_digits is true, __gg__rdigits will be set + // to 1 for 12.34, and will be set to zero 12.00 + scale_by_power_of_ten(temp, + gg_negate(rdigits), + check_for_fractional_digits); + } + else + { + get_binary_value(temp, rdigits, field, offset); + scale_by_power_of_ten_N(temp, + -get_scaled_rdigits(field), + check_for_fractional_digits); + } + gg_assign(value, gg_cast(TREE_TYPE(value), temp)); + } + +static tree +get_data_offset_dest(cbl_refer_t &refer, + int *pflags = NULL) + { + Analyze(); + // This routine returns a tree which is the size_t offset to the data in the + // refer/field + + // Because this is for destination/receiving variables, OCCURS DEPENDING ON + // is not checked. + + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, size_t_zero_node); + + // We have a refer. + // At the very least, we have an constant offset + int all_flags = 0; + int all_flag_bit = 1; + + static tree value64 = gg_define_variable(LONG, ".._gdod_value64", vs_file_static); + + if( refer.nsubscript ) + { + // We have at least one subscript: + + // Figure we have three subscripts, so nsubscript is 3 + // Figure that the subscripts are {5, 4, 3} + + // We expect that starting from refer.field, that three of our ancestors -- + // call them A1, A2, and A3 -- have occurs clauses. + + // We need to start with the rightmost subscript, and work our way up through + // our parents. As we find each parent with an OCCURS, we increment qual_data + // by (subscript-1)*An->data.capacity + + // Establish the field_t pointer for walking up through our ancestors: + cbl_field_t *parent = refer.field; + + // Note the backwards test, because refer->nsubscript is an unsigned value + for(size_t i=refer.nsubscript-1; i<refer.nsubscript; i-- ) + { + // We need to search upward for an ancestor with occurs_max: + while(parent) + { + if( parent->occurs.ntimes() ) + { + break; + } + parent = parent_of(parent); + } + // we might have an error condition at this point: + if( !parent ) + { + cbl_internal_error("Too many subscripts"); + } + // Pick up the integer value of the subscript: + static tree subscript = gg_define_variable(LONG, "..gdod_subscript", vs_file_static); + + if( process_this_exception(ec_bound_subscript_e) ) + { + get_integer_value(value64, + refer.subscripts[i].field, + refer_offset_dest(refer.subscripts[i]), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + // The subscript isn't an integer + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: a table subscript is not an integer"); + } + } + ELSE + { + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), value64)); + } + ENDIF + } + else + { + get_integer_value(subscript, + refer.subscripts[i].field, + refer_offset_dest(refer.subscripts[i])); + } + + // gg_printf("%s(): We have a subscript of %d from %s\n", + // gg_string_literal(__func__), + // subscript, + // gg_string_literal(refer.subscripts[i].field->name), + // NULL_TREE); + + if( (refer.subscripts[i].field->attr & FIGCONST_MASK) == zero_value_e ) + { + // This refer is a figconst ZERO; we treat it as an ALL ZERO + // This is our internal representation for ALL, as in TABLE(ALL) + + // Set the subscript to 1 + gg_assign(subscript, + build_int_cst_type( TREE_TYPE(subscript), 1)); + // Flag this position as ALL + all_flags |= all_flag_bit; + } + all_flag_bit <<= 1; + + // Subscript is now a one-based integer + // Make it zero-based: + + gg_decrement(subscript); + if( process_this_exception(ec_bound_subscript_e) ) + { + // gg_printf("process_this_exception is true\n", NULL_TREE); + IF( subscript, lt_op, gg_cast(TREE_TYPE(subscript), integer_zero_node) ) + { + // The subscript is too small + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + ELSE + { + // gg_printf("parent->occurs.ntimes() is %d\n", build_int_cst_type(INT, parent->occurs.ntimes()), NULL_TREE); + IF( subscript, + ge_op, + build_int_cst_type(TREE_TYPE(subscript), parent->occurs.ntimes()) ) + { + // The subscript is too large + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: table subscript is too large"); + } + } + ELSE + { + // We have a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + ENDIF + } + ENDIF + } + else + { + // Assume a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + parent = parent_of(parent); + } + } + + if( refer.refmod.from ) + { + // We have a refmod to deal with + static tree refstart = gg_define_variable(LONG, "..gdos_refstart", vs_file_static); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + // refmod offset is not an integer, and has to be + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("error: a refmod FROM is not an integer"); + } + } + ELSE + gg_assign(refstart, value64); + ENDIF + } + else + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from) + ); + gg_assign(refstart, value64); + } + + // Make refstart zero-based: + gg_decrement(refstart); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + IF( refstart, lt_op, gg_cast(LONG, integer_zero_node) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is less than one"); + } + } + ELSE + { + IF( refstart, gt_op, build_int_cst_type(LONG, refer.field->data.capacity) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is too large"); + } + } + ELSE + ENDIF + } + ENDIF + } + + // We have a good refstart + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, refstart))); + } + + if( pflags ) + { + *pflags = all_flags; + } + +// gg_printf("*****>>>>> %s(): returning %p\n", +// gg_string_literal(__func__), +// retval, +// NULL_TREE); + return retval; + } + +static tree +get_data_offset_source(cbl_refer_t &refer, + int *pflags = NULL) + { + Analyze(); + // This routine returns a tree which is the size_t offset to the data in the + // refer/field + + // Because this is for source / sending variables, checks are made for + // OCCURS DEPENDING ON violations (when those exceptions are enabled) + + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, size_t_zero_node); + + // We have a refer. + // At the very least, we have an constant offset + int all_flags = 0; + int all_flag_bit = 1; + + static tree value64 = gg_define_variable(LONG, ".._gdos_value64", vs_file_static); + + if( refer.nsubscript ) + { + // We have at least one subscript: + + // Figure we have three subscripts, so nsubscript is 3 + // Figure that the subscripts are {5, 4, 3} + + // We expect that starting from refer.field, that three of our ancestors -- + // call them A1, A2, and A3 -- have occurs clauses. + + // We need to start with the rightmost subscript, and work our way up through + // our parents. As we find each parent with an OCCURS, we increment qual_data + // by (subscript-1)*An->data.capacity + + // Establish the field_t pointer for walking up through our ancestors: + cbl_field_t *parent = refer.field; + + // Note the backwards test, because refer->nsubscript is an unsigned value + for(size_t i=refer.nsubscript-1; i<refer.nsubscript; i-- ) + { + // We need to search upward for an ancestor with occurs_max: + while(parent) + { + if( parent->occurs.ntimes() ) + { + break; + } + parent = parent_of(parent); + } + // we might have an error condition at this point: + if( !parent ) + { + cbl_internal_error("Too many subscripts"); + } + // Pick up the integer value of the subscript: +// static tree subscript = gg_define_variable(LONG, "..gdos_subscript", vs_file_static); + tree subscript = gg_define_variable(LONG); + + if( process_this_exception(ec_bound_subscript_e) ) + { + get_integer_value(value64, + refer.subscripts[i].field, + refer_offset_source(refer.subscripts[i]), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + // The subscript isn't an integer + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: a table subscript is not an integer"); + } + } + ELSE + { + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), value64)); + } + ENDIF + } + else + { + get_integer_value(subscript, + refer.subscripts[i].field, + refer_offset_source(refer.subscripts[i])); + } + + // gg_printf("%s(): We have a subscript of %d from %s\n", + // gg_string_literal(__func__), + // subscript, + // gg_string_literal(refer.subscripts[i].field->name), + // NULL_TREE); + + if( (refer.subscripts[i].field->attr & FIGCONST_MASK) == zero_value_e ) + { + // This refer is a figconst ZERO; we treat it as an ALL ZERO + // This is our internal representation for ALL, as in TABLE(ALL) + + // Set the subscript to 1 + gg_assign(subscript, + build_int_cst_type( TREE_TYPE(subscript), 1)); + // Flag this position as ALL + all_flags |= all_flag_bit; + } + all_flag_bit <<= 1; + + // Subscript is now a one-based integer + // Make it zero-based: + + gg_decrement(subscript); + if( process_this_exception(ec_bound_subscript_e) ) + { + // gg_printf("process_this_exception is true\n", NULL_TREE); + IF( subscript, lt_op, gg_cast(TREE_TYPE(subscript), integer_zero_node) ) + { + // The subscript is too small + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + ELSE + { + // gg_printf("parent->occurs.ntimes() is %d\n", build_int_cst_type(INT, parent->occurs.ntimes()), NULL_TREE); + IF( subscript, + ge_op, + build_int_cst_type(TREE_TYPE(subscript), parent->occurs.ntimes()) ) + { + // The subscript is too large + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: table subscript is too large"); + } + } + ELSE + { + // We have a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + ENDIF + } + ENDIF + } + else + { + // Assume a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + parent = parent_of(parent); + } + } + + if( refer.refmod.from ) + { + // We have a refmod to deal with + static tree refstart = gg_define_variable(LONG, "..gdo_refstart", vs_file_static); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + // refmod offset is not an integer, and has to be + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("error: a refmod FROM is not an integer"); + } + } + ELSE + gg_assign(refstart, value64); + ENDIF + } + else + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from) + ); + gg_assign(refstart, value64); + } + + // Make refstart zero-based: + gg_decrement(refstart); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + IF( refstart, lt_op, gg_cast(LONG, integer_zero_node) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is less than one"); + } + } + ELSE + { + IF( refstart, gt_op, build_int_cst_type(LONG, refer.field->data.capacity) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is too large"); + } + } + ELSE + ENDIF + } + ENDIF + } + + // We have a good refstart + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, refstart))); + } + + if( pflags ) + { + *pflags = all_flags; + } + + +// gg_printf("*****>>>>> %s(): returning %p\n", +// gg_string_literal(__func__), +// retval, +// NULL_TREE); + return retval; + } + +void +get_binary_value( tree value, + tree rdigits, + cbl_field_t *field, + tree field_offset, + tree hilo + ) + { + Analyze(); + if( hilo ) + { + gg_assign(hilo, integer_zero_node); + } + + bool needs_scaling = true; + static const bool debugging=false; + + // Very special case: + if( strcmp(field->name, "ZEROS") == 0 ) + { + gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); + if( rdigits ) + { + gg_assign(rdigits, gg_cast(TREE_TYPE(rdigits), integer_zero_node)); + } + return; + } + + static tree pointer = gg_define_variable(UCHAR_P, "..gbv_pointer", vs_file_static); + static tree pend = gg_define_variable(UCHAR_P, "..gbv_pend", vs_file_static); + + switch(field->type) + { + case FldLiteralN: + { + if( SCALAR_FLOAT_TYPE_P(value) ) + { + gg_assign(value, gg_cast(TREE_TYPE(value), field->literal_decl_node)); + } + else + { + if( rdigits ) + { + gg_assign(rdigits, build_int_cst_type(TREE_TYPE(rdigits), + field->data.rdigits)); + } + tree dest_type = TREE_TYPE(value); + tree source_type = tree_type_from_field(field); + + gg_assign(value, + gg_cast(dest_type, + gg_indirect( gg_cast(build_pointer_type(source_type), + gg_get_address_of(field->data_decl_node))))); + } + + break; + } + + case FldNumericDisplay: + { + Analyzer.Message("FldNumericDisplay"); + // Establish the source + tree source_address = get_data_address(field, field_offset); + + // We need to check early on for HIGH-VALUE and LOW-VALUE + // Pick up the byte + tree digit = gg_get_indirect_reference(source_address, NULL_TREE); + IF( digit, eq_op, build_int_cst(UCHAR, 0xFF) ) + { + if( hilo ) + { + gg_assign(hilo, integer_one_node); + } + if( rdigits ) + { + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); + } + gg_assign(value, build_int_cst_type(TREE_TYPE(value), 0xFFFFFFFFFFFFFFFUL)); + } + ELSE + { + IF( digit, eq_op, build_int_cst(UCHAR, 0x00) ) + { + if( hilo ) + { + gg_assign(hilo, integer_minus_one_node); + } + } + ELSE + { + // Establish rdigits: + if( rdigits ) + { + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); + } + // Zero out the destination + gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); + // Pick up a pointer to the source bytes: + + gg_assign(pointer, source_address); + + // This is the we-are-done pointer + gg_assign(pend, gg_add( pointer, + build_int_cst_type(SIZE_T, field->data.capacity))); + + static tree signbyte = gg_define_variable(UCHAR, "..gbv_signbyte", vs_file_static); + + // The big decision is whether or not the variable is signed: + if( field->attr & signable_e ) + { + // The variable is signed + if( field->attr & separate_e ) + { + // The sign byte is separate + if( field->attr & leading_e) + { + // The first byte is '+' or '-' + gg_increment(pointer); + } + else + { + // The final byte is '+' or '-' + gg_decrement(pend); + } + } + else + { + // The sign byte is internal + if( field->attr & leading_e) + { + // The first byte has the sign bit: + gg_assign(signbyte, + gg_get_indirect_reference(source_address, NULL_TREE)); + if( internal_codeset_is_ebcdic() ) + { + // We need to make sure the EBCDIC sign bit is ON, for positive + gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), + gg_bitwise_or(signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + else + { + // We need to make sure the ascii sign bit is Off, for positive + gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), + gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + else + { + // The final byte has the sign bit: + gg_assign(signbyte, + gg_get_indirect_reference(source_address, + build_int_cst_type(SIZE_T, + field->data.capacity-1))); + if( internal_codeset_is_ebcdic() ) + { + // We need to make sure the EBCDIC sign bit is ON, for positive + gg_assign(gg_get_indirect_reference(source_address, + build_int_cst_type( SIZE_T, + field->data.capacity-1)), + gg_bitwise_or(signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + else + { + // We need to make sure the ASCII sign bit is Off, for positive + gg_assign(gg_get_indirect_reference(source_address, + build_int_cst_type( SIZE_T, + field->data.capacity-1)), + gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + } + } + // We can now set up the byte-by-byte processing loop: + if( internal_codeset_is_ebcdic() ) + { + // We are working in EBCDIC + WHILE( pointer, lt_op, pend ) + { + // Pick up the byte + digit = gg_get_indirect_reference(pointer, NULL_TREE); + IF( digit, lt_op, build_int_cst_type(UCHAR, EBCDIC_ZERO) ) + { + // break on a non-digit + gg_assign(pointer, pend); + } + ELSE + { + IF( digit, gt_op, build_int_cst_type(UCHAR, EBCDIC_NINE) ) + { + // break on a non-digit + gg_assign(pointer, pend); + } + ELSE + { + // Whether ASCII or EBCDIC, the bottom four bits tell the tale: + // Multiply our accumulator by ten: + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + // And add in the current digit + gg_assign(value, + gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and( digit, + build_int_cst_type(UCHAR, 0x0F) )))); + gg_increment(pointer); + } + ENDIF + } + ENDIF + } + WEND + } + else + { + // We are working in ASCII: + WHILE( pointer, lt_op, pend ) + { + // Pick up the byte + digit = gg_get_indirect_reference(pointer, NULL_TREE); + // Whether ASCII or EBCDIC, the bottom four bits tell the tale: + // Multiply our accumulator by ten: + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + // And add in the current digit + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F))))); + gg_increment(pointer); + } + WEND + } + + // Value contains the binary value. The last thing is to apply -- and + // undo -- the signable logic: + + if( field->attr & signable_e ) + { + // The variable is signed + if( field->attr & separate_e ) + { + // The sign byte is separate + if( field->attr & leading_e) + { + // The first byte is '+' or '-' + if( internal_codeset_is_ebcdic() ) + { + // We are operating in EBCDIC, so we look for a 96 (is minus sign) + IF( gg_get_indirect_reference(source_address, NULL_TREE), + eq_op, + build_int_cst_type(UCHAR, 96) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + else + { + // We are operating in ASCII + IF( gg_get_indirect_reference(source_address, NULL_TREE), + eq_op, + build_int_cst_type(UCHAR, '-') ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + } + else + { + // The final byte is '+' or '-' + if( internal_codeset_is_ebcdic() ) + { + // We are operating in EBCDIC, so we look for a 96 (is minus sign) + IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), + eq_op, + build_int_cst_type(UCHAR, 96) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + else + { + // We are operating in ASCII + IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), + eq_op, + build_int_cst_type(UCHAR, '-') ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + } + } + else + { + // The sign byte is internal. Check the sign bit + if(internal_codeset_is_ebcdic()) + { + IF( gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), eq_op, build_int_cst_type(UCHAR, 0) ) + { + // The EBCDIC sign bit was OFF, so negate the result + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + else + { + IF( gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), ne_op, build_int_cst_type(UCHAR, 0) ) + { + // The ASCII sign bit was on, so negate the result + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + // It's time to put back the original data: + if( field->attr & leading_e) + { + // The first byte has the sign bit: + gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), + signbyte); + } + else + { + // The final byte has the sign bit: + gg_assign(gg_get_indirect_reference(source_address, + build_int_cst_type(SIZE_T, field->data.capacity-1)), + signbyte); + } + } + } + } + ENDIF + } + ENDIF + + break; + } + + case FldNumericBinary: + { + // As of this writing, the source value is big-endian + // We have to convert it to a little-endian destination. + tree dest = gg_cast(build_pointer_type(UCHAR), gg_get_address_of(value)); + tree source = get_data_address(field, field_offset); + + size_t dest_nbytes = gg_sizeof(value); + size_t source_nbytes = field->data.capacity; + + if( debugging ) + { + gg_printf("dest_bytes/source_bytes %ld/%ld\n", + build_int_cst_type(SIZE_T, dest_nbytes), + build_int_cst_type(SIZE_T, source_nbytes), + NULL_TREE); + gg_printf("Starting value: ", NULL_TREE); + hex_dump(source, source_nbytes); + gg_printf("\n", NULL_TREE); + } + + if( dest_nbytes <= source_nbytes ) + { + // Destination is too small. We will move what we can, throwing away + // the most significant source bytes: + for(size_t i=0; i<dest_nbytes; i++) + { + gg_assign(gg_array_value(dest, i), + gg_array_value(source, source_nbytes-1-i) ); + } + } + else + { + // Destination is too big. We'll need to fill the high-order bytes with + // either 0x00 for positive numbers, or 0xFF for negative + static tree extension = gg_define_variable( UCHAR, + "..gbv_extension", + vs_file_static); + if( field->attr & signable_e ) + { + IF( gg_array_value(gg_cast(build_pointer_type(SCHAR), source)), lt_op, gg_cast(SCHAR, integer_zero_node) ) + { + gg_assign(extension, build_int_cst_type(UCHAR, 0xFF)); + } + ELSE + { + gg_assign(extension, build_int_cst_type(UCHAR, 0)); + } + ENDIF + } + else + { + gg_assign(extension, build_int_cst_type(UCHAR, 0)); + } + + // Flip the source end-for-end and put it into the dest: + size_t i=0; + while(i < source_nbytes) + { + gg_assign(gg_array_value(dest, i), + gg_array_value(source, source_nbytes-1-i) ); + i += 1; + } + // Fill the extra high-end bytes with 0x00 or 0xFF extension + + while(i < dest_nbytes) + { + gg_assign(gg_array_value(dest, i), + extension); + i += 1; + } + } + if( debugging ) + { + gg_printf("Ending value: ", NULL_TREE); + hex_dump(dest, dest_nbytes); + gg_printf("\n", NULL_TREE); + } + break; + } + + case FldNumericBin5: + case FldIndex: + case FldPointer: + { + if( field->attr & intermediate_e ) + { + // It is a intermediate, so rdigits has to come from the run-time structure + if( rdigits ) + { + gg_assign(rdigits, + gg_cast( TREE_TYPE(rdigits), + member(field, "rdigits"))); + } + } + else + { + // It isn't an intermediate, so we can safely use field->rdigits + if( rdigits ) + { + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); + } + } + tree source_address = get_data_address(field, field_offset); + tree dest_type = TREE_TYPE(value); + tree source_type = tree_type_from_size( field->data.capacity, + field->attr & signable_e); + if( debugging && rdigits) + { + gg_printf("get_binary_value bin5 rdigits: %d\n", rdigits, NULL_TREE); + } + + gg_assign(value, + gg_cast(dest_type, + gg_indirect(gg_cast( build_pointer_type(source_type), + source_address )))); + break; + } + + case FldPacked: + { + // Zero out the destination: + gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); + gg_assign(pointer, get_data_address(field, field_offset)); + gg_assign(pend, + gg_add(pointer, + build_int_cst_type(SIZE_T, field->data.capacity-1))); + + // Convert all but the last byte of the packed decimal sequence + WHILE( pointer, lt_op, pend ) + { + // Convert the first nybble + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_rshift(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst(UINT, 4))))); + + // Convert the second nybble + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF))))); + gg_increment(pointer); + } + WEND + + // This is the final byte: + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_rshift(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst(UINT, 4))))); + + IF( gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF)), eq_op, build_int_cst_type(UCHAR, 0x0D) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + { + IF( gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF)), eq_op, build_int_cst_type(UCHAR, 0x0B) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + ENDIF + break; + } + + case FldFloat: + { + // We are going to assume that the float value contains an integer. + if( rdigits ) + { + gg_assign(rdigits, + gg_cast( TREE_TYPE(rdigits), integer_zero_node)); + } + gg_assign(value, + gg_cast(TREE_TYPE(value), + gg_call_expr( INT128, + "__gg__integer_from_float128", + gg_get_address_of(field->var_decl_node), + NULL_TREE))); + needs_scaling = false; + break; + } + + case FldAlphanumeric: + { + + } + + + default: + { + fprintf(stderr, "%s(): We know not how to" + " get a binary value from %s\n", + __func__, + cbl_field_type_str(field->type) ); + abort(); + break; + } + } + + if( needs_scaling ) + { + if( field->attr & scaled_e ) + { + if( field->data.rdigits < 0 ) + { + scale_by_power_of_ten_N(value, -field->data.rdigits); + } + } + } + } + +tree +tree_type_from_field(cbl_field_t *field) + { + gcc_assert(field); + return tree_type_from_size(field->data.capacity, field->attr & signable_e); + } + +tree +get_data_address( cbl_field_t *field, + tree offset) // Offset is SIZE_T + { + if( offset ) + { + return gg_cast( UCHAR_P, + gg_add( gg_cast(SIZE_T, + member( field->var_decl_node, + "data")), + offset)); + } + else + { + return member(field->var_decl_node, "data"); + } + } + +__int128 +get_power_of_ten(int n) + { + // 2** 64 = 1.8E19 + // 2**128 = 3.4E38 + __int128 retval = 1; + static const int MAX_POWER = 19 ; + static const __int128 pos[MAX_POWER+1] = + { + 1ULL, // 00 + 10ULL, // 01 + 100ULL, // 02 + 1000ULL, // 03 + 10000ULL, // 04 + 100000ULL, // 05 + 1000000ULL, // 06 + 10000000ULL, // 07 + 100000000ULL, // 08 + 1000000000ULL, // 09 + 10000000000ULL, // 10 + 100000000000ULL, // 11 + 1000000000000ULL, // 12 + 10000000000000ULL, // 13 + 100000000000000ULL, // 14 + 1000000000000000ULL, // 15 + 10000000000000000ULL, // 16 + 100000000000000000ULL, // 17 + 1000000000000000000ULL, // 18 + 10000000000000000000ULL, // 19 + }; + if( n < 0 || n>MAX_POWER*2) // The most we can handle is 10**38 + { + fprintf(stderr, "Trying to raise 10 to %d as an int128, which we can't do.\n", n); + fprintf(stderr, "The problem is in %s.\n", __func__); + abort(); + } + if( n <= MAX_POWER ) + { + // Up to 10**18 we do directly: + retval = pos[n]; + } + else + { + // 19 through 38 is handled in a second step, because when this was written, + // GCC couldn't handle __int128 constants: + retval = pos[n/2]; + retval *= retval; + if( n & 1 ) + { + retval *= 10; + } + } + return retval; + } + +void +scale_by_power_of_ten_N(tree value, + int N, + bool check_for_fractional) + { + // This routine is called when we know N at compile time. + + Analyze(); + Analyzer.Message("takes int N"); + if( N == 0 ) + { + if( check_for_fractional ) + { + gg_assign(var_decl_rdigits, integer_zero_node); + } + } + else if( N > 0 ) + { + if( check_for_fractional ) + { + gg_assign(var_decl_rdigits, integer_zero_node); + } + tree value_type = TREE_TYPE(value); + __int128 power_of_ten = get_power_of_ten(N); + gg_assign(value, gg_multiply(value, build_int_cst_type( value_type, + power_of_ten))); + } + if( N < 0 ) + { + tree value_type = TREE_TYPE(value); + __int128 power_of_ten = get_power_of_ten(-N); + if( check_for_fractional ) + { + IF( gg_mod(value, build_int_cst_type( value_type, + power_of_ten)), + ne_op, + gg_cast(value_type, integer_zero_node) ) + { + gg_assign(var_decl_rdigits, integer_one_node); + } + ELSE + gg_assign(var_decl_rdigits, integer_zero_node); + ENDIF + } + gg_assign(value, gg_divide(value, build_int_cst_type( value_type, + power_of_ten))); + } + } + +tree +scale_by_power_of_ten(tree value, + tree N, + bool check_for_fractional) + { + Analyze(); + static tree retval = gg_define_variable(INT, "..sbpot2_retval", vs_file_static); + + if( check_for_fractional ) + { + // Our caller expects us to return 1 if value was something like 99v99 and + // the fractional part was non-zero + gg_assign(value, + gg_cast(TREE_TYPE(value), + gg_call_expr(INT128, + "__gg__scale_by_power_of_ten_1", + gg_cast(INT128, value), + N, + NULL_TREE))); + } + else + { + // Our caller does not expect us to test for fractional values + gg_assign(value, + gg_cast(TREE_TYPE(value), + gg_call_expr(INT128, + "__gg__scale_by_power_of_ten_2", + gg_cast(INT128, value), + N, + NULL_TREE))); + + } + + gg_assign(retval, integer_zero_node); + return retval; + } + +void +scale_and_round(tree value, + int value_rdigits, + bool target_is_signable, + int target_rdigits, + cbl_round_t rounded) + { + if( !target_is_signable ) + { + // The target has to be positive, so take the absolute value of the input + gg_assign(value, gg_abs(value)); + } + + if( target_rdigits >= value_rdigits ) + { + // The value doesn't have enough rdigits. All we need to do is multiply it + // by a power of ten to get it right: + scale_by_power_of_ten_N(value, + target_rdigits - value_rdigits); + } + else + { + // The value has too few rdigits. + switch(rounded) + { + case nearest_away_from_zero_e: + { + // This is rounding away from zero + + // We want to adjust value so that the extra digit is in the units + // place: + scale_by_power_of_ten_N(value, + target_rdigits - value_rdigits + 1); + // Add five to the result: + IF( value, lt_op, gg_cast(TREE_TYPE(value), integer_zero_node) ) + { + gg_assign(value, + gg_add( value, + build_int_cst_type(TREE_TYPE(value), -5))); + } + ELSE + { + gg_assign(value, + gg_add( value, + build_int_cst_type(TREE_TYPE(value), +5))); + } + // And now get rid of the lowest decimal digit + scale_by_power_of_ten_N(value, -1); + + break; + } + + case truncation_e: + { + // Without rounding, just scale the result + scale_by_power_of_ten_N(value, target_rdigits - value_rdigits); + break; + } + default: + abort(); + break; + } + } + } + +void +hex_dump(tree data, size_t bytes) + { + gg_printf("0x", NULL_TREE); + for(size_t i=0; i<bytes; i++) + { + gg_printf("%2.2x", + gg_cast(UINT, + gg_array_value( gg_cast(build_pointer_type(UCHAR), data), + i)), + NULL_TREE); + } + } + +tree +tree_type_from_size(size_t bytes, int signable) + { + tree retval = NULL_TREE; + + if( signable ) + { + switch( bytes ) + { + case 1: + retval = CHAR; + break; + case 2: + retval = SHORT; + break; + case 4: + retval = INT; + break; + case 8: + retval = LONGLONG; + break; + case 16: + retval = INT128; + break; + default: + gcc_unreachable(); + break; + } + } + else + { + switch( bytes ) + { + case 1: + retval = UCHAR; + break; + case 2: + retval = USHORT; + break; + case 4: + retval = UINT; + break; + case 8: + retval = ULONGLONG; + break; + case 16: + retval = UINT128; + break; + default: + gcc_unreachable(); + break; + } + } + return retval; + } + +static +bool +refer_has_depends(cbl_refer_t &refer, refer_type_t refer_type) + { + if( suppress_dest_depends ) + { + // This is set, for example, by parser_initialize, which needs to set a + // variable's value regardless of the impact of a DEPENDING ON clause. + return false; + } + + if( refer.field + && (refer.field->attr & (intermediate_e)) ) + { + // This field can't have a DEPENDING ON + return false; + } + + // Check if there there is an occurs with a depending_on in the hierarchy + bool proceed = false; + cbl_field_t *odo = symbol_find_odo(refer.field); + cbl_field_t *depending_on; + if( odo && odo != refer.field ) + { + // We have an ODO and refer.field is not the ODO, so we can keep looking + depending_on = cbl_field_of(symbol_at(odo->occurs.depending_on)); + if( depending_on->var_decl_node ) + { + // The depending_on has been initialized + if( refer_type == refer_source ) + { + proceed = true; + } + else + { + // In ISO/IEC 1989:2023, "OCCURS 13.18.38.4 General rules", talks about the + // three situations we know how to deal with. + + // Rule 7) We need to detect if depending_on is completely independent + // of refer.field + cbl_field_t *p; + cbl_field_t *parent1 = refer.field; + while( (p = parent_of(parent1)) ) + { + parent1 = p; + } + cbl_field_t *parent2 = depending_on; + while( (p = parent_of(parent2)) ) + { + parent2 = p; + } + if( parent1 != parent2 ) + { + // refer.field and depending_on have two different ultimate parents, so + // Rule 7) applies, and we have to trim the destination according to + // depending_on + //gg_printf("Rule 7 applies\n", NULL_TREE); + proceed = true; + } + else + { + // Rule 7) doesn't apply, so we have to check Rule 8) + // In this case: + // 01 digtab. + // 05 depl pic 9. + // 05 digitgrp. + // 10 digits occurs 1 to 9 depending on depl pic x. + // MOVE ... TO digitgrp + // The DEPENDING ON variable depl is not subordinate to digitgrp, and + // consequently we have to trim according to depl: + if( depending_on->offset < refer.field->offset ) + { + // depending_on comes before refer.field, so rule 8a) applies + //gg_printf("Rule 8a) applies\n", NULL_TREE); + proceed = true; + } + else + { + // depending_on comes after refer.field, so Rule 8b) for receiving + // items applies, and we will not trim according to depending_on + //gg_printf("Rule 8b) applies\n", NULL_TREE); + } + } + } + } + } + return proceed; + } + +void +set_exception_code_func(ec_type_t ec, int /*line*/, int from_raise_statement) + { + if( ec ) + { + gg_call(VOID, + "__gg__set_exception_code", + build_int_cst_type(INT, ec), + build_int_cst_type(INT, from_raise_statement), + NULL_TREE); + } + else + { + gg_printf("set_exception_code: set it to ZERO\n", NULL_TREE); + gg_assign(var_decl_exception_code, integer_zero_node); + } + } + +bool +process_this_exception(ec_type_t ec) + { + bool retval; + if( enabled_exceptions.match(ec) || !skip_exception_processing ) + { + retval = true; + } + else + { + retval = false; + } + return retval; + } + +void +rt_error(const char *msg) + { + // Come here with a fatal run-time error message + char ach[256]; + snprintf( ach, sizeof(ach), "%s:%d: %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + msg); + gg_printf("%s\n", gg_string_literal(ach), NULL_TREE); + gg_abort(); + } + +void +copy_little_endian_into_place(cbl_field_t *dest, + tree dest_offset, + tree value, + int rhs_rdigits, + bool check_for_error, + tree &size_error) + { + if( check_for_error ) + { + // We need to see if value can fit into destref + + // We do this by comparing value to 10^(lhs.ldigits + rhs_rdigits) + // Example: rhs is 123.45, whichis 12345 with rdigits 2 + // lhs is 99.999. So, lhs.digits is 5, and lhs.rdigits is 3. + // 10^(5 - 3 + 2) is 10^4, which is 10000. Because 12345 is >= 10000, the + // source can't fit into the destination. + + // Note: I am not trying to avoid the use of stack variables, because I am + // not sure how to declare a file-static variable of unknown type. + tree abs_value = gg_define_variable(TREE_TYPE(value)); + IF( value, lt_op, build_int_cst_type(TREE_TYPE(value), 0) ) + { + gg_assign(abs_value, gg_negate(value)); + } + ELSE + { + gg_assign(abs_value, value); + } + ENDIF + + __int128 power_of_ten = get_power_of_ten( dest->data.digits + - dest->data.rdigits + + rhs_rdigits ); + IF( gg_cast(INT128, abs_value), + ge_op, + build_int_cst_type(INT128, power_of_ten) ) + { + // Flag the size error + gg_assign(size_error, integer_one_node); + } + ELSE + ENDIF + } + scale_by_power_of_ten_N(value, dest->data.rdigits - rhs_rdigits); + + tree dest_type = tree_type_from_size( dest->data.capacity, + dest->attr & signable_e); + tree dest_pointer = gg_add(member(dest->var_decl_node, "data"), + dest_offset); + gg_assign(gg_indirect(gg_cast(build_pointer_type(dest_type), dest_pointer)), + gg_cast(dest_type, value)); + } + +void +build_array_of_treeplets( int ngroup, + size_t N, + cbl_refer_t *refers) + { + if( N ) + { + // At the present time the most this routine is called is four times, for + // the implementation of the UNSTRING verb. + + if( N > MIN_FIELD_BLOCK_SIZE ) + { + gg_call(VOID, + "__gg__resize_treeplet", + build_int_cst_type(INT, ngroup), + build_int_cst_type(SIZE_T, N), + NULL_TREE + ); + } + switch(ngroup) + { + case 1: + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(var_decl_treeplet_1f, i), + refers[i].field ? gg_get_address_of(refers[i].field->var_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_1o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_1s, i), + refer_size_source(refers[i])); + } + break; + case 2: + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(var_decl_treeplet_2f, i), + refers[i].field ? gg_get_address_of(refers[i].field->var_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_2o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_2s, i), + refer_size_source(refers[i])); + } + break; + case 3: + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(var_decl_treeplet_3f, i), + refers[i].field ? gg_get_address_of(refers[i].field->var_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_3o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_3s, i), + refer_size_source(refers[i])); + } + break; + case 4: + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(var_decl_treeplet_4f, i), + refers[i].field ? gg_get_address_of(refers[i].field->var_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_4o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_4s, i), + refer_size_source(refers[i])); + } + break; + default: + abort(); + break; + } + } + else + { + // Just do nothing + } + } + +void +build_array_of_fourplets( int ngroup, + size_t N, + cbl_refer_t *refers) + { + int flag_bits = 0; + if( N ) + { + if( N > MIN_FIELD_BLOCK_SIZE ) + { + gg_call(VOID, + "__gg__resize_treeplet", + build_int_cst_type(INT, ngroup), + build_int_cst_type(SIZE_T, N), + NULL_TREE); + + gg_call(VOID, + "__gg__resize_int_p", + gg_get_address_of(var_decl_fourplet_flags_size), + gg_get_address_of(var_decl_fourplet_flags), + build_int_cst_type(SIZE_T, N), + NULL_TREE); + } + + for(size_t i=0; i<N; i++) + { + gg_assign(gg_array_value(var_decl_treeplet_1f, i), + gg_get_address_of(refers[i].field->var_decl_node)); + gg_assign(gg_array_value(var_decl_treeplet_1o, i), + refer_offset_source(refers[i], &flag_bits)); + gg_assign(gg_array_value(var_decl_treeplet_1s, i), + refer_size_source(refers[i])); + gg_assign(gg_array_value(var_decl_fourplet_flags, i), + build_int_cst_type(INT, flag_bits)); + } + } + else + { + abort(); + } + } + +tree +build_array_of_size_t( size_t N, + const size_t *values) + { + // We create and populate an array of size_t values + + // This only works because it is used in but one spot. If this routine is + // called twice, be careful about how the first one is used. It's a static + // variable, you see. + static tree values_p = gg_define_variable(SIZE_T_P, "..baost_values_p", vs_file_static); + if( N ) + { + gg_assign( values_p, + gg_cast(build_pointer_type(SIZE_T), + gg_malloc(N*sizeof(SIZE_T)))); + + for(size_t i=0; i<N; i++) + { + gg_assign( gg_array_value(values_p, i), + build_int_cst_type(SIZE_T, values[i])); + } + } + else + { + gg_assign( values_p, + gg_cast(build_pointer_type(SIZE_T), null_pointer_node )); + } + return values_p; + } + +void +parser_display_internal_field(tree file_descriptor, + cbl_field_t *field, + bool advance) + { + cbl_refer_t wrapper = {}; + wrapper.field = field; + parser_display_internal(file_descriptor, wrapper, advance); + } + +char * +get_literal_string(cbl_field_t *field) + { + assert(field->type == FldLiteralA); + char *buffer = NULL; + size_t buffer_length = 0; + if( buffer_length < field->data.capacity+1 ) + { + buffer_length = field->data.capacity+1; + buffer = (char *)xrealloc(buffer, buffer_length); + } + for(size_t i=0; i<field->data.capacity; i++) + { + buffer[i] = ascii_to_internal(field->data.initial[i]); + } + buffer[field->data.capacity] = '\0'; + return buffer; + } + +bool +refer_is_clean(cbl_refer_t &refer) + { + if( !refer.field ) + { + // It is routine for a refer to have no field. It happens when the parser + // passes us a refer for an optional parameter that has been ommitted, for + // example. + return true; + } + + return !refer.all + && !refer.addr_of + && !refer.nsubscript + && !refer.refmod.from + && !refer.refmod.len + && !refer_has_depends(refer, refer_source) + ; + } + +void +REFER_CHECK(const char *func, + int line, + cbl_refer_t &refer + ) + { + static int counter=1; + + if( counter == 5 ) + { + fprintf(stderr, "DING! %d\n", counter); + } + + + fprintf(stderr, + "ct REFER_CHECK(%d): %s():%d %s\n", + counter, + func, + line, + refer.field->name); + + gg_printf("rt REFER_CHECK(%d): %s():%d %s (%s)\n", + build_int_cst_type(INT, counter), + gg_string_literal(func), + build_int_cst_type(INT, line), + gg_string_literal(refer.field->name), + gg_string_literal(cbl_field_type_str(refer.field->type)), + NULL_TREE); + counter+=1; + } + +static +tree // size_t +refer_refmod_length(cbl_refer_t &refer) + { + Analyze(); + if( refer.refmod.from || refer.refmod.len ) + { + // First, check for compile-time errors + bool any_length = !!(refer.field->attr & any_length_e); + tree rt_capacity; + static tree value64 = gg_define_variable(LONG, "..rrl_value64", vs_file_static); + static tree refstart = gg_define_variable(LONG, "..rrl_refstart", vs_file_static); + static tree reflen = gg_define_variable(LONG, "..rrl_reflen", vs_file_static); + + if( any_length ) + { + rt_capacity = + gg_cast(LONG, + member(refer.field->var_decl_node, "capacity")); + } + else + { + rt_capacity = + build_int_cst_type(LONG, refer.field->data.capacity); + } + + gg_assign(reflen, gg_cast(TREE_TYPE(reflen), integer_one_node)); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("a refmod FROM value is not an integer"); + } + } + ELSE + gg_assign(refstart, value64); + ENDIF + } + else + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from) + ); + gg_assign(refstart, value64); + } + + // Make refstart zero-based: + gg_decrement(refstart); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + IF( refstart, lt_op, build_int_cst_type(LONG, 0 ) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("a refmod FROM value is less than zero"); + } + } + ELSE + { + IF( refstart, gt_op, rt_capacity ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("a refmod FROM value is too large"); + } + } + ELSE + { + if( refer.refmod.len ) + { + get_integer_value(value64, + refer.refmod.len->field, + refer_offset_source(*refer.refmod.len), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + // length is not an integer + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(reflen, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("a refmod LENGTH is not an integer"); + } + } + ELSE + { + gg_assign(reflen, gg_cast(LONG, value64)); + } + ENDIF + + IF( reflen, lt_op, gg_cast(LONG, integer_one_node) ) + { + // length is too small + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(reflen, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("a refmod LENGTH is less than one"); + } + } + ELSE + { + IF( gg_add(refstart, reflen), + gt_op, + rt_capacity ) + { + // Start + Length is too large + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + + // Our intentions are honorable. But at this point, where + // we notice that start + length is too long, the + // get_data_offset_source routine has already been run and + // it's too late to actually change the refstart. There are + // theoretical solutions to this -- mainly, + // get_data_offset_source needs to check the start + len for + // validity. But I am not going to do it now. Think of this + // as the TODO item. + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + gg_assign(reflen, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("refmod START + LENGTH is too large"); + } + } + ELSE + ENDIF + } + ENDIF + } + else + { + // There is no refmod length, so we default to the remaining characters + tree subtract_expr = gg_subtract( rt_capacity, + refstart); + gg_assign(reflen, subtract_expr); + } + } + ENDIF + } + ENDIF + } + else + { + if( refer.refmod.len ) + { + get_integer_value(value64, + refer.refmod.len->field, + refer_offset_source(*refer.refmod.len) + ); + gg_assign(reflen, gg_cast(LONG, value64)); + } + else + { + // There is no refmod length, so we default to the remaining characters + gg_assign(reflen, gg_subtract(rt_capacity, + refstart)); + } + } + + // Arrive here with valid values for refstart and reflen: + + return gg_cast(SIZE_T, reflen); + } + else + { + return size_t_zero_node; + } + } + +static +tree // size_t +refer_fill_depends(cbl_refer_t &refer) + { + // This returns a positive number which is the amount a depends-limited + // capacity needs to be reduced. + Analyze(); + cbl_field_t *odo = symbol_find_odo(refer.field); + cbl_field_t *depending_on; + depending_on = cbl_field_of(symbol_at(odo->occurs.depending_on)); + // refer.field has a relevant DEPENDING ON clause + + // gg_printf("var is %s type is %s\n", + // gg_string_literal(refer.field->name), + // gg_string_literal(cbl_field_type_str(refer.field->type)), + // NULL_TREE); + // gg_printf(" odo is %s\n", gg_string_literal(odo->name), NULL_TREE); + + // gg_printf(" depending_on is %s\n", gg_string_literal(depending_on->name), NULL_TREE); + // fprintf(stderr, + // "symbol_find_odo found %s, with depending_on %s\n", + // odo->name, + // depending_on->name); + + static tree value64 = gg_define_variable(LONG, "..rfd_value64", vs_file_static); + if( process_this_exception(ec_bound_odo_e) ) + { + get_integer_value(value64, + depending_on, + NULL, + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, ne_op, integer_zero_node ) + { + // This needs to evaluate to an integer + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.upper)); + } + else + { + rt_error("DEPENDING ON is not an integer"); + } + } + ELSE + ENDIF + } + else + { + get_integer_value(value64, depending_on); + } + + if( process_this_exception(ec_bound_odo_e) ) + { + IF( value64, gt_op, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.upper) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.upper)); + } + ELSE + { + IF( value64, lt_op, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.lower) ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.lower)); + } + else + { + rt_error("DEPENDING ON is less than OCCURS lower limit"); + } + } + ELSE + ENDIF + IF( value64, lt_op, gg_cast(TREE_TYPE(value64), integer_zero_node) ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, gg_cast(TREE_TYPE(value64), integer_zero_node)); + } + else + { + rt_error("DEPENDING ON is greater than OCCURS upper limit"); + } + } + ELSE + ENDIF + } + ENDIF + } + // value64 is >= zero and < bounds.upper + + // We multiply the ODO value by the size of the data capacity to get the + // shortened length: + + tree mult_expr = gg_multiply( build_int_cst_type(TREE_TYPE(value64), odo->data.capacity), + value64 ); + + // And we add that to the distance from the requested variable to the odo + // variable to get the modified length: + tree add_expr = gg_add(mult_expr, build_int_cst_type(SIZE_T, odo->offset - refer.field->offset)); + return add_expr; + } + +tree // size_t +refer_offset_dest(cbl_refer_t &refer) + { + Analyze(); + // This has to be on the stack, because there are places where this routine + // is called twice before the results are used. + + if( !refer.field ) + { + return size_t_zero_node; + } + + if( !refer.nsubscript ) + { + return get_data_offset_dest(refer); + } + + gg_assign(var_decl_odo_violation, integer_zero_node); + + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, get_data_offset_dest(refer)); + if( process_this_exception(ec_bound_odo_e) ) + { + IF( var_decl_odo_violation, ne_op, integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + } + else + { + rt_error("receiving item subscript not in DEPENDING ON range"); + } + } + ELSE + ENDIF + } + return retval; + } + +tree // size_t +refer_size_dest(cbl_refer_t &refer) + { + Analyze(); + //static tree retval = gg_define_variable(SIZE_T, "..rsd_retval", vs_file_static); + tree retval = gg_define_variable(SIZE_T); + + if( !refer.field ) + { + return size_t_zero_node; + } + if( refer_is_clean(refer) ) + { + // When the refer has no modifications, we return zero, which is interpreted + // as "use the original length" + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + return member(refer.field->var_decl_node, "capacity"); + } + else + { + return build_int_cst_type(SIZE_T, refer.field->data.capacity); + } + } + + // Step the first: Get the actual full length: + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + // This is an intermediate; use the length that might have changed + // because of a FUNCTION TRIM, or whatnot. + + // We also pick up capacity for variables that were specified in + // linkage as ANY LENGTH + gg_assign(retval, member(refer.field->var_decl_node, "capacity")); + } + + if( refer_has_depends(refer, refer_dest) ) + { + // Because there is a depends, we might have to change the length: + gg_assign(retval, refer_fill_depends(refer)); + } + else + { + // Use the compile-time value + gg_assign(retval, build_int_cst_type(SIZE_T, refer.field->data.capacity)); + } + + if( refer.refmod.from || refer.refmod.len ) + { + tree refmod = refer_refmod_length(refer); + // retval is the ODO based total length. + // refmod is the length resulting from refmod(from:len) + // We have to reduce retval by the effect of refmod: + tree diff = gg_subtract(build_int_cst_type(SIZE_T, refer.field->data.capacity), + refmod); + gg_assign(retval, gg_subtract(retval, diff)); + } + return retval; + } + +tree // size_t +refer_offset_source(cbl_refer_t &refer, + int *pflags) + { + if( !refer.field ) + { + return size_t_zero_node; + } + if( !refer.nsubscript ) + { + return get_data_offset_source(refer); + } + + Analyze(); + + tree retval = gg_define_variable(SIZE_T); + gg_assign(var_decl_odo_violation, integer_zero_node); + + gg_assign(retval, get_data_offset_source(refer, pflags)); + if( process_this_exception(ec_bound_odo_e) ) + { + IF( var_decl_odo_violation, ne_op, integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + } + else + { + rt_error("sending item subscript not in DEPENDING ON range"); + } + } + ELSE + ENDIF + } + return retval; + } + +tree // size_t +refer_size_source(cbl_refer_t &refer) + { + if( !refer.field ) + { + return size_t_zero_node; + } + if( refer_is_clean(refer) ) + { + // When the refer has no modifications, we return zero, which is interpreted + // as "use the original length" + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + return member(refer.field->var_decl_node, "capacity"); + } + else + { + return build_int_cst_type(SIZE_T, refer.field->data.capacity); + } + } + + Analyze(); + + // Step the first: Get the actual full length: + static tree retval = gg_define_variable(SIZE_T, "..rss_retval", vs_file_static); + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + // This is an intermediate; use the length that might have changed + // because of a FUNCTION TRIM, or whatnot. + + // We also pick up capacity for variables that were specified in + // linkage as ANY LENGTH + gg_assign(retval, + member(refer.field->var_decl_node, "capacity")); + } + + if( refer_has_depends(refer, refer_source) ) + { + // Because there is a depends, we might have to change the length: + gg_assign(retval, refer_fill_depends(refer)); + } + else + { + // Use the compile-time value + gg_assign(retval, build_int_cst_type(SIZE_T, refer.field->data.capacity)); + } + + if( refer.refmod.from || refer.refmod.len ) + { + tree refmod = refer_refmod_length(refer); + // retval is the ODO based total length. + // refmod is the length resulting from refmod(from:len) + // We have to reduce retval by the effect of refmod: + tree diff = gg_subtract(build_int_cst_type(SIZE_T, refer.field->data.capacity), + refmod); + gg_assign(retval, gg_subtract(retval, diff)); + } + return retval; + } + +tree +qualified_data_source(cbl_refer_t &refer) + { + return gg_add(member(refer.field->var_decl_node, "data"), + refer_offset_source(refer)); + } + +tree +qualified_data_dest(cbl_refer_t &refer) + { + return gg_add(member(refer.field->var_decl_node, "data"), + refer_offset_dest(refer)); + } diff --git a/gcc/cobol/genutil.h b/gcc/cobol/genutil.h new file mode 100644 index 0000000..e252377 --- /dev/null +++ b/gcc/cobol/genutil.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _GENUTIL_H_ +#define _GENUTIL_H_ + +#define EBCDIC_MINUS (0x60) +#define EBCDIC_PLUS (0x4E) +#define EBCDIC_ZERO (0xF0) +#define EBCDIC_NINE (0xF9) + +bool internal_codeset_is_ebcdic(); + +extern bool exception_location_active; +extern bool skip_exception_processing; + +extern bool suppress_dest_depends; + +extern std::vector<std::string>current_filename; + +extern tree var_decl_exception_code; // int __gg__exception_code; +extern tree var_decl_exception_handled; // int __gg__exception_handled; +extern tree var_decl_exception_file_number; // int __gg__exception_file_number; +extern tree var_decl_exception_file_status; // int __gg__exception_file_status; +extern tree var_decl_exception_file_name; // const char *__gg__exception_file_name; +extern tree var_decl_exception_statement; // const char *__gg__exception_statement; +extern tree var_decl_exception_source_file; // const char *__gg__exception_source_file; +extern tree var_decl_exception_line_number; // int __gg__exception_line_number; +extern tree var_decl_exception_program_id; // const char *__gg__exception_program_id; +extern tree var_decl_exception_section; // const char *__gg__exception_section; +extern tree var_decl_exception_paragraph; // const char *__gg__exception_paragraph; + +extern tree var_decl_default_compute_error; // int __gg__default_compute_error; +extern tree var_decl_rdigits; // int __gg__rdigits; +extern tree var_decl_odo_violation; // int __gg__odo_violation; +extern tree var_decl_unique_prog_id; // size_t __gg__unique_prog_id; + +extern tree var_decl_entry_location; // This is for managing ENTRY statements +extern tree var_decl_exit_address; // This is for implementing pseudo_return_pop + +extern tree var_decl_call_parameter_signature; // char *__gg__call_parameter_signature +extern tree var_decl_call_parameter_count; // int __gg__call_parameter_count +extern tree var_decl_call_parameter_lengths; // size_t *var_decl_call_parameter_lengths + +extern tree var_decl_return_code; // short __gg__data_return_code + +extern tree var_decl_arithmetic_rounds_size; // size_t __gg__arithmetic_rounds_size; +extern tree var_decl_arithmetic_rounds; // int* __gg__arithmetic_rounds; +extern tree var_decl_fourplet_flags_size; // size_t __gg__fourplet_flags_size; +extern tree var_decl_fourplet_flags; // int* __gg__fourplet_flags; + +extern tree var_decl_treeplet_1f; // cblc_field_pp_type_node , "__gg__treeplet_1f" +extern tree var_decl_treeplet_1o; // SIZE_T_P , "__gg__treeplet_1o" +extern tree var_decl_treeplet_1s; // SIZE_T_P , "__gg__treeplet_1s" +extern tree var_decl_treeplet_2f; // cblc_field_pp_type_node , "__gg__treeplet_2f" +extern tree var_decl_treeplet_2o; // SIZE_T_P , "__gg__treeplet_2o" +extern tree var_decl_treeplet_2s; // SIZE_T_P , "__gg__treeplet_2s" +extern tree var_decl_treeplet_3f; // cblc_field_pp_type_node , "__gg__treeplet_3f" +extern tree var_decl_treeplet_3o; // SIZE_T_P , "__gg__treeplet_3o" +extern tree var_decl_treeplet_3s; // SIZE_T_P , "__gg__treeplet_3s" +extern tree var_decl_treeplet_4f; // cblc_field_pp_type_node , "__gg__treeplet_4f" +extern tree var_decl_treeplet_4o; // SIZE_T_P , "__gg__treeplet_4o" +extern tree var_decl_treeplet_4s; // SIZE_T_P , "__gg__treeplet_4s" + +extern tree var_decl_nop; // int __gg__nop +extern tree var_decl_main_called; // int __gg__main_called + +int get_scaled_rdigits(cbl_field_t *field); +int get_scaled_digits(cbl_field_t *field); +tree tree_type_from_digits(size_t digits, int signable); +tree tree_type_from_size(size_t bytes, int signable); +tree tree_type_from_field(cbl_field_t *field); +void get_binary_value( tree value, + tree rdigits, + cbl_field_t *field, + tree field_offset, + tree hilo = NULL); +tree get_data_address( cbl_field_t *field, + tree offset); +__int128 get_power_of_ten(int n); +void scale_by_power_of_ten_N(tree value, + int N, + bool check_for_fractional = false); +tree scale_by_power_of_ten(tree value, + tree N, + bool check_for_fractional = false); +void scale_and_round(tree value, + int value_rdigits, + bool target_is_signable, + int target_rdigits, + cbl_round_t rounded); +void hex_dump(tree data, size_t bytes); +void set_exception_code_func(ec_type_t ec, + int line, + int from_raise_statement=0); +#define set_exception_code(ec) set_exception_code_func(ec, __LINE__) +bool process_this_exception(ec_type_t ec); +#define CHECK_FOR_FRACTIONAL_DIGITS true +void get_integer_value(tree value, + cbl_field_t *field, + tree offset=NULL, // size_t + bool check_for_fractional_digits=false); +void rt_error(const char *msg); +void copy_little_endian_into_place(cbl_field_t *dest, + tree dest_offset, + tree value, + int rhs_rdigits, + bool check_for_error, + tree &size_error); +tree build_array_of_size_t( size_t N, + const size_t *values); +void parser_display_internal_field(tree file_descriptor, + cbl_field_t *field, + bool advance=DISPLAY_NO_ADVANCE); +char *get_literal_string(cbl_field_t *field); + +bool refer_is_clean(cbl_refer_t &refer); + +tree refer_offset_source(cbl_refer_t &refer, + int *pflags=NULL); +tree refer_size_source(cbl_refer_t &refer); +tree refer_offset_dest(cbl_refer_t &refer); +tree refer_size_dest(cbl_refer_t &refer); + +void REFER_CHECK( const char *func, + int line, + cbl_refer_t &refer + ); +#define refer_check(a) REFER_CHECK(__func__, __LINE__, a) + +tree qualified_data_source(cbl_refer_t &refer); + +tree qualified_data_dest(cbl_refer_t &refer); + +void build_array_of_treeplets( int ngroup, + size_t N, + cbl_refer_t *refers); + +void build_array_of_fourplets( int ngroup, + size_t N, + cbl_refer_t *refers); +#endif diff --git a/gcc/cobol/help.gen b/gcc/cobol/help.gen new file mode 100755 index 0000000..6aa201f --- /dev/null +++ b/gcc/cobol/help.gen @@ -0,0 +1,15 @@ +#! /usr/bin/awk -f + +BEGIN { + print "puts(" +} + +/^ {5}[-][[:alnum:]-]+/, /[.] / { + gsub(/[.] .+/, ". ") + gsub(/^ /, ""); + print "\t\"" $0 "\\n\"" +} + +END { + print ");" +} diff --git a/gcc/cobol/inspect.h b/gcc/cobol/inspect.h new file mode 100644 index 0000000..9e86a0b --- /dev/null +++ b/gcc/cobol/inspect.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef INSPECT_H +#define INSPECT_H +#include <algorithm> +#include <cstddef> +#include <cstring> +#include <cstdio> + +/* + * INSPECT has 3 repeating elements: + * + * 1. cbl_inspect_t + * Tally (identifier-2). parser_inspect takes N of these. + * Because REPLACING has no such loop, N == 1 for REPLACING. + * + * 2. cbl_inspect_oper_t + * The CHARACTERS/ALL/LEADING/FIRST phrase (type of match) + * Has N match/replace operands (or both) + * + * 3. cbl_inspect_match_t and cbl_inspect_replace_t + * The CHARACTERS/ALL/LEADING/FIRST operands + * Has N tuples of identifier-3 + [BEFORE and/or AFTER] + */ + +static inline bool +is_active( const cbl_refer_t& refer ) { return NULL != refer.field; } + +template <typename DATA> +struct cbx_inspect_qual_t { + bool initial; + DATA identifier_4; + + cbx_inspect_qual_t() : initial(false), identifier_4(DATA()) {} + cbx_inspect_qual_t( bool initial, const DATA& identifier_4 ) + : initial(initial), identifier_4(identifier_4) + { + //if( identifier_4.field ) yywarn("%s:qualifying field is '%s'", __func__, identifier_4.field->name); + } + cbx_inspect_qual_t( const cbx_inspect_qual_t& that ) + : initial(that.initial) + , identifier_4(that.identifier_4) + { + //if( identifier_4.field ) yywarn("%s:qualifying field is '%s'", __func__, identifier_4.field->name); + } + + cbx_inspect_qual_t& operator=( const cbx_inspect_qual_t& that ) { + initial = that.initial; + identifier_4 = that.identifier_4; + //if( identifier_4.field ) yywarn("%s:qualifying field is '%s'", __func__, identifier_4.field->name); + return *this; + } + + bool active() const { return is_active(identifier_4); } + + void clear() { + initial = false; + identifier_4.clear(); + } +}; + +typedef cbx_inspect_qual_t<cbl_refer_t> cbl_inspect_qual_t; + +/* + * Data for INSPECT X TALLYING Y FOR. Captures information for operands of + * CHARACTERS/ALL/LEADING. The CHARACTERS/ALL/LEADING control is kept at the + * next higher level, and may be repeated for each tally. + * + * cbx_inspect_match_t::matching is not used with CHARACTERS + */ +template <typename DATA> +struct cbx_inspect_match_t { + DATA matching; // identifier-3/5 or literal-1/3 + cbx_inspect_qual_t<DATA> before, after; // phrase 1 + + cbx_inspect_match_t( + const DATA& matching = DATA(), + cbx_inspect_qual_t<DATA> before = cbx_inspect_qual_t<DATA>(), + cbx_inspect_qual_t<DATA> after = cbx_inspect_qual_t<DATA>() + ) + : matching(matching) + , before(before) + , after(after) + {} + // match all characters + bool match_any() const { return !(before.active() || after.active()); } +}; + +typedef cbx_inspect_match_t<cbl_refer_t> cbl_inspect_match_t; + +/* + * Data for INSPECT X REPLACING. The CHARACTERS/ALL/LEADING/FIRST control is + * kept at the next higher level, and may be repeated. + */ +template <typename DATA> +struct cbx_inspect_replace_t : public cbx_inspect_match_t<DATA> { + DATA replacement; + + cbx_inspect_replace_t( const DATA& matching = DATA(), + const DATA& replacement = DATA() ) + : cbx_inspect_match_t<DATA>(matching) + , replacement(replacement) + {} + cbx_inspect_replace_t( const DATA& matching, + const DATA& replacement, + const cbx_inspect_qual_t<DATA>& before, + const cbx_inspect_qual_t<DATA>& after ) + : cbx_inspect_match_t<DATA>(matching, before, after) + , replacement(replacement) + {} +}; + +typedef cbx_inspect_replace_t<cbl_refer_t> cbl_inspect_replace_t; + +// One partial tally or substitution. +template <typename DATA> +struct cbx_inspect_oper_t { + cbl_inspect_bound_t bound; // CHARACTERS/ALL/LEADING/FIRST + size_t n_identifier_3; // N matches/replaces + // either tallies or replaces is NULL + cbx_inspect_match_t<DATA> *matches; + cbx_inspect_replace_t<DATA> *replaces; + + cbx_inspect_oper_t( cbl_inspect_bound_t bound, + std::list<cbx_inspect_match_t<DATA>> matches ) + : bound(bound) + , n_identifier_3( matches.size()) + , matches(NULL) + , replaces(NULL) + { + this->matches = new cbx_inspect_match_t<DATA>[n_identifier_3]; + std::copy( matches.begin(), matches.end(), this->matches ); + } + + cbx_inspect_oper_t( cbl_inspect_bound_t bound, + std::list<cbx_inspect_replace_t<DATA>> replaces ) + : bound(bound) + , n_identifier_3( replaces.size() ) + , matches(NULL) + , replaces(NULL) + { + this->replaces = new cbx_inspect_replace_t<DATA>[n_identifier_3]; + std::copy( replaces.begin(), replaces.end(), this->replaces ); + } + + cbx_inspect_oper_t() + : bound(bound_characters_e) + , n_identifier_3(0) + , matches(NULL) + , replaces(NULL) + { + assert( is_valid() ); + } + + bool is_valid() const { + if( matches && replaces ) return false; + if( matches || replaces ) return n_identifier_3 > 0; + return n_identifier_3 == 0; + } +}; + +typedef cbx_inspect_oper_t<cbl_refer_t> cbl_inspect_oper_t; + +// One whole tally or substitution. For REPLACING, nbound == 1 +template <typename DATA> +struct cbx_inspect_t { + DATA tally; // identifier-2: NULL without a tally + size_t nbound; // Each FOR or REPLACING operation starts with a cbl_inspect_bound_t + cbx_inspect_oper_t<DATA> *opers; + + cbx_inspect_t( const DATA& tally = DATA() ) + : tally(tally) + , nbound(0) + , opers(NULL) + {} + cbx_inspect_t( const DATA& tally, cbx_inspect_oper_t<DATA> oper ) + : tally(tally) + , nbound(1) + , opers(NULL) + { + this->opers = new cbx_inspect_oper_t<DATA>[1]; + this->opers[0] = oper; + } + cbx_inspect_t( const DATA& tally, + const std::list<cbx_inspect_oper_t<DATA>>& opers ) + : tally(tally) + , nbound( opers.size() ) + , opers(NULL) + { + this->opers = new cbx_inspect_oper_t<DATA>[nbound]; + std::copy( opers.begin(), opers.end(), this->opers ); + } +}; + +typedef cbx_inspect_t<cbl_refer_t> cbl_inspect_t; + + +/* + * Runtime + */ + +void parser_inspect( cbl_refer_t input, bool backward, + size_t ninspect, cbl_inspect_t *inspects ); +void parser_inspect_conv( cbl_refer_t input, bool backward, + cbl_refer_t original, + cbl_refer_t replacement, + cbl_inspect_qual_t before = cbl_inspect_qual_t(), + cbl_inspect_qual_t after = cbl_inspect_qual_t() ); + +#endif // INSPECT_H diff --git a/gcc/cobol/lang-specs.h b/gcc/cobol/lang-specs.h new file mode 100644 index 0000000..78e84c0 --- /dev/null +++ b/gcc/cobol/lang-specs.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* gcc-src/gcc/config/lang-specs.h */ + {".cob", "@cobol", 0, 0, 0}, + {".COB", "@cobol", 0, 0, 0}, + {".cbl", "@cobol", 0, 0, 0}, + {".CBL", "@cobol", 0, 0, 0}, + {"@cobol", + "cobol1 %i %(cc1_options) " + "%{D*} %{E} %{I*} %{fmax-errors*} %{fsyntax-only*} " + "%{fcobol-exceptions*} " + "%{copyext} " + "%{fstatic-call} %{fdefaultbyte} " + "%{ffixed-form} %{ffree-form} %{indicator-column*} " + "%{preprocess} " + "%{dialect} " + "%{include} " + "%{nomain} " + "%{!fsyntax-only:%(invoke_as)} " + , 0, 0, 0}, diff --git a/gcc/cobol/lang.opt b/gcc/cobol/lang.opt new file mode 100644 index 0000000..42c4020 --- /dev/null +++ b/gcc/cobol/lang.opt @@ -0,0 +1,144 @@ +; lang.opt -- Options for the gcc Cobol front end. + +; Copyright (C) 2021-2025 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +; See the GCC internals manual for a description of this file's format. + +; Please try to keep this file in ASCII collating order. + +Language +Cobol + +D +Cobol Joined Separate +; Documented in c.opt + +E +Cobol +; Documented in c.opt + +I +Cobol Joined Separate +;; -I <dir> Add copybook search directory +; Documented in c.opt + +dialect +Cobol Joined Separate Enum(dialect_type) EnumBitSet Var(cobol_dialect) +Accept COBOL constructs used by non-ISO compilers + +Enum +Name(dialect_type) Type(int) UnknownError(Unrecognized COBOL dialect name: %qs) + +EnumValue +Enum(dialect_type) String(gcc) Value(0x04) Canonical + +EnumValue +Enum(dialect_type) String(ibm) Value(0x01) + +EnumValue +Enum(dialect_type) String(mf) Value(0x02) + +EnumValue +Enum(dialect_type) String(gnu) Value(0x04) + +fcobol-exceptions +Cobol Joined Separate Var(cobol_exceptions) +-fcobol-exceptions=<n> Enable some exceptions by default + +copyext +Cobol Joined Separate Var(cobol_copyext) Init(0) +Define alternative implicit copybook filename extension + +fdefaultbyte +Cobol RejectNegative Joined Separate UInteger Var(cobol_default_byte) +Set Working-Storage data items to the supplied value + +fflex-debug +Cobol Var(yy_flex_debug, 1) Init(0) +Enable Cobol lex debugging + +ffixed-form +Cobol RejectNegative +Assume that the source file is fixed form. + +fsyntax-only +Cobol RejectNegative +; Documented in c.opt + +ffree-form +Cobol RejectNegative +Assume that the source file is free form. + +findicator-column +Cobol RejectNegative Joined Separate UInteger Var(indicator_column) Init(0) IntegerRange(0, 8) +-findicator-column=<n> Column after which Region A begins + +finternal-ebcdic +Cobol Var(cobol_ebcdic, 1) Init(0) +-finternal-ebcdic Internal processing is in EBCDIC Code Page 1140 + +fmax-errors +Cobol Joined Separate +; Documented in C + +fstatic-call +Cobol Var(cobol_static_call, 1) Init(1) +Enable/disable static linkage for CALL literals + +ftrace-debug +Cobol Var(cobol_trace_debug, 1) Init(0) +Enable Cobol parser debugging + +fyacc-debug +Cobol Var(yy_debug, 1) Init(0) +Enable Cobol yacc debugging + +preprocess +Cobol Joined Separate Var(cobol_preprocess) +preprocess <source_filter> before compiling + +iprefix +Cobol Joined Separate +; Documented in C + +include +Cobol Joined Separate Var(cobol_include) +; Documented in C + +isysroot +Cobol Joined Separate +; Documented in C + +isystem +Cobol Joined Separate +; Documented in C + +main +Cobol +-main The first program-id in the next source file is called by a generated main() entry point + +main= +Cobol Joined Var(cobol_main_string) +-main=<source_file> source_file/PROGRAM-ID is called by the generated main() + +nomain +Cobol +-nomain No main() function is created from COBOL source files + +; This comment is to ensure we retain the blank line above. diff --git a/gcc/cobol/lang.opt.urls b/gcc/cobol/lang.opt.urls new file mode 100644 index 0000000..a0e1f19 --- /dev/null +++ b/gcc/cobol/lang.opt.urls @@ -0,0 +1,29 @@ +; Autogenerated by regenerate-opt-urls.py from gcc/cobol/lang.opt and generated HTML + +D +UrlSuffix(gcc/Preprocessor-Options.html#index-D-1) + +; skipping UrlSuffix for 'E' due to multiple URLs: +; duplicate: 'gcc/Link-Options.html#index-E-1' +; duplicate: 'gcc/Overall-Options.html#index-E' + +I +UrlSuffix(gcc/Directory-Options.html#index-I) LangUrlSuffix_D(gdc/Directory-Options.html#index-I) + +fsyntax-only +UrlSuffix(gcc/Warning-Options.html#index-fsyntax-only) LangUrlSuffix_D(gdc/Warnings.html#index-fno-syntax-only) + +fmax-errors +UrlSuffix(gcc/Warning-Options.html#index-fmax-errors) LangUrlSuffix_D(gdc/Warnings.html#index-fmax-errors) + +iprefix +UrlSuffix(gcc/Directory-Options.html#index-iprefix) LangUrlSuffix_D(gdc/Directory-Options.html#index-iprefix) + +include +UrlSuffix(gcc/Preprocessor-Options.html#index-include) + +isysroot +UrlSuffix(gcc/Directory-Options.html#index-isysroot) + +isystem +UrlSuffix(gcc/Directory-Options.html#index-isystem) diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc new file mode 100644 index 0000000..40ba873 --- /dev/null +++ b/gcc/cobol/lexio.cc @@ -0,0 +1,1878 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <ext/stdio_filebuf.h> +#include "cobol-system.h" +#include "cbldiag.h" +#include "util.h" +#include "copybook.h" +#include "lexio.h" + +extern int yy_flex_debug; + +static struct { + bool first_file, explicitly; + int column, right_margin; + bool inference_pending() { + bool tf = first_file && !explicitly; + first_file = false; + return tf; + } +} indicator = { true, false, 0, 0 }; + +static bool debug_mode = false; + +/* + * The "debug mode" is a little odd, because we have to make sure a + * leading "D" doesn't start the verb DISPLAY (for example). If + * debug_mode is on, debug lines are included in the parse. If + * debug_mode is off but we're not in fixed_format, lines starting + * with "D" are also included. + * + * So, the line is excluded if: fixed format and not debug mode + * Else, it's included. +*/ + +static inline int left_margin() { + return indicator.column == 0? indicator.column : indicator.column - 1; +} +static inline int right_margin() { + return indicator.right_margin == 0? + indicator.right_margin : indicator.right_margin - 1; +} + +/* + * When setting the indicator column explicity: + * To get strict fixed 72-column lines, use a negative column number. + * When setting back to 0 (free), the right margin is also reset to 0. + */ +void +cobol_set_indicator_column( int column ) +{ + indicator.explicitly = true; + if( column == 0 ) indicator.right_margin = 0; + if( column < 0 ) { + column = -column; + indicator.right_margin = 73; + } + indicator.column = column; +} + +bool is_fixed_format() { return indicator.column == 7; } +bool is_reference_format() { + return indicator.column == 7 && indicator.right_margin == 73; +} +bool include_debug() { return indicator.column == 7 && debug_mode; } +bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); } + +static bool nonblank( const char ch ) { return !isblank(ch); } + +static inline char * +start_of_line( char *bol, char *eol ) { + bol = std::find_if(bol, eol, nonblank); + gcc_assert(bol < eol); // must exist + return bol; +} + +static inline char * +continues_at( char *bol, char *eol ) { + if( indicator.column == 0 ) return NULL; // cannot continue in free format + bol += left_margin(); + if( *bol != '-' ) return NULL; // not a continuation line + return start_of_line(++bol, eol); +} + +// Return pointer to indicator column. Test ch if provided. +// NULL means no indicator column or tested value not present. +static inline char * +indicated( char *bol, char *eol, char ch = '\0' ) { + if( indicator.column == 0 && *bol != '*' ) { + return NULL; // no indicator column in free format, except for comments + } + gcc_assert(bol != NULL); + auto ind = bol + left_margin(); + if( eol <= ind ) return NULL; // left margin would be after end of line + // If TAB is in the line-number region, nothing is in the indicator column. + bool has_tab = std::any_of(bol, ind, + [](const char ch) { return ch == '\t'; } ); + if( has_tab ) return NULL; + if( (bol += left_margin()) > eol ) return NULL; + return ch == '\0' || ch == *bol? bol : NULL; +} + +static char * +remove_inline_comment( char *bol, char *eol ) { + static char ends = '\0'; + char *nl = std::find(bol, eol, '\n'); + + if( bol < nl ) { + std::swap(*nl, ends); + char *comment = strstr(bol, "*>"); + if( comment ) { + std::fill(comment, nl, SPACE); + } + std::swap(*nl, ends); + } + return eol; +} + +static void +erase_line( char *src, char *esrc ) { + dbgmsg( "%s: erasing %.*s from input", __func__, int(esrc-src), src); + erase_source(src, esrc); +} + +static size_t +count_newlines( const char *beg, const char *end ) { + return std::count(beg, end, '\n'); +} + +size_t +filespan_t::tab_check( const char *src, const char *esrc ) { + static const char tab = '\t'; + + const char *data = src + left_margin(); + if( data < esrc ) { // not a blank line + const char *tab_at = std::find(src, data, tab); + if( tab_at < data ) { + return (tab_at - src) + 1; + } + } + return 0; +} + +static const auto extended_icase = regex::extended | regex::icase; + +std::stack< std::list<replace_t> > replace_directives; + +static bool +is_word_or_quote( char ch ) { + return ch == '"' || ch == '\'' || ISALNUM(ch); +} +/* + * If the replacement is not leading/trailing, the edges of the + * matched pattern must delimit a Cobol word. If not, add a space to + * the replacement. + */ +static void +maybe_add_space(const span_t& pattern, replace_t& recognized) { + static const char blank[] = " "; + const char *befter[2] = { "", "" }; + gcc_assert(0 < recognized.before.size()); + + // start of pattern and end of preceding text + if( pattern.p[0] == '.' && is_word_or_quote(recognized.before.p[-1]) ) { + befter[0] = blank; + } + // end of pattern, and start of succeeding text + if( pattern.pend[-1] == '.' && is_word_or_quote(recognized.before.pend[0]) ) { + befter[1] = blank; + } + + if( befter[0] == blank || befter[1] == blank ) { + char *s = xasprintf( "%s%.*s%s", + befter[0], + recognized.after.size(), recognized.after.p, + befter[1] ); + recognized.after = span_t(s, s + strlen(s)); + } +} + +/* + * Keep track of the next pending replacement for each active REPLACE + * directive. For the current line, apply patterns that begins on the + * line. (It may match input extending beyond the current eol.) + * + * As each replacement is identified, append it to the passsed list of + * pending replacements. For these elements: + * + * before is a span in mfile + * after is dynamically allocated + */ +static void +recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacements ) { + static const char *top_of_stack_cache, *applies_to; + + struct future_replacement_t { + replace_t directive; + span_t found; + future_replacement_t( const replace_t& replace, span_t found ) + : directive(replace), found(found) + {} + bool operator<( const future_replacement_t& that ) const { + return found.p < that.found.p; + } + }; + + static std::list<future_replacement_t> futures; + + if( replace_directives.empty() ) return; + + if( ! (top_of_stack_cache == replace_directives.top().front().before.p + && + applies_to == mfile.data) ) + { + futures.clear(); + top_of_stack_cache = replace_directives.top().front().before.p; + applies_to = mfile.data; + } + + if( futures.empty() ) { + /* + * From the current point in the file, find the next match for each + * pattern at the top of the replacement stack. + */ + for( const auto& directive : replace_directives.top() ) { + regex re(directive.before.p, extended_icase); + cmatch cm; + + span_t found(mfile.eodata, mfile.eodata); + + if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + gcc_assert(cm[1].matched); + found = span_t( cm[1].first, cm[1].second ); + if( yy_flex_debug ) { + size_t n = count_newlines(mfile.data, found.p); + dbgmsg("%s:%d first '%.*s' is on line %zu (offset %zu)", __func__, __LINE__, + directive.before.size(), directive.before.p, + ++n, found.p - mfile.data); + } + } else { + dbgmsg("%s:%d not found: '%s' in \n'%.*s'", __func__, __LINE__, + directive.before.p, int(strlen(directive.before.p)), mfile.cur); + } + futures.push_back( future_replacement_t(directive, found) ); + } + } + + gcc_assert(!futures.empty()); + gcc_assert(futures.size() == replace_directives.top().size()); + + replace_t recognized; + + auto pnext = std::min_element(futures.begin(), futures.end()); + + for( const char *bol = mfile.cur; // more than one replacement may apply to a line + bol <= pnext->found.p && pnext->found.p < mfile.eol; ) { + auto& next(*pnext); + recognized = replace_t( next.found, span_t(strlen(next.directive.after.p), + next.directive.after.p) ); + maybe_add_space(next.directive.before, recognized); + pending_replacements.push_back(recognized); + bol = next.found.pend; + + if( yy_flex_debug ) { + size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n'); + dbgmsg( "%s:%d: line %zu @ %zu: '%s'\n/%.*s/%.*s/", __func__, __LINE__, + ++n, next.found.p - mfile.data, + next.directive.before.p, + int(recognized.before.size()), recognized.before.p, + int(recognized.after.size()), recognized.after.p ); + } + + // Update the futures element for this pattern + cmatch cm; + + next.found = span_t(mfile.eodata, mfile.eodata); + + regex re(next.directive.before.p, extended_icase); + if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) { + gcc_assert(cm[1].matched); + next.found = span_t( cm[1].first, cm[1].second ); + size_t n = std::count((const char *)mfile.data, next.found.p, '\n'); + if( false ) + dbgmsg("%s:%d next '%.*s' will be on line %zu (offset %zu)", __func__, __LINE__, + next.directive.before.size(), next.directive.before.p, + ++n, next.found.p - mfile.data); + } + pnext = std::min_element(futures.begin(), futures.end()); + } +} + +static void +check_source_format_directive( filespan_t& mfile ) { + const char *p = std::find(mfile.cur, mfile.eol, '>'); + if( ! (p < mfile.eol && p[1] == *p ) ) return; + + const char pattern[] = + ">>[[:blank:]]*source[[:blank:]]+" + "(format[[:blank:]]+)?" + "(is[[:blank:]]+)?" + "(fixed|free)"; + static regex re(pattern, extended_icase); + + // show contents of marked subexpressions within each match + cmatch cm; + if( regex_search(p, (const char *)mfile.eol, cm, re) ) { + gcc_assert(cm.size() > 1); + switch( cm[3].length() ) { + case 4: + cobol_set_indicator_column(0); + break; + case 5: + cobol_set_indicator_column(-7); + break; + default: + gcc_assert(cm[3].length() == 4 || cm[3].length() == 5); + break; + } + mfile.cur = const_cast<char*>(cm[0].second); + dbgmsg( "%s:%d: %s format set, on line %zu", __func__, __LINE__, + indicator.column == 7? "FIXED" : "FREE", mfile.lineno() ); + erase_line(const_cast<char*>(cm[0].first), + const_cast<char*>(cm[0].second)); + } +} + +struct buffer_t : public bytespan_t { + char *pos; // current output position + + buffer_t( char *data, char *eodata ) + : bytespan_t(data, eodata) + , pos(data) + { + if(pos) *pos = '\0'; + } + + size_t nline() const { + gcc_assert(data <= pos); + return std::count(data, pos, '\n'); + } + size_t free_space() const { gcc_assert(pos <= eodata); return eodata - pos; } + + bool pad_lines( size_t goal ) { + while( nline() < goal ) { + if( pos == eodata ) return false; + *pos++ = '\n'; + } + return true; + } + + void show() const { + gcc_assert(data <= pos); + dbgmsg("flex input buffer: '%.*s'\n[xelf]", int(pos - data), data); + } + void dump() const { + if( getenv("lexer_input") ) show(); + } +}; + +static bool +valid_sequence_area( const char *p, const char *eodata ) { + const char *pend = p + 6; + if ( eodata < pend ) return false; + + for( ; p < pend; p++ ) { + if( ! (ISDIGIT(*p) || *p == SPACE) ) { + return false; + } + } + return true; // characters either digits or blanks +} + +const char * esc( size_t len, const char input[] ); + +static bool +is_word_char( char ch ) { + switch(ch) { + case '$': + case '-': + case '_': + return true; + } + return ISALNUM(ch); +} + +static bool +is_numeric_char( char ch ) { + return ISDIGIT(ch) + || TOUPPER(ch) == 'E' + || ch == '.' + || ch == ',' + ; +} + +static bool +is_numeric_term( span_t term ) { + gcc_assert(term.p); + if( term.p[0] == '+' || term.p[0] == '-' ) term.p++; + auto p = std::find_if( term.p, term.pend, + []( char ch ) { + return ! is_numeric_char(ch); + } ); + return p == term.pend; +} + +struct replacing_term_t { + bool matched, done; + span_t leading_trailing, term, stmt; + + replacing_term_t(const char input[]) : matched(false), done(false) { + stmt = span_t(input, input); + } +}; + +extern YYLTYPE yylloc; + +static const char * +last_newline (const char *p, const char *pend ) { + size_t len = pend - p; + return static_cast<const char *>( memrchr( p, '\n', len ) ); +} +/* + * For some statement parsed with regex_search, set yyloc to indicate the line + * and column spans of the term. Assume stmt begins at the start of a line. + */ +static void +update_yylloc( const csub_match& stmt, const csub_match& term ) { + gcc_assert(stmt.first <= term.first && term.second <= stmt.second); + + class dump_loc_on_exit { + public: + dump_loc_on_exit() { + if( getenv( "update_yylloc" ) ) + location_dump( "update_yylloc", __LINE__, "begin", yylloc); + } + ~dump_loc_on_exit() { + if( getenv( "update_yylloc" ) ) + location_dump( "update_yylloc", __LINE__, "end ", yylloc); + } + } dloe; + + size_t nline = std::count( stmt.first, term.second, '\n' ); + size_t n = std::count( term.first, term.second, '\n' ); + + if( nline ) { + yylloc.last_line += nline; + yylloc.first_line = yylloc.last_line - n; + } + + /* + * Set the column span for the term. + */ + const char *p = last_newline(stmt.first, stmt.second); + if( !p ) { // no newlines in entire statement + yylloc.first_column = (term.first - stmt.first) + 1; + yylloc.last_column = (term.second - stmt.first) + 1; + return; + } + + p = last_newline(stmt.first, term.first); + if( !p ) { // no newlines before term + yylloc.first_column = (term.first - stmt.first) + 1; + p = last_newline(term.first, term.second); + gcc_assert(p); // newline must be in term + yylloc.last_column = (term.second - p) + 1; + return; + } + + const char *bol = p; // bol points to last newline before term + + yylloc.first_column = term.first - bol; + p = last_newline(term.first, term.second); + if( p ) { // term has newlines, too + yylloc.last_column = (p - term.first); + } else { + yylloc.last_column = yylloc.first_column + term.length(); + } +} + +static replacing_term_t +parse_replacing_term( const char *stmt, const char *estmt ) { + gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt); + replacing_term_t output(stmt); + + static const char pattern[] = + "^([[:space:];,]+(LEADING|TRAILING|BY))?" // 1, 2 + "[[:space:];,]+" // leading space between pairs + "(" // 3 + "(\"" "([\"]{2}|[^\"])*" "\")" // 4, 5 + "|" "('" "([']{2}|[^'])*" "')" // 6, 7 + "|" "(" "[+-]?[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" // 8, 9 + "|" "(==(" "(=?[^=]+)*" ")==)" // 10, 11, 12 + ")" + "(([[:space:];,]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:];,]*([.]))?" // 13, 14, 15 + ; + + static regex re(pattern, extended_icase); + cmatch cm; + + if( ! regex_search( stmt, estmt, cm, re) ) return output; + + bool replacing_term = cm[2].matched && TOUPPER(cm[2].first[0]) == 'B'; + + if( cm[2].matched && ! replacing_term ) { + output.leading_trailing = cm[2]; + } + + // Apply such that quoted matches supersede word matches. + if( cm[11].matched ) output.term = cm[11]; + if( cm[ 8].matched ) output.term = cm[ 8]; + if( cm[ 6].matched ) output.term = cm[ 6]; + if( cm[ 4].matched ) output.term = cm[ 4]; + + // The matched segment extends to the end of the matched term, or to + // the dot at end of statement. Include the pseudotext ==, if found. + output.stmt = span_t(cm[0].first, output.term.pend); + if( cm[10].matched ) output.stmt.pend = cm[10].second; + + if( cm[15].matched && ISSPACE(cm[15].second[0]) ) { // matched end of statement + output.done = output.matched = true; + output.stmt = cm[0]; + gcc_assert(output.stmt.pend[-1] == '.'); + dbgmsg("%s:%d: done at '%.*s'", __func__, __LINE__, + output.term.size(), output.term.p); + return output; + } + + if( is_numeric_term(output.term) ) { + output.matched = output.stmt.p < output.term.p; + gcc_assert(output.matched); + // look for fractional part + if( is_numeric_char(*output.term.pend) && ISDIGIT(output.term.pend[1]) ) { + gcc_assert(!ISDIGIT(*output.term.pend)); + auto p = std::find_if(++output.term.pend, estmt, + []( char ch ) { return !ISDIGIT(ch); } ); + output.stmt.pend = output.term.pend = p; + output.done = '.' == output.stmt.pend[0] && ISSPACE(output.stmt.pend[1]); + if( output.done ) output.stmt.pend++; + } + dbgmsg("%s:%d: %s '%.*s'", __func__, __LINE__, + output.done? "done at" : "term is", + output.term.size(), output.term.p); + return output; + } + + if( yy_flex_debug ) { // should be looking only for words + dbgmsg("%s:%d: not done, working with '%.*s'", __func__, __LINE__, + cm[0].length(), cm[0].first); + int i=0; + for( auto m : cm ) { + if( m.matched ) + dbgmsg("%4d) '%.*s'", i, m.length(), m.first); + i++; + } + } + + if( !cm[8].matched ) { + output.matched = output.stmt.p < output.term.p; + gcc_assert(output.matched); + dbgmsg("%s:%d: term is '%.*s'", __func__, __LINE__, + output.term.size(), output.term.p); + return output; + } + + bool extraneous_replacing = 'R' == TOUPPER(cm[8].first[0]); // maybe + if( extraneous_replacing ) { // prove it + static const char replacing[] = "REPLACING"; + for( size_t i=0; i < strlen(replacing); i++ ) { + if( replacing[i] != TOUPPER(cm[8].first[i]) ) { + extraneous_replacing = false; + break; + } + } + if( extraneous_replacing ) { + update_yylloc( cm[0], cm[8] ); + yywarn("syntax error: invalid '%.*s'", cm[8].length(), cm[8].first); + output.matched = false; + return output; + } + } + + gcc_assert(cm[8].matched); + gcc_assert(0 < output.term.size()); + + dbgmsg("%s:%d: more words starting at '%.80s'", __func__, __LINE__, + output.term.pend); + + static const char term_pattern[] = + "^[[:space:]]+" + "(" "(IN|OF)[[:space:]]+" ")" // 1, 2 + "(" "[+-]?[[:alnum:]]+([$_-]+[[:alnum:]]+)*" ")" // 3, 4 + "(" "[[:space:]]*[(]" ")?" // 5 + "(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 6, 7, 8 + ; + static const char paren_pattern[] = + "^[[:space:]]*" + "(" "[()][^()]*[()]" ")" // 1 + "(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 2, 3, 4 + ; + + regex term_re(term_pattern, extended_icase); + regex paren_re(paren_pattern, extended_icase); + ssize_t nsub = 0; + + while( regex_search( output.term.pend, estmt, cm, term_re) ) { + output.stmt.pend = output.term.pend = cm[3].second; // found a word + if( cm[5].matched ) break; // found left parenthesis + const csub_match& done(cm[8]); + if( done.matched ) { + output.done = output.matched = output.stmt.p < output.term.p; + gcc_assert(output.done); + goto matched; + } + } + + // match subscripts, if any + while( regex_search( output.term.pend, estmt, cm, paren_re) ) { + output.stmt.pend = output.term.pend = cm[1].second; + if( cm[1].first[0] == '(' ) nsub++; + if( cm[1].first[0] == ')' ) nsub--; + if( cm[1].second[-1] == '(' ) nsub++; + if( cm[1].second[-1] == ')' ) nsub--; + + const csub_match& done(cm[4]); + if( done.matched ) { + output.matched = output.stmt.p < output.term.p; + output.stmt.pend = done.second; + output.done = output.stmt.pend[-1] == '.'; + goto matched; + } + + if( nsub == 0 ) break; + } + + matched: + output.matched = output.stmt.p < output.term.p; + + if( yy_flex_debug ) { + const char *status = "unmatched"; + if( output.matched ) status = output.done? "done" : "matched"; + dbgmsg("%s:%d: %s term is '%.*s'", __func__, __LINE__, status, + output.term.size(), output.term.p? output.term.p : ""); + } + return output; +} + +struct replacing_pair_t { + span_t leading_trailing, stmt; + replace_t replace; + + bool matched() const { return 0 < stmt.size(); } + bool done() const { return matched() && stmt.pend[-1] == '.'; } +}; +static replacing_pair_t +parse_replacing_pair( const char *stmt, const char *estmt ) { + replacing_pair_t pair; + + pair.replace = replace_t(); + auto parsed = parse_replacing_term( stmt, estmt ); // before + if( parsed.matched ) { + if( parsed.term.size() == 0 ) return pair; // failure: empty before string + pair.leading_trailing = parsed.leading_trailing; + pair.stmt = parsed.stmt; + pair.replace.before = parsed.term; + + if( !parsed.done ) { + parsed = parse_replacing_term( pair.stmt.pend, estmt ); // after + if( parsed.matched ) { + pair.stmt.pend = parsed.stmt.pend; + pair.replace.after = parsed.term; + } else { + dbgmsg("%s:%d: not matched '%.*s'", __func__, __LINE__, + pair.stmt.size(), pair.stmt.p); + } + } + if( yy_flex_debug ) { + const char *status = "unmatched"; + if( pair.matched() ) status = pair.done()? "done" : "matched"; + dbgmsg("%s:%d: [%s] replacing '%.*s' with '%.*s'", __func__, __LINE__, + status, + pair.replace.before.size(), pair.replace.before.p, + pair.replace.after.size(), pair.replace.after.p); + } + } else { + for( auto p = stmt; (p = std::find(p, estmt, '.')) < estmt; p++ ) { + if( ISSPACE(p[1]) ) { + pair.stmt = span_t(stmt, ++p); + break; + } + } + if( pair.stmt.p ) { + yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p); + } + else { + // This eliminated a compiler warning about "format-overflow" + yywarn("CDF syntax error"); + } + pair.stmt = span_t(0UL, stmt); + pair.replace = replace_t(); + } + return pair; +} + +static std::pair<std::list<replace_t>, char *> +parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { + std::list<replace_t> pairs ; + + static const char any_ch[] = "."; + static const char word_ch[] = "[[:alnum:]$_-]"; + static const char nonword_ch[] = "[^[:alnum:]\"'$_-]"; + + // Pattern to find one REPLACE pseudo-text pair + static const char replace_pattern[] = + "([[:space:]]+(LEADING|TRAILING))?" // 1, 2 + "[[:space:]]+" + "==(" "(=?[^=]+)+" ")==" // 3, 4 + "[[:space:]]+BY[[:space:]]+" + "==(" "(=?[^=]+)*" ")==" // 5, 6 + "(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 7, 8, 9 + ; + + regex pair_re(replace_pattern, extended_icase); + cmatch cm; + replacing_pair_t parsed; + bool end_of_stmt = false; + + for( auto p = stmt; p < estmt && !end_of_stmt; p = parsed.stmt.pend ) { + if( is_copy_stmt ) { + parsed = parse_replacing_pair(p, estmt); + if( parsed.replace.before.size() == 0 ) break; // empty before + if( parsed.replace.after.p == NULL ) break; // invalid after + end_of_stmt = parsed.done(); + } else { + if( ! regex_search( p, estmt, cm, pair_re) ) break; + // Report findings. + if( false && yy_flex_debug ) { + for( size_t i=0; i < cm.size(); i++ ) { + dbgmsg("%s: %s %zu: '%.*s'", __func__, + cm[i].matched? "Pair" : "pair", + i, + cm[i].matched? int(cm[i].length()) : 0, + cm[i].matched? cm[i].first : ""); + } + } + gcc_assert(cm[3].matched); + gcc_assert(cm[5].matched); + parsed.leading_trailing = cm[2]; + parsed.replace.before = cm[3]; + parsed.replace.after = cm[5]; + + parsed.stmt = cm[0]; + // If not done, exclude trailing portion from statement match. + if( !parsed.done() && cm[8].matched ) { + gcc_assert(!cm[9].matched); + parsed.stmt.pend = cm[8].first; + } + } + + span_t& before(parsed.replace.before); + span_t& after(parsed.replace.after); + + const char *befter[2] = { nonword_ch, nonword_ch }; + gcc_assert(before.p < before.pend); + if( !is_word_char(before.p[0]) ) befter[0] = any_ch; + if( !is_word_char(before.pend[-1]) ) befter[1] = any_ch; + + const char *src = esc(before.size(), before.p); + + if( parsed.leading_trailing.size() > 0 ) { + switch( TOUPPER(parsed.leading_trailing.p[0]) ) { + case 'L': // leading + befter[1] = word_ch; + break; + case 'T': // trailing + befter[0] = word_ch; + break; + default: + gcc_unreachable(); + } + dbgmsg("%s:%d: dealing with %.*s", __func__, __LINE__, + int(parsed.leading_trailing.size()), parsed.leading_trailing.p); + } + + src = xasprintf("%s(%s)%s", befter[0], src, befter[1]); + + struct { span_t before, after; } output; + output.before = span_t(strlen(src), src); + output.after = after.dup(); + + gcc_assert(!before.has_nul()); + pairs.push_back( replace_t( output.before, output.after ) ); + + // COPY REPLACING matches end-of-statment here + // REPLACE matched end-of-statement in caller, and estmt[-1] == '.' + if( is_copy_stmt && parsed.stmt.pend[-1] == '.' ) break; + } + + if( yy_flex_debug ) { + dbgmsg( "%s:%d: %s: %zu pairs parsed from '%.*s'", __func__, __LINE__, + parsed.done()? "done" : "not done", + pairs.size(), parsed.stmt.size(), parsed.stmt.p ); + int i = 0; + for( const auto& replace : pairs ) { + dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__, + ++i, replace.before.p, replace.after.p); + } + } + if( !parsed.done() ) { + pairs.clear(); + return std::make_pair(pairs, const_cast<char*>(stmt)); + } + + return std::make_pair(pairs, const_cast<char*>(parsed.stmt.pend)); +} + +struct copy_descr_t { + bool parsed; + int fd; + size_t nreplace; + span_t partial_line, erased_lines; + + copy_descr_t( const char *line, const char *eol) + : parsed(false), fd(-1), nreplace(0), partial_line(line, eol) {} +}; + +static YYLTYPE +location_in( const filespan_t& mfile, const csub_match cm ) { + YYLTYPE loc { + int(mfile.lineno() + 1), int(mfile.colno() + 1), + int(mfile.lineno() + 1), int(mfile.colno() + 1) + }; + gcc_assert(mfile.cur <= cm.first && cm.second <= mfile.eodata); + auto nline = std::count(cm.first, cm.second, '\n'); + if( nline ) { + gcc_assert(loc.first_line < nline); + loc.first_line -= nline; + auto p = static_cast<const char*>(memrchr(cm.first, '\n', cm.length())); + loc.last_column = (cm.second) - p; + } + location_dump(__func__, __LINE__, "copy?", loc); + return loc; +} + +static copy_descr_t +parse_copy_directive( filespan_t& mfile ) { + static const char *most_recent_buffer; + static span_t copy_stmt(mfile.eodata, mfile.eodata); + + static const char pattern[] = + "COPY" "[[:space:]]+" + /* 1 */ "(" + /*2,3*/ "\"(" "([\"]{2}|[^\"])+" ")\"" + /*4,5*/ "|" "'(" "([']{2}|[^'])+" ")[']" + /*6,7*/ "|" "(" "[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" + /* */ ")" + /* 8 */ "(" + /* 9 */ "[[:space:]]+(OF|IN)[[:space:]]+" + /* 10*/ "(" + /*11,12*/ "(\"" "([\"]{2}|[^\"])+" "\")" + /*13,14*/ "|" "('" "([']{2}|[^'])+" "')" + /*15,16*/ "|" "(" "[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" + /* */ ")" + /* */ ")?" + /*17,18*/ "([[:space:]]+SUPPRESS([[:space:]]+PRINTING)?)?" + /*19,20 */ "(" "([[:space:]]*[.])" "|" "[[:space:]]+REPLACING" ")" + ; + + static regex re(pattern, extended_icase); + cmatch cm; + copy_descr_t outcome(mfile.cur, mfile.cur); + + // COPY appears in current buffer? + if( most_recent_buffer != mfile.data || copy_stmt.p < mfile.cur ) { + most_recent_buffer = mfile.data; + copy_stmt.p = mfile.eodata; + + if( regex_search(mfile.ccur(), + (const char *)mfile.eodata, cm, re) ) { + copy_stmt = span_t( cm[0].first, cm[0].second ); + if( yy_flex_debug ) { + size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p); + size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend); + dbgmsg("%s:%d: line %zu: COPY directive is %zu lines '%.*s'", + __func__, __LINE__, + nnl, nst, copy_stmt.size(), copy_stmt.p); + } + } + } + + // If COPY appears on the current line, parse it completely this time. + if( mfile.cur <= copy_stmt.p && + copy_stmt.p < mfile.eol ) { + outcome.parsed = regex_search(copy_stmt.p, copy_stmt.pend, cm, re); + gcc_assert(outcome.parsed); + outcome.partial_line = span_t(mfile.cur, copy_stmt.p); + + if( yy_flex_debug ) { + dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '(')); + int i = 0; + for( const auto& m : cm ) { + if( m.matched ) + dbgmsg("%s:%d: %2d: '%.*s'", __func__, __LINE__, + i, int(m.length()), m.first); + i++; + } + } + + auto& copybook_name = cm[1]; + auto& library_name = cm[10]; + + bool replacing = !cm[20].matched; + + if( library_name.matched ) { + YYLTYPE loc = location_in( mfile, library_name ); + copybook.library( loc, xstrndup(library_name.first, library_name.length()) ); + } + YYLTYPE loc = location_in( mfile, copybook_name ); + outcome.fd = copybook.open( loc, xstrndup(copybook_name.first, + copybook_name.length()) ); + if( outcome.fd == -1 ) { // let parser report missing copybook + dbgmsg("%s:%d: (no copybook '%s' found)", __func__, __LINE__, copybook.source()); + return outcome; + } + + if( replacing ) { + std::pair<std::list<replace_t>, char*> + result = parse_replace_pairs( cm[0].second, mfile.eodata, true ); + + std::list<replace_t>& replacements(result.first); + outcome.parsed = (outcome.nreplace = replacements.size()) > 0; + if( outcome.parsed ) { + replace_directives.push(replacements); + } + copy_stmt.pend = result.second; + + // Maybe we don't need these. We'll see. + for( const auto& r : replacements ) { + copybook.replacement(pseudo_e, r.before.dup().p, r.after.dup().p); + } + } + + // If the parse failed, pass it through to the parser for analysis. + if( outcome.parsed ) { + erase_line( const_cast<char*>(copy_stmt.p), + const_cast<char*>(copy_stmt.pend)); + outcome.erased_lines = copy_stmt; + } + + mfile.eol = const_cast<char*>(copy_stmt.pend); + mfile.next_line(); + } + return outcome; +} + +static char * +parse_replace_last_off( filespan_t& mfile ) { + static const char pattern[] = + "REPLACE" "[[:space:]]+" + "(LAST[[:space:]]+)?OFF[[:space:]]*[.]" + ; + static regex re(pattern, extended_icase); + cmatch cm; + + // REPLACE [LAST] OFF? + bool found = regex_search(mfile.ccur(), + (const char *)mfile.eodata, cm, re); + gcc_assert(found); // caller ensures + + gcc_assert(cm.size() == 2); + // LAST OFF removes most recent REPLACE + if( cm[1].matched ) { + gcc_assert(TOUPPER(cm[1].first[0]) == 'L'); + if( ! replace_directives.empty() ) { + replace_directives.pop(); + } + } else { // OFF clears the REPLACE stack + while( ! replace_directives.empty() ) { + replace_directives.pop(); + } + } + + dbgmsg( "%s:%d: line %zu: parsed '%.*s', ", __func__, __LINE__, + mfile.lineno(), int(cm[0].length()), cm[0].first ); + + // Remove statement from input + erase_line(const_cast<char*>(cm[0].first), + const_cast<char*>(cm[0].second)); + + return const_cast<char*>(cm[0].second); +} + +static span_t +parse_replace_text( filespan_t& mfile ) { + static const char pattern[] = + /* 0 */ "REPLACE" + /* 1 */ "([[:space:]]+ALSO)?" + /* 2 */ "(" + /*3,4*/ "([[:space:]]+(LEADING|TRAILING))?" + /* 5 */ "([[:space:]]+" + /* 6 */ "==" "(=?[^=]+)+" "==" + /* */ "[[:space:]]+BY[[:space:]]+" + /* 7 */ "==" "(=?[^=]+)*" "==" + /* */ ")" + /* */ ")+[[:space:]]*[.]" + ; + static regex re(pattern, extended_icase); + cmatch cm; + const size_t current_lineno(mfile.lineno()); + + if( false && yy_flex_debug ) { + auto pend = mfile.eol; + gcc_assert(mfile.line_length() > 2); + if( pend[-1] == '\n' ) pend -= 2; + auto len = int(pend - mfile.cur); + dbgmsg("%s:%d: line %zu: parsing '%.*s", __func__, __LINE__, + current_lineno, len, mfile.cur); + } + + if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + dbgmsg( "%s:%d: line %zu: not a REPLACE statement:\n'%.*s'", + __func__, __LINE__, current_lineno, + int(mfile.line_length()), mfile.cur ); + return span_t(); + } + + // Report findings. + if( yy_flex_debug ) { + dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '(')); + int i = 0; + for( const auto& m : cm ) { + if( m.matched ) + dbgmsg("%s:%d: %2d: '%.*s'", __func__, __LINE__, + i, int(m.length()), m.first); + i++; + } + } + + gcc_assert(cm.size() > 7); + + // Update active REPLACE stack + if( ! cm[1].matched ) { // ALSO pushes, else clear stack and push one. + while( !replace_directives.empty() ) { + replace_directives.pop(); + } + } else { + gcc_assert(TOUPPER(cm[1].first[0]) == 'A'); + } + + span_t replace_stmt(cm[0].first, cm[0].second); + + std::pair<std::list<replace_t>, char*> + result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false); + std::list<replace_t>& replacements(result.first); + replace_directives.push( replacements ); + + if( yy_flex_debug ) { + dbgmsg( "%s:%d: line %zu: %zu pairs parsed from '%.*s'", __func__, __LINE__, + current_lineno, replacements.size(), int(replace_stmt.size()), replace_stmt.p ); + for( const auto& replace : replacements ) { + int i = 0; + dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__, + ++i, replace.before.p, replace.after.p); + } + } + + // Remove statement from input + erase_line(const_cast<char*>(replace_stmt.p), + const_cast<char*>(replace_stmt.pend)); + + return replace_stmt; +} + +static span_t +parse_replace_directive( filespan_t& mfile ) { + static const char *most_recent_buffer, *next_directive; + static bool off_coming_up; + static const char pattern[] = + "REPLACE" "[[:space:]]+" "(LAST|OFF|ALSO|LEADING|TRAILING|==)"; + + static regex re(pattern, extended_icase); + cmatch cm; + + // REPLACE appears in current buffer? + if( most_recent_buffer != mfile.data || next_directive < mfile.cur ) { + most_recent_buffer = mfile.data; + next_directive = mfile.eodata; + + if( regex_search(mfile.ccur(), + (const char *)mfile.eodata, cm, re) ) { + gcc_assert(cm[1].matched); + next_directive = cm[0].first; + + switch( TOUPPER(cm[1].first[0]) ) { + case 'L': + off_coming_up = 'A' == TOUPPER(cm[1].first[1]); // LAST OFF, else LEADING + break; + case 'O': // OFF + off_coming_up = true; + break; + case 'A': case 'T': case '=': // [ALSO] [ eading/Trailing] == ... + off_coming_up = false; + break; + default: + gcc_unreachable(); + } + } + } + + span_t erased; + // REPLACE appears on current line? + if( mfile.cur <= next_directive && + next_directive < mfile.eol ) { + if( off_coming_up ) { + parse_replace_last_off(mfile); + } else { + erased = parse_replace_text(mfile); + } + } + return erased; +} + +/* + * Maintain the number of newlines by counting those that will be + * overwritten, and appending them to the appended line. Return the + * new EOL pointer. + * + * The newlines accumulate past eodata, at the start of the blank + * lines created by the caller. + */ +char * +bytespan_t::append( const char *input, const char *eoinput ) { + gcc_assert(data < eodata); + +#define LEXIO 0 +#if LEXIO + auto nq = std::count_if(data, eodata, isquote); + dbgmsg("%s:%3d: input ------ '%.*s'", __func__, __LINE__, int(eoinput - input), input); + dbgmsg("%s:%3d: precondition '%.*s' (%zu: %s)", __func__, __LINE__, + int(size()), data, nq, in_string()? "in string" : "not in string"); +#endif + if( !in_string() ) { // Remove trailing space unless it's part of a literal. + while(data < eodata && ISSPACE(eodata[-1])) eodata--; + gcc_assert(ISSPACE(eodata[0])); + gcc_assert(data == eodata || !ISSPACE(eodata[-1])); + } + // skip leading blanks + while( input < eoinput && ISSPACE(*input) ) input++; + if( isquote(*input) ) input++; + + size_t len = eoinput - input; + char * pend = eodata + len; + + int nnl = std::count(eodata, pend, '\n'); // newlines to be overwritten + gcc_assert(0 == std::count(input, eoinput, '\n')); // newlines in input + + memmove(eodata, input, len); + nnl += std::count(pend, pend + nnl, '\n'); // other newlines to be overwritten + std::fill(pend, pend + nnl, '\n'); + + eodata = pend; + +#if LEXIO + dbgmsg("%s:%3d: postcondition '%.*s'", __func__, __LINE__, int(size() + len) - 1, data); +#endif + + return eodata; +} + +const char * cobol_filename(); + +static filespan_t& +mapped_file( FILE *input ) { + static std::map<int, filespan_t> inputs; + + int fd = fileno(input); + gcc_assert(fd > 0); + + filespan_t& mfile = inputs[fd]; + if( mfile.data ) { + return mfile; + } + + struct stat sb; + if( 0 != fstat(fd, &sb) ) { + cbl_err( "%s: could not stat fd %d", __func__, fd ); + } + + mfile.use_nada(); + + if( sb.st_size > 0 ) { + static const int flags = MAP_PRIVATE; + + void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0); + if( p == MAP_FAILED ) { + cbl_err( "%s: could not map fd %d", __func__, fd ); + } + + mfile.lineno_reset(); + mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast<char*>(p); + mfile.eodata += sb.st_size; + } + return mfile; +} + +char filespan_t::empty_file[8] = " \n"; + +static void unmap_file( filespan_t& mfile ) { + if( ! mfile.nada() ) { + munmap(mfile.data, mfile.size() - 1); + } + mfile = filespan_t(); +} + +extern int yylineno; + +static void +print_lexer_input( const char *buf, const char *ebuf ) { + const char *eol, *lexio = getenv("lexio"); + int i; + static int nbuf = 1; + static FILE *output = NULL; + + if( !lexio ) return; + if( !output ) { + output = fopen( lexio, "w" ); + if( !output ) output = stderr; + } + + fprintf( output, "*> buffer %d\n", nbuf ); + for( i = 0, eol = std::find(buf, ebuf, '\n'); + eol != ebuf; buf = eol, eol = std::find(buf, ebuf, '\n'), i++ ) { + eol++; + fprintf( output, "%5d %.*s", yylineno + i, int(eol - buf), buf ); + } + if( buf < ebuf ) { + fprintf( output, "%5d %.*s", yylineno + i, int(eol - buf), buf ); + } + fprintf( output, "*> endbuf %d\n", nbuf++ ); + fflush(output); +} + +/* + * Fill about as much of the lexer's buffer as possible, except skip + * leading blanks on blank lines. + */ +int +lexer_input( char buf[], int max_size, FILE *input ) { + filespan_t& mfile( mapped_file(input) ); + + if( mfile.cur == mfile.eodata ) { + if( mfile.cur ) unmap_file(mfile); + return 0; + } + + gcc_assert( mfile.data <= mfile.cur && mfile.cur < mfile.eodata ); + + char *next = std::min(mfile.eodata, mfile.cur + max_size); + buffer_t output(buf, buf + max_size); // initializes pos + + // Fill output, keeping only NL for blank lines. + for( auto p = mfile.cur; p < next; *output.pos++ = *p++ ) { + static bool at_bol = false; + if( at_bol ) { + auto nonblank = std::find_if( p, next, + []( char ch ) { + return !isblank(ch); } ); + if( nonblank + 1 < next ) { + if( *nonblank == '\r' ) nonblank++; // Windows + if( *nonblank == '\n' ) { + p = nonblank; + continue; + } + } + } + at_bol = *p == '\n'; + } + + gcc_assert( output.pos <= output.eodata ); + output.eodata = output.pos; + + mfile.cur = next; + gcc_assert(mfile.cur <= mfile.eodata); + + // Buffer full or input exhausted. + print_lexer_input(output.data, output.eodata); + + return output.size(); +} + +static const char * +find_filter( const char filter[] ) { + + if( 0 == access(filter, X_OK) ) { + return filter; + } + + const char *path = getenv("PATH"); + if( ! path ) return NULL; + char *p = xstrdup(path), *eopath = p + strlen(p); + + while( *p != '\0' ) { + auto pend = std::find( p, eopath, ':' ); + if( *pend == ':' ) *pend++ = '\0'; + + char *name = xasprintf( "%s/%s", p, filter ); + + if( 0 == access(name, X_OK) ) { + return name; + } + p = pend; + } + return NULL; +} + +bool verbose_file_reader = false; + +typedef std::pair <char *, std::list<std::string> > preprocessor_filter_t; +static std::list<preprocessor_filter_t> preprocessor_filters; +static std::list<const char *> included_files; + +/* + * Keep a list of files added with -include on the command line. + */ +bool +include_file_add(const char filename[]) { + struct stat sb; + if( -1 == stat(filename, &sb) ) return false; + included_files.push_back(filename); + return true; +} + +bool +preprocess_filter_add( const char input[] ) { + char filter[ strlen(input) + 1 ]; + strcpy(filter, input); + char *optstr = strchr(filter, ','); + std::list <std::string> options; + + if( optstr ) { + for( char *opt = optstr + 1; (opt = strtok(opt, ",")); opt = NULL ) { + options.push_back(opt); + } + *optstr = '\0'; + } + + auto filename = find_filter(filter); + if( !filename ) { + yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter); + return false; + } + preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) ); + return true; +} + +void +cdftext::echo_input( int input, const char filename[] ) { + int fd; + if( -1 == (fd = dup(input)) ) { + yywarn( "could not open preprocessed file %s to echo to standard output", + filename ); + return; + } + + auto mfile = map_file(fd); + + if( -1 == write(STDOUT_FILENO, mfile.data, mfile.size()) ) { + yywarn( "could not write preprocessed file %s to standard output", + filename ); + } + if( -1 == munmap(mfile.data, mfile.size()) ) { + yywarn( "could not release mapped file" ); + } + if( -1 == close(fd) ) { + yywarn( "could not close mapped file" ); + } +} + +static inline ino_t +inode_of( int fd ) { + struct stat sb; + if( -1 == fstat(fd, &sb) ) { + cbl_err( "could not stat fd %d", fd); + } + return sb.st_ino; +} + +FILE * +cdftext::lex_open( const char filename[] ) { + int input = open_input( filename ); + if( input == -1 ) return NULL; + + int output = open_output(); + + // Process any files supplied by the -include comamnd-line option. + for( auto name : included_files ) { + int input; + if( -1 == (input = open(name, O_RDONLY)) ) { + yyerrorvl(1, "", "cannot open -include file %s", name); + continue; + } + cobol_filename(name, inode_of(input)); + filespan_t mfile( free_form_reference_format( input ) ); + + process_file( mfile, output ); + } + + cobol_filename(filename, inode_of(input)); + filespan_t mfile( free_form_reference_format( input ) ); + + process_file( mfile, output ); + + if( lexer_echo() ) { + echo_input(output, filename); + } + + for( auto filter_pair : preprocessor_filters ) { + input = output; + output = open_output(); + + char *filter = filter_pair.first; + std::list<std::string>& options = filter_pair.second; + + char * argv[2 + options.size()] = { filter }; + + auto last_argv = std::transform( options.begin(), options.end(), argv + 1, + []( std::string& opt ) { + return xstrdup(opt.c_str()); + } ); + *last_argv = NULL; + + pid_t pid = fork(); + + switch(pid){ + case -1: cbl_err( "%s", __func__); + break; + case 0: // child + if( -1 == dup2(input, STDIN_FILENO) ) { + cbl_err( "%s: could not dup input", __func__); + } + if( -1 == dup2(output, STDOUT_FILENO) ) { + cbl_err( "%s: could not dup output", __func__); + } + if( -1 == lseek(STDIN_FILENO, SEEK_SET, 0) ) { + cbl_err( "%s: could not seek to start of file", __func__); + } + int erc; + if( -1 == (erc = execv(filter, argv)) ) { + yywarn("could not execute %s", filter); + } + _exit(erc); + } + int status; + auto kid = wait(&status); + gcc_assert(pid == kid); + if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid); + + if( WIFSIGNALED(status) ) { + cbl_errx( "%s pid %d terminated by %s", + filter, kid, strsignal(WTERMSIG(status)) ); + } + if( WIFEXITED(status) ) { + if( (status = WEXITSTATUS(status)) != 0 ) { + cbl_errx( "%s exited with status %d", + filter, status); + } + } + yywarn( "applied %s", filter ); + } + + return fdopen( output, "r"); +} + +int +cdftext::open_input( const char filename[] ) { + int fd = open(filename, O_RDONLY); + if( fd == -1 ) { + dbgmsg( "could not open '%s': %m", filename ); + } + + verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR"); + + if( verbose_file_reader ) { + yywarn("verbose: opening %s for input", filename); + } + return fd; +} + +int +cdftext::open_output() { + char *name = getenv("GCOBOL_TEMPDIR"); + int fd; + + if( name && 0 != strcmp(name, "/") ) { + char * stem = xasprintf("%sXXXXXX", name); + if( -1 == (fd = mkstemp(stem)) ) { + cbl_err( "could not open temporary file '%s' (%s)", + name, realpath(name, stem)); + } + return fd; + } + + FILE *fh = tmpfile(); + if( !fh ) { + cbl_err("could not create temporary file"); + } + + return fileno(fh); +} + +filespan_t +cdftext::map_file( int fd ) { + gcc_assert(fd > 0); + + filespan_t mfile; + mfile.use_nada(); + + struct stat sb; + do { + if( 0 != fstat(fd, &sb) ) { + cbl_err( "%s: could not stat fd %d", __func__, fd ); + } + if( S_ISFIFO(sb.st_mode) ) { + // Copy FIFO to regular file that can be mapped. + int input = open_output(); + std::swap(fd, input); // fd will continue to be the input + static char block[4096 * 4]; + ssize_t n; + while( (n = read(input, block, sizeof(block))) != 0 ) { + ssize_t nout = write(fd, block, n); + if( nout != n ) { + cbl_err( "%s: could not prepare map file from FIFO %d", + __func__, input); + } + if( false ) dbgmsg("%s: copied %ld bytes from FIFO", + __func__, nout); + } + } + } while( S_ISFIFO(sb.st_mode) ); + + if( sb.st_size > 0 ) { + static const int flags = MAP_PRIVATE; + + void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0); + if( p == MAP_FAILED ) { + cbl_err( "%s: could not map fd %d", __func__, fd ); + } + + mfile.lineno_reset(); + mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast<char*>(p); + mfile.eodata += sb.st_size; + } + + return mfile; +} + +bool lexio_dialect_mf(); + +filespan_t +cdftext::free_form_reference_format( int input ) { + filespan_t source_buffer = map_file(input); + filespan_t mfile(source_buffer); + + /* + * current_line_t describes the segment of mapped file that is the + * "current line" being processed. Its only use is for line + * continuation, whether string literals or not. + */ + struct current_line_t { + size_t lineno; + bytespan_t line; + // construct with length zero + current_line_t( char data[] ) : lineno(0), line(data, data) {} + } current( mfile.data ); + + /* + * If the format is not explicitly set on the command line, test the + * first 6 bytes of the first file to determine the format + * heuristically. If the first 6 characters are only digits or + * blanks, then the file is in fixed format. + */ + + if( indicator.inference_pending() ) { + const char *p = mfile.data; + while( p < mfile.eodata ) { + const char * pend = + std::find(p, const_cast<const char *>(mfile.eodata), '\n'); + if( 6 < pend - p ) break; + p = pend; + if( p < mfile.eodata) p++; + } + if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7; + + dbgmsg("%s:%d: %s format detected", __func__, __LINE__, + indicator.column == 7? "FIXED" : "FREE"); + } + + while( mfile.next_line() ) { + check_source_format_directive(mfile); + remove_inline_comment(mfile.cur, mfile.eol); + + if( mfile.is_blank_line() ) continue; + + char *indcol = indicated(mfile.cur, mfile.eol); // true only for fixed + // // format + + if( is_fixed_format() && !indcol ) { // short line + erase_source(mfile.cur, mfile.eol); + } + + if( indcol ) { + // Set to blank columns 1-6 and anything past the right margin. + erase_source(mfile.cur, indcol); + if( is_reference_format() ) { + if( mfile.cur + right_margin() < mfile.eol ) { + auto p = std::find(mfile.cur + right_margin(), mfile.eol, '\n'); + erase_source(mfile.cur + right_margin(), p); + } + } + + switch( TOUPPER(*indcol) ) { + case '-': + gcc_assert(0 < current.line.size()); + /* + * The "current line" -- the line being continued -- may be many + * lines earlier (with many intervening newlines) or may intrude + * on its succeeding line. Erase the continuation line. + */ + { + char *pend = mfile.eol; + if( right_margin() ) { + pend = std::min(mfile.cur + right_margin(), mfile.eol); + } + // The appended segment has no newline because the erased line retains + // one. + pend = std::find(indcol + 1, pend, '\n'); + char *p = current.line.append(indcol + 1, pend ); + if( (p = std::max(p, mfile.cur)) < mfile.eol ) { + erase_source(p, mfile.eol); + } + } + continue; + case SPACE: + break; + case 'D': + /* + * Pass the D to the lexer, because WITH DEBUGGING MODE is + * parsed in the parser. This assumes too strict a rule: that + * all the source is in one format. In fact, DEBUGGING MODE + * could be set on, and >>SOURCE-FORMAT can switch back and + * forth. To solve that, we'd have to parse WITH DEBUGGING MODE + * in free_form_reference_format(), which is a lot of work for + * an obsolete feature. + */ + break; + case '*': + case '/': + if( indcol < mfile.eol - 1 ) { + erase_source(indcol, mfile.eol); + } + continue; + case '$': + if( lexio_dialect_mf() ) { + break; + } + __attribute__ ((fallthrough)); + default: // flag other characters in indicator area + if( ! ISSPACE(indcol[0]) ) { + yyerrorvl( mfile.lineno(), cobol_filename(), + "error: stray indicator '%c' (0x%x): \"%.*s\"", + indcol[0], indcol[0], + int(mfile.line_length() - 1), mfile.cur ); + *indcol = SPACE; + } + break; + } + } + current.line.update(mfile.cur, mfile.eol, right_margin()); + current.lineno = mfile.lineno(); + } // next line + + return source_buffer; +} + +/* + * process_file is a recursive routine that opens and processes + * included files. It uses the input file stack in two ways: to check + * copybook uniqueness, and (via the lexer) to keep track filenames + * and line numbers. + * + * When reading copybook files, the copybook object enforces the rule + * that no copybook may include itself, even indirectly. It does that + * by relying on the unique_stack to deny a push. Because the reader + * makes no attempt to count lines, line numbers in the input stack + * are all 1 at this point. + * + * When returning from the top-level recursion, the input stack has + * the original file's name on top, with depth 1. At that point, the + * lexer begins tokenizing the input. + * + * The input stream sent to the lexer is delimited by #FILE tokens + * denoting the source filename. As far as the lexer is concerned, + * there's only ever one file: the name passed to lex_open() when we + * kicked things off. But messages and the debugger need to know + * which file and line each statment appeared in. + * + * The lexer uses the input stack to keep track of names and + * numbers. The top of the input file stack is the current file + * context, initially set to line 1. When the lexer sees a push, it + * updates the top-of-stack with the current line number, yylineno, + * and then pushes the copybook filename with line 1. When it sees a + * pop, the current file is popped, of course; its line number no + * longer matters. Then the top-of-stack is used to update the current + * cobol filename and yylineno. + */ +void +cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { + static size_t nfiles = 0; + std::list<replace_t> replacements; + + __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out); + std::ostream out(&outbuf); + std::ostream_iterator<char> ofs(out); + + // indicate current file + static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f"; + + if( !second_pass && nfiles++ ) { + static const char delimiter[] = "\f"; + const char *filename = cobol_filename(); + std::copy(file_push, file_push + strlen(file_push), ofs); + std::copy(filename, filename + strlen(filename), ofs); + std::copy(delimiter, delimiter + strlen(delimiter), ofs); + out.flush(); + } + + // parse CDF directives + while( mfile.next_line() ) { + yylloc = mfile.as_location(); + auto copied = parse_copy_directive(mfile); + if( copied.parsed && copied.fd != -1 ) { + gcc_assert(copied.erased_lines.p); + std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs, + []( char ch ) { return ch == '\n'; } ); + struct { int in, out; filespan_t mfile; } copy; + dbgmsg("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__, + mfile.lineno(), + copybook.source(), copybook.current()->fd); + copy.in = copybook.current()->fd; + copy.mfile = free_form_reference_format( copy.in ); + + if( copied.partial_line.size() ) { + std::copy(copied.partial_line.p, copied.partial_line.pend, ofs); + } + out.flush(); + + if( copied.nreplace == 0 ) { + // process with extant REPLACE directive + process_file(copy.mfile, output); + } else { + copy.out = open_output(); + // process to intermediate, applying COPY ... REPLACING + process_file(copy.mfile, copy.out); + copy.mfile = map_file(copy.out); + replace_directives.pop(); + // process intermediate with extant REPLACE directive + process_file(copy.mfile, output, true); + // COPY statement is erased from input if processed successfully + } + cobol_filename_restore(); + } + + auto erased = parse_replace_directive(mfile); + if( erased.p ) { + std::copy_if( erased.p, erased.pend, ofs, + []( char ch ) { return ch == '\n'; } ); + } + if( replace_directives.empty() ) { + std::copy(mfile.cur, mfile.eol, ofs); + continue; // No active REPLACE directive. + } + + std::list<span_t> segments = segment_line(mfile); // no replace yields + // // 1 segment + + for( const auto& segment : segments ) { + std::copy(segment.p, segment.pend, ofs); + } + + if( segments.size() == 2 ) { + struct { + size_t before, after; + int delta() const { return before - after; } } nlines; + nlines.before = std::count(segments.front().p, + segments.front().pend, '\n'); + nlines.after = std::count(segments.back().p, segments.back().pend, '\n'); + if( nlines.delta() < 0 ) { + yywarn("line %zu: REPLACED %zu lines with %zu lines, " + "line count off by %d", mfile.lineno(), + nlines.before, nlines.after, nlines.delta()); + } + int nnl = nlines.delta(); + while( nnl-- > 0 ) { + static const char nl[] = "\n"; + std::copy(nl, nl + 1, ofs); + } + } + out.flush(); + } + // end of file + if( !second_pass && --nfiles ) { + std::copy(file_pop, file_pop + strlen(file_pop), ofs); + out.flush(); + } +} + +std::list<span_t> +cdftext::segment_line( filespan_t& mfile ) { + std::list<span_t> output; + + gcc_assert( ! replace_directives.empty() ); + std::list<replace_t> pending; + recognize_replacements( mfile, pending ); + + if( pending.empty() ) { + output.push_back( span_t(mfile.cur, mfile.eol) ); + return output; + } + + for( const replace_t& segment : pending ) { + gcc_assert(mfile.cur <= segment.before.p); + gcc_assert(segment.before.pend <= mfile.eodata); + + output.push_back( span_t(mfile.cur, segment.before.p) ); + output.push_back( span_t(segment.after.p, segment.after.pend ) ); + + mfile.cur = const_cast<char*>(segment.before.pend); + } + + if( mfile.eol < mfile.cur ) { + if( (mfile.eol = std::find(mfile.cur, mfile.eodata, '\n')) < mfile.eodata ) { + mfile.eol++; + } + } + + // last segment takes to EOL + output.push_back( span_t(mfile.cur, mfile.eol) ); + + return output; +} + +//////// End of the cdf_text.h file diff --git a/gcc/cobol/lexio.h b/gcc/cobol/lexio.h new file mode 100644 index 0000000..cf7f53a --- /dev/null +++ b/gcc/cobol/lexio.h @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstdlib> +#include <cstring> + +#include <sys/mman.h> + +#ifndef _LEXIO_H_ +#define _LEXIO_H_ + +#define SPACE ' ' + +bool lexer_echo(); + +bool is_reference_format(); + +static inline bool isquote( char ch ) { + return ch == '\'' || ch == '"'; +} + +static inline void +erase_source( char *src, char *esrc ) { + std::replace_if(src, esrc, + [](char ch) { return ch != '\n'; }, + SPACE ); +} + +/* + * Column number as in Cobol, with 1 at the start of the line. + * 0: free-format, but comment lines may start with '*'. + * N: columns less than N treated as space. + * '/' or '*' in N starts a comment + * 'D' starts a debug line + * '-' is a line-continuation indicator + * Others ignored. + * Right margin is enforced if it is greater than left margin. + */ +struct bytespan_t { + char *data, *eodata; + + bytespan_t( char *data = NULL, char *eodata = NULL ) + : data(data), eodata(eodata) + { + if( eodata < data ) { + this->eodata = data + strlen(data); + } + assert( this->data <= this->eodata ); + } + size_t size() const { return eodata - data; } + + bool in_string( ) const { + char open = '\0'; + + for( char *q = data; (q = std::find_if(q, eodata, isquote)) != eodata; q++) { + if( !open ) { + open = *q; // first quote opens + continue; + } + if( open == *q && q + 1 < eodata && q[0] == q[1] ) { // doubled + q++; + continue; + } + if( open == *q ) open = '\0'; // closing quote must match + } + return isquote(open); + } + + char * append( const char *input, const char *eoinput ); + + bytespan_t& + update( char *line, char *eoline, size_t right_margin ) { + *this = bytespan_t(line, eoline); + if( right_margin && data + right_margin < eodata ) { + erase_source(data + right_margin, eodata); + eodata = data + right_margin; + } + eodata = std::find(data, eodata, '\n'); + return *this; + } +}; + +/* Location type. Borrowed from parse.h as generated by Bison. */ +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE YYLTYPE; +struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 +#endif + +struct filespan_t : public bytespan_t { + char *cur, *eol, *quote; + private: + size_t iline, icol; + size_t line_quote72; + static char empty_file[8]; + public: + filespan_t() + : cur(data), eol(data), quote(NULL), iline(0), icol(0), line_quote72(0) + {} + filespan_t(void *p, size_t len) + : bytespan_t( static_cast<char*>(p), static_cast<char*>(p) + len ) + , cur(data), eol(data), quote(NULL), iline(0), line_quote72(0) + {} + + size_t lineno() const { return iline; } + size_t colno() const { return icol; } + + void lineno_reset() { iline = 0; } + size_t colno( size_t icol ) { return this->icol = icol; } + + bool nada() const { return data == empty_file; } + void use_nada() { + assert(!data); + cur = eol = data = empty_file; + eol = eodata = empty_file + sizeof(empty_file) - 1; + } + + const char *ccur() const { return cur; } + + /* + * "If an alphanumeric or national literal that is to be continued on + * the next line has as its last character a quotation mark in + * column 72, the continuation line must start with two consecutive + * quotation marks." + */ + bool was_quote72() const { return iline == line_quote72 + 1; } + + size_t next_line() { + // Before advancing, mark the current line as ending in a quote, if true. + if( is_reference_format() && 72 <= line_length() ) { + if( isquote(cur[71]) ) { line_quote72 = iline; } + } + + cur = eol; + assert(data <= cur && cur <= eodata); + if( cur == eodata ) return 0; + + eol = std::find(cur, eodata, '\n'); + + if( eol < eodata ) { + ++eol; + ++iline; + icol = 0; + } + return eol - cur; + } + + size_t line_length() const { return eol - cur; } + + static size_t tab_check( const char *src, const char *esrc ); + + bool is_blank_line() const { + auto p = std::find_if( cur, eol, []( char ch ) { return !fisspace(ch); } ); + return p == eol; + } + + YYLTYPE as_location() const { + YYLTYPE loc; + + loc.first_line = loc.last_line = 1 + iline; + loc.first_column = loc.last_column = 1 + icol; + return loc; + } + +}; + +#if USE_STD_REGEX +# include <regex> +#else +# include "dts.h" +using dts::csub_match; +using dts::cmatch; +using dts::regex; +using dts::regex_search; +#endif + +struct span_t { + protected: + void verify() const { + if( !p ) { + dbgmsg("span_t::span_t: p is NULL"); + } else if( ! (p <= pend) ) { + dbgmsg("span_t::span_t: p %p > pend %p", p, pend); + } + assert(p && p <= pend); + } + span_t& trim() { + while( p < pend && isblank(p[0]) ) p++; + while( p < pend - 1 && isblank(pend[-1]) ) pend--; + return *this; + } + public: + const char *p, *pend; + span_t() : p(NULL), pend(NULL) {} + + span_t( size_t len, const char *data ) : p(data), pend(data + len) { + verify(); + } + span_t( const char *data, const char *eodata ) : p(data), pend(eodata) { + verify(); + } + span_t& operator=( const csub_match& cm ) { + p = cm.first; + pend = cm.second; + return p && pend ? trim() : *this; + } + + int size() const { return pend - p; } + + span_t dup() const { + auto output = new char[size() + 1]; + auto eout = std::copy(p, pend, output); + *eout = '\0'; + return span_t(output, eout); + } + const char * has_nul() const { + auto p = std::find(this->p, pend, '\0'); + return p != pend? p : NULL; + } +}; + +struct replace_t { + struct span_t before, after; + replace_t( span_t before = span_t(), + span_t after = span_t() ) + : before(before), after(after) + {} + replace_t& reset() { + before = after = span_t(); + return *this; + } +}; + +#include <cstdio> +#include <list> + +class cdftext { + static filespan_t free_form_reference_format( int fd ); + static void process_file( filespan_t, int output, bool second_pass = false ); + + static filespan_t map_file( int fd ); + + static void echo_input( int input, const char filename[] ); + + static int open_input( const char filename[] ); + static int open_output(); + + static std::list<span_t> segment_line( filespan_t& mfile ); + + public: + static FILE * lex_open( const char filename[] ); +}; + +std::list<replace_t> free_form_reference_format( filespan_t mfile ); + +#endif diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y new file mode 100644 index 0000000..15dbd1c --- /dev/null +++ b/gcc/cobol/parse.y @@ -0,0 +1,13107 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +%code requires { + #include <fstream> // Before cobol-system because it uses poisoned functions + #include "cobol-system.h" + #include <cmath> + #include <algorithm> + #include <map> + #include "io.h" + #include "ec.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + + enum radix_t { + decimal_e = 10, + hexadecimal_e = 16, + boolean_e = 2, + }; + + enum accept_func_t { + accept_done_e, + accept_command_line_e, + accept_envar_e, + }; + + class literal_t { + size_t isym; + public: + char prefix[3]; + size_t len; + char *data; + + bool empty() const { return data == NULL; } + size_t isymbol() const { return isym; } + const char * symbol_name() const { + return isym? cbl_field_of(symbol_at(isym))->name : ""; + } + + literal_t& + set( size_t len, char *data, const char prefix[] ) { + set_prefix(prefix, strlen(prefix)); + set_data(len, data); + return *this; + } + + literal_t& + set( const cbl_field_t * field ) { + assert(field->has_attr(constant_e)); + assert(is_literal(field)); + + set_prefix( "", 0 ); + set_data( field->data.capacity, + const_cast<char*>(field->data.initial), + field_index(field) ); + return *this; + } + literal_t& + set_data( size_t len, char *data, size_t isym = 0 ) { + this->isym = isym; + this->len = len; + this->data = data; + if( this->prefix[0] == 'Z' ) { + this->data = new char[++this->len]; + auto p = std::copy(data, data + len, this->data); + *p = '\0'; + } + return *this; + } + literal_t& + set_prefix( const char *input, size_t len ) { + assert(len < sizeof(prefix)); + std::fill(prefix, prefix + sizeof(prefix), '\0'); + std::transform(input, input + len, prefix, toupper); + return *this; + } + bool + compatible_prefix( const literal_t& that ) const { + if( prefix[0] != that.prefix[0] ) { + return prefix[0] != 'N' && that.prefix[0] != 'N'; + } + return true; + } + }; + + struct acrc_t { // Abbreviated combined relation condition + cbl_refer_t *term; + relop_t op; + bool invert; + acrc_t& init( cbl_refer_t *term = NULL, + relop_t op = relop_t(-1), + bool invert = false ) + { + this->term = term; + this->op = op; + this->invert = invert; + return *this; + } + static acrc_t make( cbl_refer_t *term = NULL, + relop_t op = relop_t(-1), + bool invert = false ) + { + acrc_t output; + return output.init( term, op, invert ); + } + relop_t relop_from( relop_t ante_op ) const { + assert(ante_op != -1); + return op != -1? op : ante_op; + } + bool is_relation_condition() const { return term && term->field; } + }; + typedef std::list<acrc_t> acrcs_t; + + enum data_category_t { data_category_none, + data_category_all, + data_alphabetic_e, + data_alphanumeric_e, + data_alphanumeric_edited_e, + data_boolean_e, + data_data_pointer_e, + data_function_pointer_e, + data_msg_tag_e, + data_dbcs_e, + data_egcs_e, + data_national_e, + data_national_edited_e, + data_numeric_e, + data_numeric_edited_e, + data_object_referenc_e, + data_program_pointer_e, + }; + + const char * data_category_str( data_category_t category ); + + typedef std::map<data_category_t, struct cbl_refer_t*> category_map_t; + + struct substitution_t { + enum subst_fl_t { subst_all_e, subst_first_e = 'F', subst_last_e = 'L' }; + bool anycase; + subst_fl_t first_last; + cbl_refer_t *orig, *replacement; + + substitution_t& init( bool anycase, char first_last, + cbl_refer_t *orig, cbl_refer_t *replacement ) { + this->anycase = anycase; + switch(first_last) { + case 'F': this->first_last = subst_first_e; break; + case 'L': this->first_last = subst_last_e; break; + default: + this->first_last = subst_all_e; + break; + } + this->orig = orig; + this->replacement = replacement; + return *this; + } + }; + typedef std::list<substitution_t> substitutions_t; + + struct init_statement_t { + bool to_value; + data_category_t category; + category_map_t replacement; + + init_statement_t( category_map_t replacement ) + : to_value(false) + , category(data_category_none) + , replacement(replacement) + + {} + + init_statement_t( bool to_value = false ) + : to_value(to_value) + , category(data_category_none) + , replacement(category_map_t()) + {} + + }; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" + static data_category_t + data_category_of( const cbl_refer_t& refer ); + + static _Float128 + numstr2i( const char input[], radix_t radix ); + + struct cbl_field_t; + static inline cbl_field_t * + new_literal( const char initial[], enum radix_t radix ); +#pragma GCC diagnostic pop + + + #include <list> + + enum select_clause_t { + access_clause_e = 0x0001, + alt_key_clause_e = 0x0002, + assign_clause_e = 0x0004, + collating_clause_e = 0x0008, + file_status_clause_e = 0x0010, + lock_mode_clause_e = 0x0020, + organization_clause_e = 0x0040, + padding_clause_e = 0x0080, + record_delim_clause_e = 0x0100, + record_key_clause_e = 0x0200, + relative_key_clause_e = 0x0400, + reserve_clause_e = 0x0800, + sharing_clause_e = 0x1000, + }; + + struct symbol_elem_t; + struct symbol_elem_t * symbols_begin( size_t first ); + struct symbol_elem_t * symbols_end(); + + void field_done(); + + template <typename E> + struct Elem_list_t { + std::list<E> elems; + Elem_list_t() {} + Elem_list_t( E elem ) { + elems.push_back(elem); + } + Elem_list_t * push_back( E elem ) { + elems.push_back(elem); + return this; + } + void clear() { + for( auto p = elems.begin(); p != elems.end(); p++ ) { + assert( !(symbols_begin(0) <= *p && *p < symbols_end()) ); + delete *p; + } + elems.clear(); + } + }; + + struct file_list_t; + struct cbl_label_t; + typedef struct Elem_list_t<cbl_label_t*> Label_list_t; + + struct cbl_file_key_t; + typedef struct Elem_list_t<cbl_file_key_t*> key_list_t; + + struct cbl_declarative_t; + typedef struct Elem_list_t<cbl_declarative_t*> declarative_list_t; + typedef struct Elem_list_t<ec_type_t> ec_list_t; + typedef struct Elem_list_t<size_t> isym_list_t; + + struct rel_part_t; + + bool set_debug(bool); + +#include "ec.h" +#include "common-defs.h" +#include "inspect.h" +} + +%{ +#include <fstream> // Before cobol-system because it uses poisoned functions +#include "cobol-system.h" +#include "cdfval.h" +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "exceptl.h" +#include "exceptg.h" +#include "parse_ante.h" +%} + +%token IDENTIFICATION_DIV "IDENTIFICATION DIVISION" + ENVIRONMENT_DIV "ENVIRONMENT DIVISION" + PROCEDURE_DIV "PROCEDURE DIVISION" + DATA_DIV "DATA DIVISION" + FILE_SECT "FILE SECTION" + INPUT_OUTPUT_SECT "INPUT-OUTPUT SECTION" + LINKAGE_SECT "LINKAGE SECTION" + LOCAL_STORAGE_SECT "LOCAL-STORAGE SECTION" + WORKING_STORAGE_SECT "WORKING-STORAGE SECTION" + +%token OBJECT_COMPUTER "OBJECT COMPUTER" + +%token DISPLAY_OF "DISPLAY OF" + END_FUNCTION "END FUNCTION" + END_PROGRAM "END PROGRAM" + END_SUBPROGRAM "END PROGRAM <contained program>" + +%token JUSTIFIED RETURNING NO_CONDITION "invalid token" + +%token <string> ALNUM ALPHED +%token <number> ERROR EXCEPTION SIZE_ERROR "SIZE ERROR" +%token <ec_type> EXCEPTION_NAME "EXCEPTION NAME" +%token <number> LEVEL LEVEL66 "66" LEVEL78 "78" LEVEL88 "88" +%token <string> CLASS_NAME "class name" + NAME + NAME88 "Level 88 NAME" + NUME "Name" + NUMED "NUMERIC-EDITED picture" + NUMED_CR "NUMERIC-EDITED CR picture" + NUMED_DB "NUMERIC-EDITED DB picture" +%token <number> NINEDOT NINES NINEV PIC_P +%token <string> SPACES +%token <literal> LITERAL +%token <number> END EOP +%token <string> FILENAME +%token <number> INVALID +%token <number> NUMBER NEGATIVE +%token <numstr> NUMSTR "numeric literal" +%token <number> OVERFLOW +%token <computational> COMPUTATIONAL + +%token <boolean> PERFORM BACKWARD +%token <number> POSITIVE +%token <field_attr> POINTER +%token <string> SECTION +%token <number> STANDARD_ALPHABET "STANDARD ALPHABET" +%token <string> SWITCH +%token <string> UPSI +%token <number> ZERO + + /* environment names */ +%token <number> SYSIN SYSIPT SYSOUT SYSLIST SYSLST SYSPUNCH SYSPCH CONSOLE +%token <number> C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 CSP +%token <number> S01 S02 S03 S04 S05 AFP_5A "AFP 5A" +%token <number> STDIN STDOUT STDERR + + /* intrinsics */ +%token <string> LIST MAP NOLIST NOMAP NOSOURCE +%token <number> MIGHT_BE "IS or IS NOT" + FUNCTION_UDF "UDF name" + FUNCTION_UDF_0 "UDF" + +%token <string> DATE_FMT "date format" + TIME_FMT "time format" + DATETIME_FMT "datetime format" + + /* tokens without semantic value */ + /* CDF (COPY and >> defined here but used in cdf.y) */ +%token BASIS CBL CONSTANT COPY + DEFINED ENTER FEATURE INSERTT + LSUB "(" + PARAMETER_kw "PARAMETER" + OVERRIDE READY RESET + RSUB ")" + SERVICE_RELOAD "SERVICE RELOAD" STAR_CBL "*CBL" + SUBSCRIPT SUPPRESS TITLE TRACE USE + + COBOL_WORDS ">>COBOL-WORDS" EQUATE UNDEFINE + CDF_DEFINE ">>DEFINE" CDF_DISPLAY ">>DISPLAY" + CDF_IF ">>IF" CDF_ELSE ">>ELSE" CDF_END_IF ">>END-IF" + CDF_EVALUATE ">>EVALUATE" + CDF_WHEN ">>WHEN" + CDF_END_EVALUATE ">>END-EVALUATE" + CALL_COBOL "CALL" CALL_VERBATIM "CALL (as C)" + + IF THEN ELSE + SENTENCE + ACCEPT ADD ALTER CALL CANCEL CLOSE COMPUTE CONTINUE + DELETE DISPLAY DIVIDE EVALUATE EXIT FILLER_kw "FILLER" + GOBACK GOTO + INITIALIZE INSPECT + MERGE MOVE MULTIPLY OPEN PARAGRAPH + READ RELEASE RETURN REWRITE + SEARCH SET SELECT SORT SORT_MERGE "SORT-MERGE" + STRING_kw "STRING" STOP SUBTRACT START + UNSTRING WRITE WHEN + + ABS ACCESS ACOS ACTUAL ADVANCING AFTER ALL + ALLOCATE + ALPHABET ALPHABETIC ALPHABETIC_LOWER "ALPHABETIC-LOWER" + ALPHABETIC_UPPER "ALPHABETIC-UPPER" + ALPHANUMERIC + ALPHANUMERIC_EDITED "ALPHANUMERIC-EDITED" + ALSO ALTERNATE ANNUITY ANUM ANY ANYCASE APPLY ARE + AREA AREAS AS + ASCENDING ACTIVATING ASIN ASSIGN AT ATAN + + BASED BASECONVERT + BEFORE BINARY BIT BIT_OF "BIT-OF" BIT_TO_CHAR "BIT-TO-CHAR" + BLANK BLOCK + BOOLEAN_OF_INTEGER "BOOLEAN-OF-INTEGER" + BOTTOM BY + BYTE BYTE_LENGTH "BYTE-LENGTH" + + CF CH + CHANGED CHAR CHAR_NATIONAL "CHAR-NATIONAL" + CHARACTER CHARACTERS CHECKING CLASS + COBOL CODE CODESET COLLATING + COLUMN COMBINED_DATETIME "COMBINED-DATETIME" + COMMA COMMAND_LINE "COMMAND-LINE" + COMMAND_LINE_COUNT "COMMAND-LINE-COUNT" + COMMIT COMMON + + CONCAT CONDITION CONFIGURATION_SECT "CONFIGURATION SECTION" + CONTAINS + CONTENT CONTROL CONTROLS CONVERT CONVERTING CORRESPONDING COS + COUNT CURRENCY CURRENT CURRENT_DATE + + DATA DATE DATE_COMPILED + DATE_OF_INTEGER "DATE-OF-INTEGER" + DATE_TO_YYYYMMDD "DATE-TO-YYYYMMDD" + DATE_WRITTEN "DATE-WRITTEN" + DAY DAY_OF_INTEGER "DAY-OF-INTEGER" + DAY_OF_WEEK "DAY-OF-WEEK" + DAY_TO_YYYYDDD "DAY-TO-YYYYDDD" + DBCS DE DEBUGGING DECIMAL_POINT + DECLARATIVES DEFAULT DELIMITED DELIMITER DEPENDING + DESCENDING DETAIL DIRECT + DIRECT_ACCESS "DIRECT-ACCESS" + DOWN DUPLICATES + DYNAMIC + + E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL EVERY + EXAMINE EXHIBIT EXP EXP10 EXTEND EXTERNAL + + EXCEPTION_FILE "EXCEPTION-FILE" + EXCEPTION_FILE_N "EXCEPTION-FILE-N" + EXCEPTION_LOCATION "EXCEPTION-LOCATION" + EXCEPTION_LOCATION_N "EXCEPTION-LOCATION-N" + EXCEPTION_STATEMENT "EXCEPTION-STATEMENT" + EXCEPTION_STATUS "EXCEPTION-STATUS" + + FACTORIAL FALSE_kw "False" FD + FILE_CONTROL "FILE-CONTROL" + FILE_KW "File" + FILE_LIMIT "FILE-LIMIT" + FINAL FINALLY + FIND_STRING "FIND-STRING" + FIRST FIXED FOOTING FOR + FORMATTED_CURRENT_DATE "FORMATTED-CURRENT-DATE" + FORMATTED_DATE "FORMATTED-DATE" + FORMATTED_DATETIME "FORMATTED-DATETIME" + FORMATTED_TIME "FORMATTED-TIME" + FORM_OVERFLOW "FORM-OVERFLOW" + FREE + FRACTION_PART "FRACTION-PART" + FROM FUNCTION + + GENERATE GIVING GLOBAL GO GROUP + + HEADING HEX + HEX_OF "HEX-OF" + HEX_TO_CHAR "HEX-TO-CHAR" + HIGH_VALUES "HIGH-VALUES" + HIGHEST_ALGEBRAIC "HIGHEST-ALGEBRAIC" + HOLD + + IBM_360 IN INCLUDE INDEX INDEXED INDICATE INITIAL_kw "INITIAL" + INITIATE INPUT INSTALLATION INTERFACE + INTEGER + INTEGER_OF_BOOLEAN "INTEGER-OF-BOOLEAN" + INTEGER_OF_DATE "INTEGER-OF-DATE" + INTEGER_OF_DAY "INTEGER-OF-DAY" + INTEGER_OF_FORMATTED_DATE "INTEGER-OF-FORMATTED-DATE" + INTEGER_PART "INTEGER-PART" + INTO INTRINSIC INVOKE IO IO_CONTROL "IO-CONTROL" + IS ISNT "IS NOT" + + KANJI KEY + + LABEL LAST LEADING LEFT LENGTH + LENGTH_OF "LENGTH-OF" + LIMIT LIMITS LINE LINES + LINE_COUNTER "LINE-COUNTER" + LINAGE LINKAGE LOCALE LOCALE_COMPARE "LOCALE-COMPARE" + LOCALE_DATE "LOCALE-DATE" + LOCALE_TIME "LOCALE-TIME" + LOCALE_TIME_FROM_SECONDS "LOCALE-TIME-FROM-SECONDS" + LOCAL_STORAGE "LOCAL-STORAGE" + LOCATION + LOCK LOCK_ON LOG LOG10 + LOWER_CASE "LOWER-CASE" + LOW_VALUES "LOW-VALUES" + LOWEST_ALGEBRAIC "LOWEST-ALGEBRAIC" + LPAREN " )" + + MANUAL MAXX "Max" MEAN MEDIAN MIDRANGE + MINN "Min" MULTIPLE MOD MODE + MODULE_NAME "MODULE-NAME " + + NAMED NAT NATIONAL + NATIONAL_EDITED "NATIONAL-EDITED" + NATIONAL_OF "NATIONAL-OF" + NATIVE NESTED NEXT + NO NOTE + NULLS NULLPTR + NUMERIC + NUMERIC_EDITED NUMVAL + NUMVAL_C "NUMVAL-C" + NUMVAL_F "NUMVAL-F" + + OCCURS OF OFF OMITTED ON ONLY OPTIONAL OPTIONS ORD ORDER + ORD_MAX "ORD-MAX" + ORD_MIN "ORD-MIN" + ORGANIZATION OTHER OTHERWISE OUTPUT + + PACKED_DECIMAL PADDING PAGE + PAGE_COUNTER "PAGE-COUNTER" + PF PH PI PIC PICTURE + PLUS PRESENT_VALUE PRINT_SWITCH + PROCEDURE PROCEDURES PROCEED PROCESS + PROGRAM_ID "PROGRAM-ID" + PROGRAM_kw "Program" PROPERTY PROTOTYPE PSEUDOTEXT + + QUOTES "QUOTE" + + RANDOM RANDOM_SEED RANGE RAISE RAISING + RD RECORD RECORDING RECORDS RECURSIVE + REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS + REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS + REPOSITORY RERUN RESERVE RESTRICTED RESUME + REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN + + SAME SCREEN SD + SECONDS_FROM_FORMATTED_TIME "SECONDS-FROM-FORMATTED-TIME" + SECONDS_PAST_MIDNIGHT "SECONDS-PAST-MIDNIGHT" + SECURITY + SEPARATE SEQUENCE SEQUENTIAL SHARING + SIMPLE_EXIT "(simple) EXIT" + SIGN SIN SIZE + SMALLEST_ALGEBRAIC "SMALLEST-ALGEBRAIC" + SOURCE + SOURCE_COMPUTER "SOURCE-COMPUTER" + SPECIAL_NAMES SQRT STACK + STANDARD + STANDARD_1 "STANDARD-1" + STANDARD_DEVIATION "STANDARD-DEVIATION " + STANDARD_COMPARE "STANDARD-COMPARE" + STATUS STRONG + SUBSTITUTE SUM SYMBOL SYMBOLIC SYNCHRONIZED + + TALLY TALLYING TAN TERMINATE TEST + TEST_DATE_YYYYMMDD "TEST-DATE-YYYYMMDD" + TEST_DAY_YYYYDDD "TEST-DAY-YYYYDDD" + TEST_FORMATTED_DATETIME "TEST-FORMATTED-DATETIME" + TEST_NUMVAL "TEST-NUMVAL" + TEST_NUMVAL_C "TEST-NUMVAL-C" + TEST_NUMVAL_F "TEST-NUMVAL-F" + THAN TIME TIMES + TO TOP + TOP_LEVEL + TRACKS TRACK_AREA TRAILING TRANSFORM TRIM TRUE_kw "True" TRY + TURN TYPE TYPEDEF + + ULENGTH UNBOUNDED UNIT UNITS UNIT_RECORD UNTIL UP UPON + UPOS UPPER_CASE USAGE USING USUBSTR USUPPLEMENTARY + UTILITY UUID4 UVALID UWIDTH + + VALUE VARIANCE VARYING VOLATILE + + WHEN_COMPILED WITH WORKING_STORAGE + XML XMLGENERATE XMLPARSE + YEAR_TO_YYYY YYYYDDD YYYYMMDD + + /* unused Context Words */ + ARITHMETIC ATTRIBUTE AUTO AUTOMATIC + AWAY_FROM_ZERO "AWAY-FROM-ZERO" + BACKGROUND_COLOR "BACKGROUND-COLOR" + BELL + BINARY_ENCODING "BINARY-ENCODING" + BLINK + CAPACITY CENTER CLASSIFICATION CYCLE + DECIMAL_ENCODING "DECIMAL-ENCODING" + ENTRY_CONVENTION EOL EOS ERASE EXPANDS + FLOAT_BINARY "FLOAT-BINARY" + FLOAT_DECIMAL "FLOAT-DECIMAL" + FOREGROUND_COLOR FOREVER FULL + HIGHLIGHT + HIGH_ORDER_LEFT "HIGH-ORDER-LEFT" + HIGH_ORDER_RIGHT "HIGH-ORDER-RIGHT" + IGNORING IMPLEMENTS INITIALIZED INTERMEDIATE + LC_ALL_kw "LC-ALL" + LC_COLLATE_kw "LC-COLLATE" + LC_CTYPE_kw "LC-CTYPE" + LC_MESSAGES_kw "LC-MESSAGES" + LC_MONETARY_kw "LC-MONETARY" + LC_NUMERIC_kw "LC-NUMERIC" + LC_TIME_kw "LC-TIME" + LOWLIGHT + NEAREST_AWAY_FROM_ZERO "NEAREST-AWAY-FROM-ZERO" + NEAREST_EVEN NEAREST_TOWARD_ZERO "NEAREST-EVEN NEAREST-TOWARD-ZERO" + NONE NORMAL NUMBERS + PREFIXED PREVIOUS PROHIBITED RELATION REQUIRED + REVERSE_VIDEO ROUNDING + SECONDS SECURE SHORT SIGNED + STANDARD_BINARY "STANDARD-BINARY" + STANDARD_DECIMAL "STANDARD-DECIMAL" + STATEMENT STEP STRUCTURE + TOWARD_GREATER "TOWARD-GREATER" + TOWARD_LESSER "TOWARD-LESSER" + TRUNCATION + UCS_4 "UCS-4" + UNDERLINE UNSIGNED + UTF_16 "UTF-16" + UTF_8 "UTF-8" + + ADDRESS + END_ACCEPT "END-ACCEPT" + END_ADD "END-ADD" + END_CALL "END-CALL" + END_COMPUTE "END-COMPUTE" + END_DELETE "END-DELETE" + END_DISPLAY "END-DISPLAY" + END_DIVIDE "END-DIVIDE" + END_EVALUATE "END-EVALUATE" + END_MULTIPLY "END-MULTIPLY" + END_PERFORM "END-PERFORM" + END_READ "END-READ" + END_RETURN "END-RETURN" + END_REWRITE "END-REWRITE" + END_SEARCH "END-SEARCH" + END_START "END-START" + END_STRING "END-STRING" + END_SUBTRACT "END-SUBTRACT" + END_UNSTRING "END-UNSTRING" + END_WRITE "END-WRITE" + END_IF "END-IF" + /* end tokens without semantic value */ + + // YYEOF added for compatibility with Bison 3.5 + // https://savannah.gnu.org/forum/forum.php?forum_id=9735 +%token YYEOF 0 "end of file" + +%type <number> sentence statements statement +%type <number> star_cbl_opt close_how + +%type <number> test_before usage_clause1 might_be +%type <boolean> all optional sign_leading on_off initialized strong +%type <number> count data_clauses data_clause +%type <number> nine nines nps relop spaces_etc reserved_value signed +%type <number> variable_type +%type <number> true_false posneg eval_posneg +%type <number> open_io alphabet_etc +%type <special_type> device_name +%type <string> numed collating_sequence context_word ctx_name locale_spec +%type <literal> namestr alphabet_lit program_as repo_as +%type <field> perform_cond kind_of_name +%type <refer> alloc_ret + +%type <field> log_term rel_expr rel_abbr eval_abbr +%type <refer> num_value num_term value factor +%type <refer> simple_cond bool_expr +%type <log_expr_t> log_expr rel_abbrs eval_abbrs +%type <rel_term_t> rel_term rel_term1 + +%type <field_data> value78 +%type <field> literal name nume typename +%type <field> num_literal signed_literal + +%type <number> perform_start +%type <refer> perform_times +%type <perf> perform_verb + perform_inline perform_except + +%type <refer> eval_subject1 +%type <vargs> vargs disp_vargs; +%type <field> level_name +%type <string> fd_name picture_sym name66 paragraph_name +%type <literal> literalism +%type <number> bound advance_when org_clause1 read_next +%type <number> access_mode multiple lock_how lock_mode +%type <select_clauses> select_clauses +%type <select_clause> select_clause access_clause alt_key_clause + assign_clause collate_clause status_clause + lock_mode_clause org_clause padding_clause + record_delim_clause record_key_clause + relative_key_clause reserve_clause sharing_clause + +%type <file> filename read_body write_body delete_body +%type <rewrite_t> rewrite_body +%type <min_max> record_vary rec_contains from_to record_desc +%type <file_op> read_file rewrite1 write_file +%type <field> data_descr data_descr1 write_what file_record +%type <field> name88 +%type <refer> advancing advance_by +%type <refer> alphaval alpha_val numeref scalar scalar88 +%type <refer> tableref tableish +%type <refer> varg varg1 varg1a +%type <refer> expr expr_term compute_expr free_tgt by_value_arg +%type <refer> move_tgt selected_name read_key read_into vary_by +%type <refer> accept_refer num_operand envar search_expr any_arg +%type <accept_func> accept_body +%type <refers> expr_list subscripts arg_list free_tgts +%type <targets> move_tgts set_tgts +%type <field> search_varying +%type <field> search_term search_terms +%type <label> label_name +%type <tgt> sort_target +%type <files> filenames cdf_use_files +%type <field> one_switch +%type <fields> field_list switches key_sources key_source +%type <sort_keys> sort_keys +%type <sort_key> sort_key +%type <sort_io> sort_input sort_output +%type <boolean> sort_dup forward_order unique_key sign_separate +%type <number> befter cardinal initial first_leading + +%type <refer> inspected +%type <insp_qual> insp_qual +%type <insp_match> insp_quals insp_mtquals tally_match +%type <insp_replace> x_by_y +%type <insp_oper> replace_oper x_by_ys +%type <insp_oper> tally_forth tally_matches +%type <inspect> tally +%type <insp_one> replacement tally_fors +%type <insp_all> tallies replacements + +%type <arith> add_body subtract_body multiply_body divide_body +%type <arith> add_impl subtract_impl multiply_impl divide_impl +%type <arith> add_cond subtract_cond multiply_cond divide_cond +%type <arith> divide_into divide_by + +%type <refer> intrinsic_call +%type <field> intrinsic intrinsic_locale + +%type <field> intrinsic0 +%type <number> intrinsic_v intrinsic_I intrinsic_N intrinsic_X +%type <number> intrinsic_I2 intrinsic_N2 intrinsic_X2 +%type <number> lopper_case +%type <number> return_body return_file +%type <field> trim_trailing function_udf + +%type <refer> str_input str_size +%type <refer2> str_into + +%type <refers> sum scalar88s ffi_names +%type <delimited_1> str_delimited +%type <delimiteds> str_delimiteds +%type <str_body> string_body + +%type <refmod_parts> refmod + +%type <uns_body> unstring_body +%type <refers> uns_delimiters uns_delimited +%type <refer> uns_delimiter +%type <uns_into> uns_into +%type <uns_tgts> uns_tgts +%type <uns_tgt> uns_tgt + +%type <error> on_overflow on_overflows +%type <error> arith_err arith_errs +%type <error> accept_except accept_excepts call_except call_excepts +%type <compute_body_t> compute_body + +%type <refer> ffi_name set_operand set_tgt scalar_arg unstring_src +%type <number> /* addr_len_of */ alphanum_pic +%type <pic_part> alphanum_part + +%type <ffi_arg> parameter ffi_by_ref ffi_by_con ffi_by_val +%type <ffi_args> parameters +%type <ffi_impl> call_body call_impl + +%type <ffi_arg> procedure_use +%type <ffi_args> procedure_uses + +%type <comminit> comminit comminits program_attrs + +%type <error_clauses> io_invalids read_eofs write_eops +%type <boolean> io_invalid read_eof write_eop + global is_global anycase backward +%type <number> mistake globally first_last +%type <io_mode> io_mode + +%type <labels> labels +%type <label> label_1 section_name + +%type <switches> upsi_entry + +%type <special> acceptable disp_target +%type <display> disp_body + +%type <false_domain> domains domain +%type <colseq> alphabet_seq +%type <alphasym> alphabet_name alphabet_seqs sort_seq + +%type <init_stmt> init_clause init_value +%type <data_category> init_categora init_category +%type <replacement> init_by +%type <replacements> init_bys init_replace +%type <refer> init_data exit_with stop_status +%type <float128> cce_expr cce_factor const_value +%type <prog_end> end_program1 +%type <substitution> subst_input +%type <substitutions> subst_inputs +%type <numval_locale_t> numval_locale + +%type <ec_type> except_name exit_raising +%type <ec_list> except_names +%type <isym_list> except_files +%type <dcl_list_t> perform_ec + +%type <opt_init_sects> opt_init_sects +%type <opt_init_sect> opt_init_sect +%type <number> opt_init_value +%type <opt_round> rounded round_between rounded_type rounded_mode +%type <opt_arith> opt_arith_type +%type <module_type> module_type + +%union { + bool boolean; + int number; + char *string; + _Float128 float128; // Hope springs eternal: 28 Mar 2023 + literal_t literal; + cbl_field_attr_t field_attr; + ec_type_t ec_type; + ec_list_t* ec_list; + declarative_list_t* dcl_list_t; + isym_list_t* isym_list; + struct { radix_t radix; char *string; } numstr; + struct { int token; literal_t name; } prog_end; + struct { int token; special_name_t id; } special_type; + struct { cbl_field_type_t type; + uint32_t capacity; bool signable; } computational; + struct cbl_special_name_t *special; + struct cbl_alphabet_t *alphasym; + struct tgt_list_t *targets; + struct cbl_file_t *file; + struct { bool varying; size_t min, max; } min_max; + struct { cbl_file_t *file; cbl_field_t *buffer; } rewrite_t; + struct { cbl_file_t *file; file_status_t handled; } file_op; + struct cbl_label_t *label; + struct { cbl_label_t *label; int token; } exception; + struct cbl_field_data_t *field_data; + struct cbl_field_t *field; + struct { bool tf; cbl_field_t *field; } bool_field; + struct { int token; cbl_field_t *cond; } cond_field; + struct cbl_refer_t *refer; + + struct rel_term_type { bool invert; cbl_refer_t *term; } rel_term_t; + struct log_expr_t *log_expr_t; + struct vargs_t* vargs; + struct perform_t *perf; + struct cbl_perform_tgt_t *tgt; + Label_list_t *labels; + key_list_t *file_keys; + cbl_file_mode_t io_mode; + struct cbl_file_key_t *file_key; + struct file_list_t *files; + struct field_list_t *fields; + struct refer_list_t *refers; + struct sort_key_t *sort_key; + struct sort_keys_t *sort_keys; + struct file_sort_io_t *sort_io; + struct arith_t *arith; + struct { size_t ntgt; cbl_num_result_t *tgts; + cbl_refer_t *expr; } compute_body_t; + struct ast_inspect_t *insp_one; + struct ast_inspect_list_t *insp_all; + struct ast_inspect_oper_t *insp_oper; + struct { bool before; cbl_inspect_qual_t *qual; } insp_qual; + cbl_inspect_t *inspect; + cbl_inspect_match_t *insp_match; + cbl_inspect_replace_t *insp_replace; + + struct { cbl_refer_t *delimited; refer_list_t *inputs; } delimited; + struct { cbl_refer_t *input, *delimiter; } delimited_1; + struct { cbl_refer_t *from, *len; } refmod_parts; + struct refer_collection_t *delimiteds; + struct { cbl_label_t *on_error, *not_error; } error; + struct { unsigned int nclause; bool tf; } error_clauses; + struct refer_pair_t { cbl_refer_t *first, *second; } refer2; + struct { refer_collection_t *inputs; refer_pair_t into; } str_body; + + struct { accept_func_t func; cbl_refer_t *into, *from; } accept_func; + struct unstring_into_t *uns_into; + struct unstring_tgt_list_t *uns_tgts; + struct unstring_tgt_t *uns_tgt; + struct { cbl_refer_t *input; + refer_list_t *delimited; unstring_into_t *into; } uns_body; + + struct cbl_ffi_arg_t *ffi_arg; + struct ffi_args_t *ffi_args; + struct { YYLTYPE loc; cbl_refer_t *ffi_name, *ffi_returning; + ffi_args_t *using_params; } ffi_impl; + + struct { bool common, initial, recursive; } comminit; + struct { enum select_clause_t clause; cbl_file_t *file; } select_clause; + struct { size_t clauses; cbl_file_t *file; } select_clauses; + struct { YYLTYPE loc; char *on, *off; } switches; + struct cbl_domain_t *false_domain; + struct { size_t also; unsigned char *low, *high; } colseq; + struct { cbl_field_attr_t attr; int nbyte; } pic_part; + + data_category_t data_category; + struct { data_category_t category; cbl_refer_t* replacement; } replacement; + category_map_t *replacements; + init_statement_t *init_stmt; + struct { cbl_special_name_t *special; vargs_t *vargs; } display; + substitution_t substitution; + substitutions_t *substitutions; + struct { bool is_locale; cbl_refer_t *arg2; } numval_locale_t; + + cbl_options_t::arith_t opt_arith; + cbl_round_t opt_round; + cbl_section_type_t opt_init_sect; + struct { bool local, working; } opt_init_sects; + module_type_t module_type; +} + +%printer { fprintf(yyo, "clauses: 0x%04x", $$); } data_clauses +%printer { fprintf(yyo, "%s %s", refer_type_str($$), $$? $$->name() : "<none>"); } <refer> +%printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret +%printer { fprintf(yyo, "%s %s '%s' (%s)", + $$? cbl_field_type_str($$->type) : "<%empty>", + $$? name_of($$) : "", + $$? $$->data.initial? $$->data.initial : "<nil>" : "", + $$? $$->value_str() : "" ); } <field> + +%printer { fprintf(yyo, "%c %s", + $$.invert? '!' : ' ', + $$.term? name_of($$.term->field) : "<none>"); } <rel_term_t> + +%printer { fprintf(yyo, "%s (token %d)", keyword_str($$), $$ ); } relop +%printer { fprintf(yyo, "'%s'", $$? $$ : "" ); } NAME <string> +%printer { fprintf(yyo, "%s'%.*s'{%zu} %s", $$.prefix, int($$.len), $$.data, $$.len, + $$.symbol_name()); } <literal> +%printer { fprintf(yyo, "%s (1st of %zu)", + $$->targets.empty()? "" : $$->targets.front().refer.field->name, + $$->targets.size() ); } <targets> +%printer { fprintf(yyo, "#%zu: %s", + is_temporary($$)? 0 : field_index($$), + $$? name_of($$) : "<nil>" ); } name +%printer { fprintf(yyo, "{%zu-%zu}", $$.min, $$.max ); } <min_max> +%printer { fprintf(yyo, "{%s}", $$? "+/-" : "" ); } signed +%printer { fprintf(yyo, "{%s of %zu}", + teed_up_names().front(), teed_up_names().size() ); } qname +%printer { fprintf(yyo, "{%d}", $$ ); } <number> +%printer { fprintf(yyo, "'%s'", $$.string ); } <numstr> +%printer { const char *s = string_of($$); + fprintf(yyo, "{%s}", s? s : "??" ); } <float128> +%printer { fprintf(yyo, "{%s %c%u}", cbl_field_type_str($$.type), + $$.signable? '+' : ' ', + $$.capacity ); } <computational> +%printer { fprintf(yyo, "{'%s'-'%s'%s}", + $$.low? (const char*) $$.low : "", + $$.high? (const char*) $$.high : "", + $$.also? "+" : "" ); } <colseq> +%printer { fprintf(yyo, "{%s, %zu parameters}", + name_of($$.ffi_name->field), !$$.using_params? 0 : + $$.using_params->elems.size()); } call_body +%printer { fprintf(yyo, "%s <- %s", data_category_str($$.category), + name_of($$.replacement->field)); } init_by + + /* CDF (COPY and >> defined here but used in cdf.y) */ +%left BASIS CBL CONSTANT COPY + DEFINED ENTER FEATURE INSERTT + LIST LSUB MAP NOLIST NOMAP NOSOURCE + PARAMETER_kw OVERRIDE READY RESET RSUB + SERVICE_RELOAD STAR_CBL + SUBSCRIPT SUPPRESS TITLE TRACE USE + + COBOL_WORDS EQUATE UNDEFINE + + CDF_DEFINE CDF_DISPLAY + CDF_IF CDF_ELSE CDF_END_IF + CDF_EVALUATE + CDF_WHEN + CDF_END_EVALUATE + CALL_COBOL CALL_VERBATIM + +%right IF THEN ELSE + SENTENCE + ACCEPT ADD ALTER CALL CANCEL CLOSE COMPUTE CONTINUE + DELETE DISPLAY DIVIDE EVALUATE END EOP EXIT FILLER_kw + GOBACK GOTO + INITIALIZE INSPECT + MERGE MOVE MULTIPLY OPEN OVERFLOW PARAGRAPH PERFORM + READ RELEASE RETURN REWRITE + SEARCH SET SELECT SORT SORT_MERGE + STRING_kw STOP SUBTRACT START + UNSTRING WRITE WHEN INVALID + +%left ABS ACCESS ACOS ACTUAL ADVANCING AFP_5A AFTER ALL + ALLOCATE + ALPHABET ALPHABETIC ALPHABETIC_LOWER + ALPHABETIC_UPPER + ALPHANUMERIC + ALPHANUMERIC_EDITED + ALPHED ALSO ALTERNATE ANNUITY ANUM ANY ANYCASE APPLY ARE + AREA AREAS AS + ASCENDING ACTIVATING ASIN ASSIGN AT ATAN + + BACKWARD BASED BASECONVERT + BEFORE BINARY BIT BIT_OF BIT_TO_CHAR + BLANK BLOCK + BOOLEAN_OF_INTEGER + BOTTOM BY + BYTE BYTE_LENGTH + + C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 CF CH + CHANGED CHAR CHAR_NATIONAL + CHARACTER CHARACTERS CHECKING CLASS + COBOL CODE CODESET COLLATING + COLUMN COMBINED_DATETIME + COMMA COMMAND_LINE + COMMAND_LINE_COUNT + COMMIT COMMON COMPUTATIONAL + + CONCAT CONDITION CONFIGURATION_SECT + CONSOLE CONTAINS + CONTENT CONTROL CONTROLS CONVERT CONVERTING CORRESPONDING COS + COUNT CSP CURRENCY CURRENT CURRENT_DATE + + DATA DATE DATE_COMPILED + DATE_OF_INTEGER + DATE_TO_YYYYMMDD + DATE_FMT + TIME_FMT + DATETIME_FMT + DATE_WRITTEN + DAY DAY_OF_INTEGER + DAY_OF_WEEK + DAY_TO_YYYYDDD + DBCS DE DEBUGGING DECIMAL_POINT + DECLARATIVES DEFAULT DELIMITED DELIMITER DEPENDING + DESCENDING DETAIL DIRECT + DIRECT_ACCESS + DOWN DUPLICATES + DYNAMIC + + E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL ERROR EVERY + EXAMINE EXCEPTION EXHIBIT EXP EXP10 EXTEND EXTERNAL + + EXCEPTION_FILE + EXCEPTION_FILE_N + EXCEPTION_LOCATION + EXCEPTION_LOCATION_N + EXCEPTION_NAME + EXCEPTION_STATEMENT + EXCEPTION_STATUS + + FACTORIAL FALSE_kw FD FILENAME + FILE_CONTROL + FILE_KW + FILE_LIMIT + FINAL FINALLY + FIND_STRING + FIRST FIXED FOOTING FOR + FORMATTED_CURRENT_DATE + FORMATTED_DATE + FORMATTED_DATETIME + FORMATTED_TIME + FORM_OVERFLOW + FREE + FRACTION_PART + FROM FUNCTION + FUNCTION_UDF + + GENERATE GIVING GLOBAL GO GROUP + + HEADING HEX + HEX_OF + HEX_TO_CHAR + HIGH_VALUES + HIGHEST_ALGEBRAIC + HOLD + + IBM_360 IN INCLUDE INDEX INDEXED INDICATE INITIAL_kw + INITIATE INPUT INSTALLATION INTERFACE + INTEGER + INTEGER_OF_BOOLEAN + INTEGER_OF_DATE + INTEGER_OF_DAY + INTEGER_OF_FORMATTED_DATE + INTEGER_PART + INTO INTRINSIC INVOKE IO IO_CONTROL + IS ISNT + + KANJI KEY + + LABEL LAST LEADING LEFT LENGTH + LENGTH_OF + LEVEL LEVEL66 + LEVEL88 LIMIT LIMITS LINE LINES + LINE_COUNTER + LINAGE LINKAGE LOCALE LOCALE_COMPARE + LOCALE_DATE + LOCALE_TIME + LOCALE_TIME_FROM_SECONDS + LOCAL_STORAGE + LOCATION + LOCK LOCK_ON LOG LOG10 + LOWER_CASE + LOW_VALUES + LOWEST_ALGEBRAIC + LPAREN + + MANUAL MAXX MEAN MEDIAN MIDRANGE + MIGHT_BE MINN MULTIPLE MOD MODE + MODULE_NAME + + NAMED NAT NATIONAL + NATIONAL_EDITED + NATIONAL_OF + NATIVE NEGATIVE NESTED NEXT + NINEDOT NINES NINEV NO NOTE NO_CONDITION + NULLS NULLPTR NUMBER + NUME NUMED NUMED_CR NUMED_DB NUMERIC + NUMERIC_EDITED NUMSTR NUMVAL + NUMVAL_C + NUMVAL_F + + OCCURS OF OFF OMITTED ON ONLY OPTIONAL OPTIONS ORD ORDER + ORD_MAX + ORD_MIN + ORGANIZATION OTHER OTHERWISE OUTPUT + + PACKED_DECIMAL PADDING PAGE + PAGE_COUNTER + PF PH PI PIC PICTURE PIC_P + PLUS POINTER POSITIVE PRESENT_VALUE PRINT_SWITCH + PROCEDURE PROCEDURES PROCEED PROCESS + PROGRAM_ID + PROGRAM_kw PROPERTY PROTOTYPE PSEUDOTEXT + + QUOTES + + RANDOM RANDOM_SEED RANGE RAISE RAISING + RD RECORD RECORDING RECORDS RECURSIVE + REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS + REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS + REPOSITORY RERUN RESERVE RESTRICTED RESUME + REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN + + S01 S02 S03 S04 S05 SAME SCREEN SD + SECONDS_FROM_FORMATTED_TIME + SECONDS_PAST_MIDNIGHT + SECTION SECURITY + SEPARATE SEQUENCE SEQUENTIAL SHARING + SIMPLE_EXIT + SIGN SIN SIZE SIZE_ERROR + SMALLEST_ALGEBRAIC + SOURCE + SOURCE_COMPUTER + SPACES SPECIAL_NAMES SQRT STACK + STANDARD + STANDARD_ALPHABET + STANDARD_1 + STANDARD_DEVIATION + STANDARD_COMPARE + STATUS STRONG STDERR STDIN STDOUT + LITERAL SUBSTITUTE SUM SWITCH SYMBOL SYMBOLIC SYNCHRONIZED + SYSIN SYSIPT SYSLST SYSOUT SYSPCH SYSPUNCH + + TALLY TALLYING TAN TERMINATE TEST + TEST_DATE_YYYYMMDD + TEST_DAY_YYYYDDD + TEST_FORMATTED_DATETIME + TEST_NUMVAL + TEST_NUMVAL_C + TEST_NUMVAL_F + THAN TIME TIMES + TO TOP + TOP_LEVEL + TRACKS TRACK_AREA TRAILING TRANSFORM TRIM TRUE_kw TRY + TURN TYPE TYPEDEF + + ULENGTH UNBOUNDED UNIT UNITS UNIT_RECORD UNTIL UP UPON + UPOS UPPER_CASE UPSI USAGE USING USUBSTR USUPPLEMENTARY + UTILITY UUID4 UVALID UWIDTH + + VALUE VARIANCE VARYING VOLATILE + + WHEN_COMPILED WITH WORKING_STORAGE + XML XMLGENERATE XMLPARSE + YEAR_TO_YYYY YYYYDDD YYYYMMDD + ZERO + + /* unused Context Words */ + ARITHMETIC ATTRIBUTE AUTO AUTOMATIC + AWAY_FROM_ZERO + BACKGROUND_COLOR + BELL + BINARY_ENCODING + BLINK + CAPACITY CENTER CLASSIFICATION CYCLE + DECIMAL_ENCODING + ENTRY_CONVENTION EOL EOS ERASE EXPANDS + FLOAT_BINARY + FLOAT_DECIMAL + FOREGROUND_COLOR FOREVER FULL + HIGHLIGHT + HIGH_ORDER_LEFT + HIGH_ORDER_RIGHT + IGNORING IMPLEMENTS INITIALIZED INTERMEDIATE + LC_ALL_kw + LC_COLLATE_kw + LC_CTYPE_kw + LC_MESSAGES_kw + LC_MONETARY_kw + LC_NUMERIC_kw + LC_TIME_kw + LOWLIGHT + NEAREST_AWAY_FROM_ZERO + NEAREST_EVEN NEAREST_TOWARD_ZERO + NONE NORMAL NUMBERS + PREFIXED PREVIOUS PROHIBITED RELATION REQUIRED + REVERSE_VIDEO ROUNDING + SECONDS SECURE SHORT SIGNED + STANDARD_BINARY + STANDARD_DECIMAL + STATEMENT STEP STRUCTURE + TOWARD_GREATER + TOWARD_LESSER + TRUNCATION + UCS_4 + UNDERLINE UNSIGNED + UTF_16 + UTF_8 + +%left CLASS_NAME NAME NAME88 +%left ADDRESS +%left END_ACCEPT END_ADD END_CALL END_COMPUTE + END_DELETE END_DISPLAY END_DIVIDE + END_EVALUATE END_MULTIPLY END_PERFORM + END_READ END_RETURN END_REWRITE + END_SEARCH END_START END_STRING END_SUBTRACT + END_UNSTRING END_WRITE + error + END_IF + +%left THRU +%left OR +%left AND +%right NOT +%left '<' '>' '=' NE LE GE +%left '-' '+' +%left '*' '/' +%right POW +%precedence NEG + + + +%{ + static cbl_field_type_t + set_operand_type(const cbl_refer_t& refer) { + if( refer.field == NULL ) return FldInvalid; + return refer.addr_of? FldPointer : refer.field->type; + } + + static bool + refer_pointer( const cbl_num_result_t& elem ) { + assert(elem.refer.field); + return elem.refer.is_pointer(); + } + static bool + valid_set_targets( const tgt_list_t& tgts, bool want_pointers ) { + bool ok = true; + // The only targets that can have addr_of are BASED or in Linkage Section. + auto baddie = std::find_if( tgts.targets.begin(), + tgts.targets.end(), + []( const auto& num_result ) { + if( num_result.refer.addr_of ) { + auto f = num_result.refer.field; + if( ! (f->has_attr(based_e) || f->has_attr(linkage_e)) ) { + return true; + } + } + return false; + } ); + if( baddie != tgts.targets.end() ) { + auto loc = symbol_field_location(field_index(baddie->refer.field)); + error_msg(loc,"target %s must be BASED or in LINKAGE SECTION", + baddie->refer.name() ); + return false; + } + + for( const auto& num_result : tgts.targets ) { + auto loc = symbol_field_location(field_index(num_result.refer.field)); + if( refer_pointer(num_result) ) { + if( !want_pointers ) { + ok = false; + error_msg( loc, "%s is a pointer", num_result.refer.name() ); + } + } else { + if( want_pointers ) { + ok = false; + error_msg( loc, "%s is not a pointer", num_result.refer.name() ); + } + } + } + return ok; + } + + static void initialize_allocated( cbl_refer_t input ); + static void + initialize_statement( std::list<cbl_num_result_t>& tgts, + bool with_filler, + data_category_t category, + const category_map_t& replacement = category_map_t()); + + + unsigned char cbl_alphabet_t::nul_string[2] = ""; // 2 NULs lets us use one + unsigned char *nul_string() { return cbl_alphabet_t::nul_string; } + + static inline literal_t literal_of( char *s ) { + literal_t output; + return output.set( strlen(s), s, "" ); + } + static inline char * string_of( const literal_t& lit ) { + return strlen(lit.data) == lit.len? lit.data : NULL; + } + + static inline char * string_of( _Float128 cce ) { + static const char empty[] = "", format[] = "%.32E"; + char output[64]; + int len = strfromf128 (output, sizeof(output), format, cce); + if( sizeof(output) < size_t(len) ) { + dbgmsg("string_of: value requires %d digits (of %zu)", + len, sizeof(output)); + return xstrdup(empty); + } + + char decimal = symbol_decimal_point(); + std::replace(output, output + strlen(output), '.', decimal); + return xstrdup(output); + } + + cbl_field_t * + new_literal( const literal_t& lit, enum cbl_field_attr_t attr ); + + static YYLTYPE first_line_of( YYLTYPE loc ); +%} + +%locations +%token-table +%define parse.error verbose // custom +%expect 6 +%require "3.5.1" // 3.8.2 also works, but not 3.8.0 +%% + +top: programs + { + if( ! goodnight_gracie() ) { + YYABORT; + } + if( nparse_error > 0 ) YYABORT; + } + | programs end_program + { + if( nparse_error > 0 ) YYABORT; + } + ; +programs: program + | programs end_program program + ; +program: id_div options_para env_div data_div + { + if( ! data_division_ready() ) { + mode_syntax_only(procedure_div_e); + } + current_division = procedure_div_e; + } + procedure_div + { + if( yydebug ) labels_dump(); + } + ; + +id_div: cdf_words IDENTIFICATION_DIV '.' program_id + | cdf_words program_id + | cdf_words IDENTIFICATION_DIV '.' function_id + ; + +cdf_words: %empty + | cobol_words + ; +cobol_words: cobol_words1 + | cobol_words cobol_words1 + ; +cobol_words1: COBOL_WORDS EQUATE NAME[keyword] WITH NAME[name] { + if( ! tokens.equate(@keyword, $keyword, $name) ) { YYERROR; } + } + | COBOL_WORDS UNDEFINE NAME[keyword] { + if( ! tokens.undefine(@keyword, $keyword) ) { YYERROR; } + } + | COBOL_WORDS SUBSTITUTE NAME[keyword] BY NAME[name] { + if( ! tokens.substitute(@keyword, $keyword, $name) ) { YYERROR; } + } + | COBOL_WORDS RESERVE NAME[name] { + if( ! tokens.reserve(@name, $name) ) { YYERROR; } + } + ; + +program_id: PROGRAM_ID dot namestr[name] program_as program_attrs[attr] dot + { + internal_ebcdic_lock(); + current_division = identification_div_e; + parser_division( identification_div_e, NULL, 0, NULL ); + location_set(@1); + int main_error=0; + const char *name = string_of($name); + parser_enter_program( name, false, &main_error ); + if( main_error ) { + error_msg(@name, "PROGRAM-ID 'main' is invalid with -main option"); + YYERROR; + } + + if( symbols_begin() == symbols_end() ) { + symbol_table_init(); + } + if( !current.new_program(@name, LblProgram, name, + $program_as.data, + $attr.common, $attr.initial) ) { + auto L = symbol_program(current_program_index(), name); + assert(L); + error_msg(@name, "PROGRAM-ID %s already defined on line %d", + name, L->line); + YYERROR; + } + if( nparse_error > 0 ) YYABORT; + } + ; +dot: %empty + | '.' + ; +program_as: %empty { $$ = (literal_t){}; } + | AS LITERAL { $$ = $2; } + ; + +function_id: FUNCTION '.' NAME program_as program_attrs[attr] '.' + { + internal_ebcdic_lock(); + current_division = identification_div_e; + parser_division( identification_div_e, NULL, 0, NULL ); + location_set(@1); + + int main_error = 0; + parser_enter_program( $NAME, true, &main_error ); + if( main_error ) { + error_msg(@NAME, "FUNCTION-ID 'main' is invalid with -main option"); + YYERROR; + } + if( symbols_begin() == symbols_end() ) { + symbol_table_init(); + } + if( !current.new_program(@NAME, LblFunction, $NAME, + $program_as.data, + $attr.common, $attr.initial) ) { + auto L = symbol_program(current_program_index(), $NAME); + assert(L); + error_msg(@NAME, "FUNCTION %s already defined on line %d", + $NAME, L->line); + YYERROR; + } + if( keyword_tok($NAME, true) ) { + error_msg(@NAME, "FUNCTION %s is an intrinsic function", + $NAME); + YYERROR; + } + current.udf_add(current_program_index()); + if( nparse_error > 0 ) YYABORT; + } + | FUNCTION '.' NAME program_as is PROTOTYPE '.' + { + cbl_unimplemented("FUNCTION PROTOTYPE"); + } + ; + +options_para: %empty + | OPTIONS opt_clauses '.' + | OPTIONS + ; + +opt_clauses: opt_clause + | opt_clauses opt_clause + ; +opt_clause: opt_arith + | opt_round + | opt_entry + | opt_binary + | opt_decimal { + cbl_unimplementedw("type FLOAT-DECIMAL was ignored"); + } + | opt_intermediate + | opt_init + ; + +opt_arith: ARITHMETIC is opt_arith_type { + if( ! current.option($opt_arith_type) ) { + error_msg(@3, "unable to set ARITHMETIC option"); + } + } + ; +opt_arith_type: NATIVE { $$ = cbl_options_t::native_e; } + | STANDARD { $$ = cbl_options_t::standard_e; } + | STANDARD_BINARY { $$ = cbl_options_t::standard_binary_e; } + | STANDARD_DECIMAL { $$ = cbl_options_t::standard_decimal_e; } + ; +opt_round: DEFAULT ROUNDED mode is rounded_type[type] { + current_rounded_mode($type); + } + ; +opt_entry: ENTRY_CONVENTION is COBOL { + yywarn("ENTRY-CONVENTION IS COBOL, check"); + } + ; +opt_binary: FLOAT_BINARY default_kw is HIGH_ORDER_LEFT + { + cbl_unimplementedw("HIGH-ORDER-LEFT was ignored"); + if( ! current.option_binary(cbl_options_t::high_order_left_e) ) { + error_msg(@3, "unable to set HIGH_ORDER_LEFT"); + } + } + | FLOAT_BINARY default_kw is HIGH_ORDER_RIGHT[opt] + { + cbl_unimplementedw("HIGH-ORDER-RIGHT was ignored"); + if( ! current.option_binary(cbl_options_t::high_order_right_e) ) { + error_msg(@opt, "unable to set HIGH-ORDER-RIGHT"); + } + } + ; +default_kw: %empty + | DEFAULT + ; +opt_decimal: FLOAT_DECIMAL default_kw is HIGH_ORDER_LEFT[opt] + { + cbl_unimplementedw("HIGH-ORDER-LEFT was ignored"); + if( ! current.option_decimal(cbl_options_t::high_order_left_e) ) { + error_msg(@opt, "unable to set HIGH-ORDER-LEFT"); + } + } + | FLOAT_DECIMAL default_kw is HIGH_ORDER_RIGHT[opt] + { + cbl_unimplementedw("HIGH-ORDER-RIGHT was ignored"); + if( ! current.option_decimal(cbl_options_t::high_order_right_e) ) { + error_msg(@opt, "unable to set HIGH-ORDER-RIGHT"); + } + } + | FLOAT_DECIMAL default_kw is BINARY_ENCODING[opt] + { + cbl_unimplementedw("BINARY-ENCODING was ignored"); + if( ! current.option(cbl_options_t::binary_encoding_e) ) { + error_msg(@opt, "unable to set BINARY-ENCODING option"); + } + } + | FLOAT_DECIMAL default_kw is DECIMAL_ENCODING[opt] + { + cbl_unimplementedw("DECIMAL-ENCODING was ignored"); + if( ! current.option(cbl_options_t::decimal_encoding_e) ) { + error_msg(@opt, "unable to set DECIMAL-ENCODING option"); + } + } + ; +opt_intermediate: + INTERMEDIATE ROUNDING is round_between[round] { + current.intermediate_round($round); + } + ; + +opt_init: INITIALIZE opt_init_sects[sect] opt_section to opt_init_value[init] + { + if( $sect.local ) { + current.initial_value(local_sect_e, $init); + } + if( $sect.working ) { + current.initial_value(working_sect_e, $init); + } + } + ; +opt_section: %empty + | SECTION + ; +opt_init_sects: ALL { $$.local = $$.working = true; } + | opt_init_sect { + $$.local = $$.working = false; + switch($1) { + case local_sect_e: + $$.local = true; break; + case working_sect_e: + $$.working = true; break; + default: gcc_unreachable(); + } + } + | opt_init_sects opt_init_sect { + $$ = $1; + switch($2) { + case local_sect_e: + if( $$.local ) { + error_msg(@2, "LOCAL-STORAGE repeated"); + } + $$.local = true; break; + case working_sect_e: + if( $$.working ) { + error_msg(@2, "WORKING-STORAGE repeated"); + } + $$.working = true; break; + default: gcc_unreachable(); + } + } + ; +opt_init_sect: LOCAL_STORAGE { $$ = local_sect_e; } + | SCREEN { cbl_unimplemented("SCREEN SECTION"); } + | WORKING_STORAGE { $$ = working_sect_e; } + ; +opt_init_value: BINARY ZERO { $$ = constant_index(NULLS); } + | HIGH_VALUES { $$ = constant_index(HIGH_VALUES); } + | LITERAL + { + if( $1.prefix[0] != 'X' ) { + error_msg(@1, "hexadecimal literal required"); + } + if( $1.len != 1 ) { + error_msg(@1, "1-byte hexadecimal literal required"); + } + char ach[16]; + sprintf(ach, "%d", (int)($1.data[0])); + //auto f = new_literal($1.data); + auto f = new_literal(ach); + f = field_add(@1, f); + $$ = field_index(f); + } + | LOW_VALUES { $$ = constant_index(LOW_VALUES); } + | SPACES { $$ = constant_index(SPACES); } + ; + +namestr: ctx_name { + $$ = literal_of($1); + if( ! string_of($$) ) { + error_msg(@1, "'%s' has embedded NUL", $$.data); + YYERROR; + } + } + | LITERAL { + if( $$.prefix[0] != '\0' ) { + error_msg(@1, "literal cannot use %s prefix in this context", + $$.prefix); + YYERROR; + } + if( !is_cobol_word($$.data) ) { + error_msg(@1, "literal '%s' must be a COBOL or C identifier", + $$.data); + } + } + ; + +program_attrs: %empty { $$.common = $$.initial = $$.recursive = false; } + | is comminits program_kw { $$ = $2; } + ; +comminits: comminit + | comminits comminit { + if( ($1.initial && $2.recursive) || + ($2.initial && $1.recursive) ) { + auto loc = $1.initial? @1 : @2; + error_msg(loc, "INITIAL cannot be used with RECURSIVE"); + } + $$ = $1; + if( $2.common ) { + if( $1.common ) { + error_msg(@2, "COMMON repeated"); + } + $$.common = $2.common; + } + if( $2.initial ) { + if( $1.initial ) { + error_msg(@2, "INITIAL repeated"); + } + $$.initial = $2.initial; + } + if( $2.recursive ) { + if( $1.recursive ) { + error_msg(@2, "RECURSIVE repeated"); + } + $$.recursive = $2.recursive; + } + } + ; +comminit: COMMON { + if( program_level() == 0 ) { // PROGRAM-ID being parsed not added yet. + error_msg(@1, "COMMON may be used only in a contained program"); + } + $$.common = true; + $$.initial = $$.recursive = false; + } + | INITIAL_kw { $$.initial = true; $$.common = $$.recursive = false;} + | RECURSIVE { + $$.recursive = true; $$.common = $$.initial = false; + } + ; + + +env_div: %empty { current_division = environment_div_e; } + | ENVIRONMENT_DIV '.' { current_division = environment_div_e; } + | ENVIRONMENT_DIV '.' { + current_division = environment_div_e; + } env_sections + ; + +env_sections: env_section + | env_sections env_section + ; + +env_section: INPUT_OUTPUT_SECT '.' + | INPUT_OUTPUT_SECT '.' io_sections + | INPUT_OUTPUT_SECT '.' selects { /* IBM requires FILE CONTROL. */ } + | CONFIGURATION_SECT '.' + | CONFIGURATION_SECT '.' config_paragraphs + | cdf + ; + +io_sections: io_section + | io_sections io_section + ; + +io_section: FILE_CONTROL '.' + | FILE_CONTROL '.' selects + | IO_CONTROL '.' + | IO_CONTROL '.' io_control_clauses '.' + ; + +io_control_clauses: io_control_clause + | io_control_clauses io_control_clause + ; +io_control_clause: + SAME record area for_kw filenames + { + symbol_file_same_record_area( $filenames->files ); + } + | SAME smerge area for_kw filenames + { + symbol_file_same_record_area( $filenames->files ); + } + | APPLY COMMIT on field_list + { + cbl_unimplementedw("I-O-CONTROL APPLY COMMIT"); + } + ; +area: %empty + | AREA + ; +smerge: SORT + | SORT_MERGE + ; + +selects: select + | selects select + ; + +select: SELECT optional NAME[name] select_clauses[clauses] '.' + { + assert($clauses.file); + cbl_file_t *file = $clauses.file; + + file->optional = $optional; + file->line = yylineno; + if( !namcpy(@clauses, file->name, $name) ) YYERROR; + + if( ! ($clauses.clauses & assign_clause_e) ) { + error_msg(@name, "ASSIGN clause missing for %s", file->name); + } + + // key check + switch(file->org) { + case file_indexed_e: + // indexed file cannot have relative key + if( ($clauses.clauses & relative_key_clause_e) != 0) { + assert(file->keys); + auto ikey = file->nkey - 1; + assert(file->keys[ikey].fields); + auto f = cbl_field_of(symbol_at(file->keys[ikey].fields[0])); + error_msg(@name, "INDEXED file %s cannot have RELATIVE key %s", + file->name, f->name); + break; // because next message would be redundant + } + if( ($clauses.clauses & record_key_clause_e) == 0 ) { + error_msg(@name, "INDEXED file %s has no RECORD KEY", + file->name); + } + break; + case file_disorganized_e: + file->org = file_sequential_e; + __attribute__((fallthrough)); + default: + if( ($clauses.clauses & record_key_clause_e) != 0 ) { + assert(file->keys); + auto ikey = file->nkey - 1; + assert(file->keys[ikey].fields); + auto f = cbl_field_of(symbol_at(file->keys[ikey].fields[0])); + error_msg(@name, "%s file %s cannot have RECORD key %s", + file_org_str(file->org), file->name, f->name); + } + break; + } + + // access check + switch(file->access) { + case file_access_rnd_e: + case file_access_dyn_e: + if( is_sequential(file) ) { + error_msg(@name, "%s file %s cannot have ACCESS %s", + file_org_str(file->org), file->name, + file_access_str(file->access)); + } + break; + default: + break; + } + + // install file, and set record area's name + if( (file = file_add(@name, file)) == NULL ) YYERROR; + auto ifile = symbol_index(symbol_elem_of(file)); + // update keys + for( auto p = file->keys; + p && p < file->keys + file->nkey; p++ ) + { + if( p->name[0] == '\0' ) continue; + auto f = symbol_field(PROGRAM, 0, p->name); + cbl_field_of(f)->parent = ifile; + size_t isym = field_index(cbl_field_of(f)); + update_symbol_map(symbol_at(isym)); + } + } + | SELECT optional NAME[name] '.' + { + cbl_file_t file = protofile; + + file.optional = $optional; + file.line = yylineno; + if( !namcpy(@name, file.name, $name) ) YYERROR; + + if( file_add(@name, &file) == NULL ) YYERROR; + } + ; +selected_name: external scalar { $$ = $2; } + | external LITERAL[name] + { + const char *name = string_of($name); + if( ! name ) { + error_msg(@name, "'%s' has embedded NUL", $name.data); + YYERROR; + } + uint32_t len = $name.len; + cbl_field_t field = { + 0, FldLiteralA, FldInvalid, quoted_e | constant_e, + 0, 0, 0, nonarray, 0, "", 0, cbl_field_t::linkage_t(), + {len,len,0,0, $name.data, NULL, {NULL}, {NULL}}, NULL }; + field.attr |= literal_attr($name.prefix); + $$ = new cbl_refer_t( field_add(@name, &field) ); + } + ; +external: %empty /* GnuCOBOL uses EXTERNAL to control name resolution. */ + | EXTERNAL + ; + +select_clauses: select_clause { $$.clauses = $1.clause; $$.file = $1.file; } + | select_clauses[total] select_clause[part] + { + $$ = $total; + // The default organization is sequential. + if( ($$.clauses & organization_clause_e) == 0 ) { + $$.file->org = file_sequential_e; + } + const bool exists = ($$.clauses & $part.clause); + $$.clauses |= $part.clause; + + switch($part.clause) { + case alt_key_clause_e: + assert( $part.file->nkey == 1 ); + if( $$.file->nkey++ == 0 ) { + // If no key yet exists, create room for it and the + // present alternate. + assert($$.file->keys == &no_key); + $$.file->keys = new cbl_file_key_t[++$$.file->nkey]; + } + { + auto keys = new cbl_file_key_t[$$.file->nkey]; + auto alt = std::copy($$.file->keys, + $$.file->keys + + $$.file->nkey - 1, + keys); + // Assign the alternate key to the last element, + // and update the pointer. + *alt = $part.file->keys[0]; + delete[] $$.file->keys; + $$.file->keys = keys; + } + break; + case assign_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + $$.file->filename = $part.file->filename; + break; + case collating_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + break; + case lock_mode_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + $$.file->lock = $part.file->lock; + break; + case organization_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + $$.file->org = $part.file->org; + break; + case padding_clause_e: + case reserve_clause_e: + case sharing_clause_e: + case record_delim_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + break; + case access_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + $$.file->access = $part.file->access; + break; + case relative_key_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + if( $$.clauses & record_key_clause_e ) { + error_msg(@part, "FILE %s is INDEXED, has no RELATIVE key", + $$.file->name); + YYERROR; + } + // fall thru + case record_key_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + if( ($$.clauses & relative_key_clause_e) && + $part.clause == record_key_clause_e ) { + error_msg(@part, "FILE %s is RELATIVE, has no RECORD key", + $$.file->name); + YYERROR; + } + if( $$.file->nkey == 0 ) { + $$.file->nkey = $part.file->nkey; + $$.file->keys = $part.file->keys; + } else { + $$.file->keys[0] = $part.file->keys[0]; + } + break; + /* case password_clause_e: */ + case file_status_clause_e: + if( exists ) { + error_msg(@part, "clause is repeated"); + YYERROR; + } + $$.file->user_status = $part.file->user_status; + $$.file->vsam_status = $part.file->vsam_status; + break; + } + if( $$.file->lock.locked() ) { + if( $$.file->org == file_sequential_e && + $$.file->lock.multiple ) { + error_msg(@part, "SEQUENTIAL file cannot lock MULTIPLE records"); + } + } + + delete $part.file; + } + ; + +select_clause: access_clause + | alt_key_clause[alts] + | assign_clause[alts] + | collate_clause + | /* file */ status_clause + | lock_mode_clause + | org_clause + | padding_clause + | record_delim_clause + | record_key_clause + | relative_key_clause + | reserve_clause + | sharing_clause + ; + +access_clause: ACCESS mode is access_mode[acc] + { + $$.clause = access_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->access = static_cast<cbl_file_access_t>($acc); + } + ; +access_mode: RANDOM { $$ = file_access_rnd_e; } + | DYNAMIC { $$ = file_access_dyn_e; } + | SEQUENTIAL { $$ = file_access_seq_e; } + ; + +alt_key_clause: ALTERNATE record key is name key_source[fields] unique_key + { + $$.clause = alt_key_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->nkey = 1; + if( $fields == NULL ) { + $$.file->keys = new cbl_file_key_t(field_index($name), + $unique_key); + } else { + $name->type = FldLiteralA; + $name->data.initial = $name->name; + $name->attr |= record_key_e; + auto& name = *$name; + $$.file->keys = new cbl_file_key_t(name.name, + $fields->fields, + $unique_key); + } + } + ; +key_source: %empty { $$ = NULL; } + | SOURCE is key_sources[fields] { $$ = $fields; } + ; +key_sources: name { $$ = new field_list_t($1); } + | key_sources name { $$ = $1; $$->fields.push_back($2); } + ; +unique_key: %empty { $$ = true; } + | with DUPLICATES { $$ = false; } + ; + +assign_clause: ASSIGN to selected_name[selected] { + $$.clause = assign_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->filename = field_index($selected->field); + } + | ASSIGN to device_name USING name { + $$.clause = assign_clause_e; + cbl_unimplemented("ASSIGN TO DEVICE"); + YYERROR; + } + | ASSIGN to device_name { + $$.clause = assign_clause_e; + cbl_unimplemented("ASSIGN TO DEVICE"); + YYERROR; + } + | ASSIGN USING name { + $$.clause = assign_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->filename = field_index($name); + } + ; + +collate_clause: collate_claus1 { + $$.clause = collating_clause_e; + $$.file = new cbl_file_t(protofile); + } + ; +collate_claus1: collating SEQUENCE NAME /* SEQUENCE swallows IS/FOR */ + | collating SEQUENCE ALPHANUMERIC is NAME + | collating SEQUENCE NATIONAL is NAME + ; + +status_clause: file STATUS is name[user] + { + $$.clause = file_status_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->user_status = field_index($user); + } + | file STATUS is name[user] name[vsam] + { + $$.clause = file_status_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->user_status = field_index($user); + $$.file->vsam_status = field_index($vsam); + } + ; + +lock_mode_clause: // ISO only + LOCK mode is lock_mode lock_how[how] + { + $$.clause = lock_mode_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->lock.multiple = $how > 0; + if( ! $$.file->lock.mode_set($lock_mode) ) { + error_msg(@lock_mode, "logic error: %s is not a file lock mode", + keyword_str($lock_mode) ); + } + } +lock_how: %empty { $$ = 0; } + | with LOCK_ON multiple records { $$ = $multiple; } + ; +lock_mode: MANUAL { $$ = MANUAL; } + | RECORD { $$ = RECORD; } + | AUTOMATIC { $$ = AUTOMATIC; } + ; +multiple: %empty { $$ = 0; } + | MULTIPLE { $$ = MULTIPLE; } + ; +records: RECORD + | RECORDS + ; + +org_clause: org_clause1[org] + { + $$.clause = organization_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->org = static_cast<cbl_file_org_t>($org); + } + ; +org_is: %empty + | ORGANIZATION is + ; + // file_sequential is the proper default +org_clause1: org_is SEQUENTIAL { $$ = file_sequential_e; } + | org_is LINE SEQUENTIAL { $$ = file_line_sequential_e; } + | org_is RELATIVE { $$ = file_relative_e; } + | org_is INDEXED { $$ = file_indexed_e; } + ; + + /* + * "The PADDING CHARACTER clause is syntax checked, but has no + * effect on the execution of the program." + */ +padding_clause: PADDING character is padding_char + { + $$.clause = padding_clause_e; + $$.file = new cbl_file_t(protofile); + } + ; +character: %empty + | CHARACTER + ; +padding_char: NAME + | LITERAL + | NUMSTR + ; + +record_delim_clause: RECORD DELIMITER is STANDARD_ALPHABET + { + $$.clause = record_delim_clause_e; + $$.file = new cbl_file_t(protofile); + } + ; + +record_key_clause: RECORD key is name key_source[fields] + { + $$.clause = record_key_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->nkey = 1; + if( $fields == NULL ) { + $$.file->keys = new cbl_file_key_t(field_index($name)); + } else { // "special" not-literal literal: a key name + $name->type = FldLiteralA; + $name->data.initial = $name->name; + $name->attr |= record_key_e; + $$.file->keys = new cbl_file_key_t($name->name, + $fields->fields, true); + } + } + ; + +relative_key_clause: /* RELATIVE */ KEY is name + { // lexer returns KEY for RELATIVE ... NAME + $$.clause = relative_key_clause_e; + $$.file = new cbl_file_t(protofile); + $$.file->nkey = 1; + $$.file->keys = new cbl_file_key_t(field_index($name)); + } + ; + +reserve_clause: RESERVE NUMSTR reserve_area + { + $$.clause = reserve_clause_e; + $$.file = new cbl_file_t(protofile); + } + ; +reserve_area: %empty + | AREA + | AREAS + ; + +sharing_clause: SHARING with sharing_who + { + $$.clause = sharing_clause_e; + $$.file = new cbl_file_t(protofile); + } + ; +sharing_who: ALL other + | NO other + | READ ONLY + ; +other: %empty + | OTHER + ; + +config_paragraphs: config_paragraph + | config_paragraphs config_paragraph + ; + +config_paragraph: + SPECIAL_NAMES '.' + | SPECIAL_NAMES '.' specials '.' + | SOURCE_COMPUTER '.' NAME with_debug '.' + | OBJECT_COMPUTER '.' NAME collating_sequence[name] '.' + { + if( $name ) { + if( !current.collating_sequence($name) ) { + error_msg(@name, "collating sequence already defined as '%s'", + current.collating_sequence()); + YYERROR; + } + } + } + | REPOSITORY '.' + | REPOSITORY '.' repo_members '.' + ; + +repo_members: repo_member + | repo_members repo_member + ; +repo_member: repo_class + { cbl_unimplemented("CLASS"); } + | repo_interface + { cbl_unimplemented("INTERFACE"); } + | repo_func + | repo_program + | repo_property + { cbl_unimplemented("PROPERTY"); } + ; + +repo_class: CLASS NAME repo_as repo_expands + ; +repo_as: %empty { $$ = literal_t(); } + | AS LITERAL { $$ = $2; } + ; +repo_expands: %empty + | EXPANDS NAME USING NAME + ; + +repo_interface: INTERFACE NAME repo_as repo_expands + ; + +repo_func: FUNCTION repo_func_names INTRINSIC + { + auto namelocs( name_queue.pop() ); + for( const auto& nameloc : namelocs ) { + current.repository_add(nameloc.name); + } + } + | FUNCTION ALL INTRINSIC + { + current.repository_add_all(); + } + | FUNCTION repo_func_names + ; +repo_func_names: + repo_func_name + | repo_func_names repo_func_name + ; +repo_func_name: NAME { + if( ! current.repository_add($NAME) ) { // add intrinsic by name + auto token = current.udf_in($NAME); + if( !token ) { + error_msg(@NAME, "%s is not defined here as a user-defined function", + $NAME); + current.udf_dump(); + YYERROR; + } + auto e = symbol_function(0, $NAME); + assert(e); + current.repository_add(symbol_index(e)); // add UDF to repository + } + } + ; + +repo_program: PROGRAM_kw NAME repo_as + { + size_t parent = 0; + auto program = symbol_label( PROGRAM, LblProgram, 0, $NAME ); + if( ! program ) { + if( $repo_as.empty() ) { + error_msg(@repo_as, "'%s' does not name an earlier program", $NAME); + YYERROR; + } + program = symbol_label( PROGRAM, LblProgram, 0, + "", $repo_as.data ); + } + if( ! program ) { + error_msg(@repo_as, "'%s' does not name an earlier program", + $repo_as.data); + YYERROR; + } + assert(program); + parent = symbol_index(symbol_elem_of(program)); + // Literal field whose parent is the the aliased program. + cbl_field_t prog = { .type = FldLiteralA, + .attr = quoted_e, + .parent = parent, + .data = {.initial = $repo_as.data} }; + namcpy(@NAME, prog.name, $NAME); + if( ! prog.data.initial ) { + assert(program); + prog.data.initial = program->name; + } + auto e = symbol_field_add(PROGRAM, &prog); + symbol_field_location(symbol_index(e), @NAME); + } + ; + +repo_property: PROPERTY NAME repo_as + ; + +with_debug: %empty + | with DEBUGGING MODE { + if( ! set_debug(true) ) { + error_msg(@2, "DEBUGGING MODE valid only in fixed format"); + } + } + ; + +collating_sequence: %empty { $$ = NULL; } + | PROGRAM_kw COLLATING SEQUENCE is NAME[name] { $$ = $name; } + | PROGRAM_kw SEQUENCE is NAME[name] { $$ = $name; } + | COLLATING SEQUENCE is NAME[name] { $$ = $name; } + | SEQUENCE is NAME[name] { $$ = $name; } + ; + +specials: special_names + ; +special_names: special_name + | special_names special_name + ; + +special_name: dev_mnemonic + | ALPHABET NAME[name] is alphabet_name[abc] + { + if( !$abc ) YYERROR; + assert($abc); // already in symbol table + if( !namcpy(@name, $abc->name, $name) ) YYERROR; + if( yydebug ) $abc->dump(); + } + | CLASS NAME is domains + { + struct cbl_field_t field = { 0, + FldClass, FldInvalid, 0, 0, 0, 0, nonarray, yylineno, "", + 0, cbl_field_t::linkage_t(), + { 0,0,0,0, NULL, NULL, { NULL }, { NULL } }, NULL }; + if( !namcpy(@NAME, field.name, $2) ) YYERROR; + + struct cbl_domain_t *domain = + new cbl_domain_t[ domains.size() + 1 ] ; + + std::copy(domains.begin(), domains.end(), domain); + + field.data.false_value = $domains; + field.data.domain = domain; + domains.clear(); + + if( field_add(@2, &field) == NULL ) { + dbgmsg("failed class"); + YYERROR; + } + } + | CURRENCY sign is LITERAL[lit] with picture_sym + { + // The COBOL is "CURRENCY sign SYMBOL PICTURE symbol" + // In our processing, we flip the order, and refer to + // symbol_currency_add (symbol, sign-string). 'symbol' is the + // character in the PICTURE string, and 'sign' is the substitution + // that gets made in memory. + if( ! string_of($lit) ) { + error_msg(@lit, "'%s' has embedded NUL", $lit.data); + YYERROR; + } + symbol_currency_add( $picture_sym, $lit.data ); + } + | DECIMAL_POINT is COMMA + { + symbol_decimal_point_set(','); + } + | LOCALE NAME is locale_spec + { + current.locale($NAME, $locale_spec); + cbl_unimplemented("LOCALE syntax"); + } + ; + | upsi + | SYMBOLIC characters symbolic is_alphabet + { + cbl_unimplemented("SYMBOLIC syntax"); + } + ; +locale_spec: NAME { $$ = $1; } + | LITERAL { $$ = string_of($1); } + + ; +symbolic: NAME + | NUMSTR + ; +is_alphabet: ARE NUMSTR + | is NUMSTR + ; + +dev_mnemonic: device_name is NAME + { + cbl_special_name_t special = { .token = $1.token, + .id = $1.id }; + if( !namcpy(@NAME, special.name, $NAME) ) YYERROR; + + const char *filename; + + switch( special.id ) { + case STDIN_e: case SYSIN_e: case SYSIPT_e: + filename = "/dev/stdin"; + break; + case STDOUT_e: case SYSOUT_e: + case SYSLIST_e: case SYSLST_e: case CONSOLE_e: + filename ="/dev/stdout"; + break; + case STDERR_e: case SYSPUNCH_e: case SYSPCH_e: case SYSERR_e: + filename ="/dev/stderr"; + break; + default: + filename ="/dev/null"; + break; + } + + special.filename = symbol_index(symbol_literalA(0, filename)); + + symbol_special_add(PROGRAM, &special); + } + | NAME[device] is NAME[name] + { + static const std::map< std::string, special_name_t > fujitsus + { // Fujitsu calls these "function names", not device names + { "ARGUMENT-NUMBER", ARG_NUM_e }, + { "ARGUMENT-VALUE", ARG_VALUE_e } , + { "ENVIRONMENT-NAME", ENV_NAME_e }, + { "ENVIRONMENT-VALUE", ENV_VALUE_e }, + }; + char device[ 1 + strlen($device) ]; + std::transform($device, $device + strlen($device) + 1, + device, toupper); + auto p = fujitsus.find(device); + if( p == fujitsus.end() ) { + error_msg(@device, "%s is not a device name"); + } + + cbl_special_name_t special = { .id = p->second }; + if( !namcpy(@name, special.name, $name) ) YYERROR; + + symbol_special_add(PROGRAM, &special); + } + ; + +device_name: SYSIN { $$.token = SYSIN; $$.id = SYSIN_e; } + | SYSIPT { $$.token = SYSIPT; $$.id = SYSIPT_e; } + | SYSOUT { $$.token = SYSOUT; $$.id = SYSOUT_e; } + | SYSLIST { $$.token = SYSLIST; $$.id = SYSLIST_e; } + | SYSLST { $$.token = SYSLST; $$.id = SYSLST_e; } + | SYSPUNCH { $$.token = SYSPUNCH; $$.id = SYSPUNCH_e; } + | SYSPCH { $$.token = SYSPCH; $$.id = SYSPCH_e; } + | CONSOLE { $$.token = CONSOLE; $$.id = CONSOLE_e; } + | C01 { $$.token = C01; $$.id = C01_e; } + | C02 { $$.token = C02; $$.id = C02_e; } + | C03 { $$.token = C03; $$.id = C03_e; } + | C04 { $$.token = C04; $$.id = C04_e; } + | C05 { $$.token = C05; $$.id = C05_e; } + | C06 { $$.token = C06; $$.id = C06_e; } + | C07 { $$.token = C07; $$.id = C07_e; } + | C08 { $$.token = C08; $$.id = C08_e; } + | C09 { $$.token = C09; $$.id = C09_e; } + | C10 { $$.token = C10; $$.id = C10_e; } + | C11 { $$.token = C11; $$.id = C11_e; } + | C12 { $$.token = C12; $$.id = C12_e; } + | CSP { $$.token = CSP; $$.id = CSP_e; } + | S01 { $$.token = S01; $$.id = S01_e; } + | S02 { $$.token = S02; $$.id = S02_e; } + | S03 { $$.token = S03; $$.id = S03_e; } + | S04 { $$.token = S04; $$.id = S04_e; } + | S05 { $$.token = S05; $$.id = S05_e; } + | AFP_5A { $$.token = AFP_5A; $$.id = AFP_5A_e; } + | STDIN { $$.token = STDIN; $$.id = STDIN_e; } + | STDOUT { $$.token = STDOUT; $$.id = STDOUT_e; } + | STDERR { $$.token = STDERR; $$.id = STDERR_e; } + ; + +alphabet_name: STANDARD_ALPHABET { $$ = alphabet_add(@1, ASCII_e); } + | NATIVE { $$ = alphabet_add(@1, EBCDIC_e); } + | EBCDIC { $$ = alphabet_add(@1, EBCDIC_e); } + | alphabet_seqs + { + $$ = cbl_alphabet_of(symbol_alphabet_add(PROGRAM, $1)); + } + | error + { + error_msg(@1, "code-name-1 may be STANDARD-1, STANDARD-2, " + "NATIVE, OR EBCDIC"); + $$ = NULL; + } + ; +alphabet_seqs: alphabet_seq[seq] + /* + * The 1st element of the 1st sequence represents the + * low-value; its index becomes cbl_alphabet_t::low_index. The + * high_index belongs to the last element of the last sequence + * that is not an ALSO. + */ + { + $$ = new cbl_alphabet_t(@seq, custom_encoding_e); + + if( !$seq.low || $seq.also ) { + error_msg(@1, "syntax error at ALSO"); + YYERROR; + } + $$->add_sequence(@seq, $seq.low); + size_t len = $seq.low == nul_string()? 1 : strlen((const char*)$seq.low); + assert(len > 0); + $$->add_interval(@seq, $seq.low[--len], $seq.high[0]); + $$->add_sequence(@seq, $seq.high); + } + | alphabet_seqs alphabet_seq[seq] + { + // ALSO x'00' is valid, but in that case the low pointer is NULL + if( !$seq.low ) { + $$->also(@seq, $seq.also); + } else { + $$->add_sequence(@seq, $seq.low); + size_t len = $seq.low == nul_string()? 1 : strlen((const char*)$seq.low); + assert(len > 0); + $$->add_interval(@seq, $seq.low[--len], $seq.high[0]); + $$->add_sequence(@seq, $seq.high); + } + } + ; +alphabet_seq: alphabet_lit[low] + { + $$.also = 0; + if( $low.len == 1 && $low.data[0] == '\0' ) { + $$.high = $$.low = nul_string(); + } else { + size_t size = 1 + $low.len; + $$.low = new unsigned char[size]; + memcpy($$.low, $low.data, size); + $$.high = $$.low + size - 1; + assert($$.high[0] == '\0'); + } + } + | alphabet_lit[low] THRU alphabet_lit[high] + { + $$.also = 0; + size_t size = 1 + $low.len; + if( $low.len == 1 && $low.data[0] == '\0' ) { + $$.low = nul_string(); + } else { + $$.low = new unsigned char[size]; + memcpy($$.low, $low.data, size); + } + assert($high.len > 0); + assert($high.data[0] != '\0'); + size = 1 + $high.len; + $$.high = new unsigned char[size]; + memcpy($$.high, $high.data, size); + } + | ALSO alphabet_etc { $$ = {}; $$.also = $2; } + ; +alphabet_etc: alphabet_lit + { + if( $1.len > 1 ) { + error_msg(@1, "'%c' can be only a single letter", $1.data); + YYERROR; + } + $$ = (unsigned char)$1.data[0]; + } + | spaces_etc { + // For figurative constants, pass the synmbol table index, + // marked with the high bit. + static const auto bits = sizeof($$) * 8 - 1; + $$ = 1; + $$ = $$ << bits; + $$ |= constant_index($1); + } + ; +alphabet_lit: LITERAL { $$ = $1; assert($$.len > 0); } + | NUMSTR { + assert( $1.radix == decimal_e); + $$ = literal_of($1.string); + } + ; + +upsi: UPSI is NAME + { + assert($UPSI); + size_t parent = symbol_index(symbol_field(0,0,"UPSI-0")); + cbl_field_t *field = field_alloc(@NAME, FldSwitch, parent, $NAME); + if( !field ) YYERROR; + field->attr = constant_e; + field->data.initial = $UPSI; + } + | UPSI is NAME upsi_entry[entry] + { + assert($UPSI); + size_t parent = symbol_index(symbol_field(0,0,"UPSI-0")); + cbl_field_t *field = field_alloc(@NAME, FldSwitch, parent, $NAME); + if( !field ) YYERROR; + field->attr = constant_e; + field->data.initial = $UPSI; + + assert('0' <= $UPSI[0] && $UPSI[0] < '8'); + const uint32_t bitn = $UPSI[0] - '0', value = (1 << bitn); + + if( $entry.on ) { + cbl_field_t *on = field_alloc(@NAME, FldSwitch, parent, $entry.on); + if( !on ) YYERROR; + on->data.upsi_mask = new cbl_upsi_mask_t(true, value); + } + if( $entry.off ) { + cbl_field_t *off = field_alloc(@NAME, FldSwitch, parent, $entry.off); + if( !off ) YYERROR; + off->data.upsi_mask = new cbl_upsi_mask_t(false, value); + } + } + | UPSI upsi_entry[entry] + { + size_t parent = symbol_index(symbol_field(0,0,"UPSI-0")); + assert('0' <= $UPSI[0] && $UPSI[0] < '8'); + const uint32_t bitn = $UPSI[0] - '0', value = (1 << bitn); + + if( $entry.on ) { + cbl_field_t *on = field_alloc($entry.loc, FldSwitch, parent, $entry.on); + if( !on ) YYERROR; + on->data.upsi_mask = new cbl_upsi_mask_t(true, value); + } + if( $entry.off ) { + cbl_field_t *off = field_alloc($entry.loc, FldSwitch, parent, $entry.off); + if( !off ) YYERROR; + off->data.upsi_mask = new cbl_upsi_mask_t(false, value); + } + } + ; +upsi_entry: ON status is NAME + { + $$.loc = @NAME; + $$.on = $NAME; + $$.off = NULL; + } + | OFF status is NAME + { + $$.loc = @NAME; + $$.on = NULL; + $$.off = $NAME; + } + | OFF status is NAME[off] ON status is NAME[on] + { + $$.loc = @off; + $$.on = $on; + $$.off = $off; + } + | ON status is NAME[on] OFF status is NAME[off] + { + $$.loc = @on; + $$.on = $on; + $$.off = $off; + } + ; + +picture_sym: %empty { $$ = NULL; } + | PICTURE SYMBOL LITERAL[lit] { + if( ! string_of($lit) ) { + error_msg(@lit, "'%s' has embedded NUL", $lit.data); + YYERROR; + } + $$ = string_of($lit); + } + ; + + /* + * The domains nonterminal ($domain) carries the FALSE value, + * if any. The domains variable (global std::list) carries the + * variable's DOMAIN, ending in a NULL. See the action for + * "CLASS NAME is domains". + */ +domains: domain + | domains domain { $$ = $1? $1 : $2; } + ; + +domain: all LITERAL[a] + { + if( ! string_of($a) ) { + gcc_location_set(@a); + yywarn("'%s' has embedded NUL", $a.data); + } + $$ = NULL; + cbl_domain_t domain(@a, $all, $a.len, $a.data); + domains.push_back(domain); + } + | all[a_all] LITERAL[a] THRU all[z_all] LITERAL[z] + { + if( ! string_of($a) ) { + yywarn("'%s' has embedded NUL", $a.data); + } + if( ! string_of($z) ) { + yywarn("'%s' has embedded NUL", $z.data); + } + $$ = NULL; + cbl_domain_elem_t first(@a, $a_all, $a.len, $a.data), + last(@z, $z_all, $z.len, $z.data); + domains.push_back(cbl_domain_t(first, last)); + } + | all NUMSTR[n] + { + $$ = NULL; + cbl_domain_t dom(@n, $all, strlen($n.string), $n.string, true); + domains.push_back(dom); + } + | all[n_all] NUMSTR[n] THRU all[m_all] NUMSTR[m] + { + $$ = NULL; + cbl_domain_elem_t first(@n, $n_all, strlen($n.string), $n.string, true), + last(@m, $m_all, strlen($m.string), $m.string, true); + domains.push_back(cbl_domain_t(first, last)); + } + | all reserved_value { + $$ = NULL; + if( $2 == NULLS ) YYERROR; + auto value = constant_of(constant_index($2))->data.initial; + struct cbl_domain_t domain( @2, $all, strlen(value), value ); + domains.push_back(domain); + } + | all[a_all] reserved_value[a] THRU all[z_all] LITERAL[z] { + if( ! string_of($z) ) { + yywarn("'%s' has embedded NUL", $z.data); + } + $$ = NULL; + if( $a == NULLS ) YYERROR; + auto value = constant_of(constant_index($a))->data.initial; + cbl_domain_elem_t first(@a, $a_all, strlen(value), value), + last(@z, $z_all, $z.len, $z.data); + domains.push_back(cbl_domain_t(first, last)); + } + | all[a_all] reserved_value[a] THRU all[z_all] NUMSTR[z] { + $$ = NULL; + if( $a == NULLS ) YYERROR; + auto value = constant_of(constant_index($a))->data.initial; + cbl_domain_elem_t first(@a, $a_all, strlen(value), value, true), + last(@z, $z_all, strlen($z.string), $z.string, true); + domains.push_back(cbl_domain_t(first, last)); + } + | when_set_to FALSE_kw is LITERAL[value] + { + if( ! string_of($value) ) { + yywarn("'%s' has embedded NUL", $value.data); + } + char *dom = $value.data; + $$ = new cbl_domain_t(@value, false, $value.len, dom); + } + | when_set_to FALSE_kw is reserved_value + { + if( $4 == NULLS ) YYERROR; + auto value = constant_of(constant_index($4))->data.initial; + $$ = new cbl_domain_t(@4, false, strlen(value), value ); + } + | when_set_to FALSE_kw is NUMSTR[n] + { + $$ = new cbl_domain_t(@n, false, strlen($n.string), $n.string, true); + } + ; +when_set_to: %empty + | WHEN + | SET + | TO + | WHEN SET + | SET TO + | WHEN TO + | WHEN SET TO + ; + +data_div: %empty + | DATA_DIV + | DATA_DIV { current_division = data_div_e; } data_sections + { + current_data_section = not_data_datasect_e; + parser_division( data_div_e, NULL, 0, NULL ); + } + ; + +data_sections: data_section + | data_sections data_section + ; + +data_section: FILE_SECT '.' + | FILE_SECT '.' { + current_data_section_set(@1, file_datasect_e); + } file_descrs + | WORKING_STORAGE_SECT '.' { + current_data_section_set(@1, working_storage_datasect_e); + } fields_maybe + | LOCAL_STORAGE_SECT '.' { + current_data_section_set(@1, local_storage_datasect_e); + } fields_maybe + | LINKAGE_SECT '.' { + current_data_section_set(@1, linkage_datasect_e); + } fields_maybe + | SCREEN SECTION '.' { + cbl_unimplemented("SCREEN SECTION"); + } + ; + +file_descrs: file_descr + | file_descrs file_descr + ; +file_descr: fd_name '.' { field_done(); } fields + | fd_name fd_clauses '.' { field_done(); } fields + ; + +fd_name: FD NAME { $$ = $2; file_section_fd_set(fd_e, $2, @2); } + | SD NAME { $$ = $2; file_section_fd_set(sd_e, $2, @2); } + ; + +fd_clauses: fd_clause + | fd_clauses fd_clause + ; +fd_clause: record_desc + { + auto f = cbl_file_of(symbol_at(file_section_fd)); + f->varying_size.min = $1.min; + f->varying_size.max = $1.max; + auto& cap = cbl_field_of(symbol_at(f->default_record))->data.capacity; + cap = std::max(cap, uint32_t(f->varying_size.max)); + // If min != max now, we know varying is explicitly defined. + f->varying_size.explicitly = f->varies(); + if( f->varying_size.max != 0 ) { + if( !(f->varying_size.min <= f->varying_size.max) ) { + error_msg(@1, "%zu must be <= %zu", + f->varying_size.min, f->varying_size.max); + YYERROR; + } + } + } + | block_desc + | label_desc + | DATA record_is field_list + | RECORDING mode is NAME + { + switch( $NAME[0] ) { + case 'F': + case 'V': + case 'U': + case 'S': + break; + default: + error_msg(@NAME, "invalid RECORDING MODE '%s'", $NAME); + YYERROR; + } + cbl_unimplementedw("RECORDING MODE was ignored, not defined by ISO 2023"); + } + | VALUE OF fd_values + | CODESET is NAME + | is GLOBAL + { + auto f = cbl_file_of(symbol_at(file_section_fd)); + f->attr |= global_e; + } + | is EXTERNAL + { + auto f = cbl_file_of(symbol_at(file_section_fd)); + f->attr |= external_e; + } + | is EXTERNAL as LITERAL + { + auto f = cbl_file_of(symbol_at(file_section_fd)); + f->attr |= external_e; + cbl_unimplemented("AS LITERAL "); + } + | fd_linage + | fd_report { + cbl_unimplemented("REPORT WRITER"); + YYERROR; + } + ; + +block_desc: BLOCK contains rec_contains chars_recs + ; +rec_contains: NUMSTR[min] { + ssize_t n; + if( (n = numstr2i($min.string, $min.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $min.string); + YYERROR; + } + $$.min = $$.max = n; // fixed length + } + | NUMSTR[min] TO NUMSTR[max] { + ssize_t n; + if( (n = numstr2i($min.string, $min.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $min.string); + YYERROR; + } + $$.min = n; + + if( (n = numstr2i($max.string, $max.radix)) < 0 ) { + error_msg(@max, "size %s cannot be negative", $max.string); + YYERROR; + } + $$.max = n; + if( !($$.min < $$.max) ) { + error_msg(@max, "FROM (%xz) must be less than TO (%zu)", + $$.min, $$.max); + YYERROR; + } + } + ; +chars_recs: %empty + | CHARACTERS + | RECORDS + ; + +label_desc: LABEL record_is STANDARD + | LABEL record_is OMITTED + | LABEL record_is fd_labels + ; + +record_is: RECORD /* lexer swallows IS/ARE */ + | RECORDS + ; + +fd_values: fd_value + | fd_values fd_value + ; + /* "The VALUE OF clause is syntax checked, but has + no effect on the execution of the program." */ +fd_value: NAME is alpha_val + ; +alpha_val: alphaval + | scalar + ; + +fd_labels: fd_label + | fd_labels fd_label + ; +fd_label: NAME + ; + +record_desc: RECORD is record_vary[r] depending { $$ = $r; } + | RECORD contains rec_contains[r] characters { $$ = $r; } + ; + +record_vary: VARYING in_size from_to { $$ = $from_to; } + | VARYING from_to { $$ = $from_to; } + | VARYING in_size { $$.min = 0; $$.max = 0; } + | VARYING { $$.min = 0; $$.max = 0; } + ; + +in_size: IN SIZE + | IN + | SIZE + ; + +from_to: FROM NUMSTR[min] TO NUMSTR[max] characters { + ssize_t n; + if( (n = numstr2i($min.string, $min.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $min.string); + YYERROR; + } + $$.min = n; + if( (n = numstr2i($max.string, $max.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $max.string); + YYERROR; + } + $$.max = n; + } + | NUMSTR[min] TO NUMSTR[max] characters { + ssize_t n; + if( (n = numstr2i($min.string, $min.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $min.string); + YYERROR; + } + $$.min = n; + if( (n = numstr2i($max.string, $max.radix)) < 0 ) { + error_msg(@max, "size %s cannot be negative", $max.string); + YYERROR; + } + $$.max = n; + } + + | TO NUMSTR[max] characters { + ssize_t n; + if( (n = numstr2i($max.string, $max.radix)) < 0 ) { + error_msg(@max, "size %s cannot be negative", $max.string); + YYERROR; + } + $$.min = 0; + $$.max = n; + } + + | FROM NUMSTR[min] characters { + ssize_t n; + if( (n = numstr2i($min.string, $min.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $min.string); + YYERROR; + } + $$.min = n; + $$.max = size_t(-1); + } + | NUMSTR[min] characters { + ssize_t n; + if( (n = numstr2i($min.string, $min.radix)) < 0 ) { + error_msg(@min, "size %s cannot be negative", $min.string); + YYERROR; + } + $$.min = n; + $$.max = size_t(-1); + } + + | CHARACTERS { $$.min = 0; $$.max = size_t(-1); } + ; + +depending: %empty + | DEPENDING on NAME + { + assert(file_section_fd > 0); + symbol_elem_t *e = symbol_at(file_section_fd); + assert(e); + auto file = cbl_file_of(e); + size_t odo; + + if( (e = symbol_field(PROGRAM, 0, $3)) != NULL ) { + assert(e->type == SymField); + odo = symbol_index(e); + } else { + e = symbol_field_forward_add(PROGRAM, 0, $NAME, yylineno); + if( !e ) YYERROR; + symbol_field_location( symbol_index(e), @NAME ); + odo = field_index(cbl_field_of(e)); + } + + file->record_length = odo; + assert( file->record_length > 0 ); + } + ; + +fd_linage: LINAGE is num_value with_footings + | LINAGE is num_value lines + ; +with_footings: with_footing + | with_footings with_footing + ; +with_footing: lines with FOOTING at num_value + | lines at top_bot num_value + ; +top_bot: TOP + | BOTTOM + ; + +fd_report: REPORT + | REPORTS + ; + +fields_maybe: %empty + | fields + ; +fields: field + | fields field + ; + +field: cdf + | data_descr '.' + { + if( in_file_section() && $data_descr->level == 1 ) { + if( !file_section_parent_set($data_descr) ) { + YYERROR; + } + } + field_done(); + + const auto& field(*$data_descr); + + // Format data.initial per picture + if( 0 == pristine_values.count(field.data.initial) ) { + if( field.data.digits > 0 && + field.data.value != 0.0 ) + { + char *initial; + int rdigits = field.data.rdigits < 0? + 1 : field.data.rdigits + 1; + + if( field.has_attr(scaled_e) ) { + if( field.data.rdigits > 0 ) { + rdigits = field.data.digits + field.data.rdigits; + } else { + rdigits = 0; + } + } + initial = string_of(field.data.value); + if( !initial ) { + error_msg(@1, xstrerror(errno)); + YYERROR; + } + char decimal = symbol_decimal_point(); + std::replace(initial, initial + strlen(initial), '.', decimal); + free(const_cast<char*>($data_descr->data.initial)); + $data_descr->data.initial = initial; + if( yydebug ) { + const char *value_str = string_of(field.data.value); + dbgmsg("%s::data.initial is (%%%d.%d) %s ==> '%s'", + field.name, + field.data.digits, + rdigits, + value_str? value_str : "", + field.data.initial); + } + } + } + } + ; + +occurs_clause: OCCURS cardinal_lb indexed + | OCCURS cardinal_lb key_descrs indexed + | OCCURS depending_on key_descrs indexed + | OCCURS depending_on indexed + | OCCURS name indexed + { + if( ! (is_constant($name) && $name->type == FldLiteralN) ) { + error_msg(@name, "%s is not CONSTANT", $name->name); + YYERROR; + } + cbl_occurs_t *occurs = ¤t_field()->occurs; + occurs->bounds.lower = + occurs->bounds.upper = $name->data.value; + } + ; +cardinal_lb: cardinal times { + current_field()->occurs.bounds.lower = $cardinal; + current_field()->occurs.bounds.upper = $cardinal; + } + ; + +cardinal: NUMSTR[input] + { + $$ = numstr2i( $input.string, $input.radix ); + } + ; + +depending_on: cardinal[lower] TO bound DEPENDING on name + { + cbl_occurs_t *occurs = ¤t_field()->occurs; + occurs->bounds.lower = (size_t)$lower; + occurs->bounds.upper = (size_t)$bound; + occurs->depending_on = field_index($name); + } + | bound DEPENDING on name + { + cbl_occurs_t *occurs = ¤t_field()->occurs; + occurs->bounds.lower = 1; + occurs->bounds.upper = (size_t)$bound; + occurs->depending_on = field_index($name); + } + ; +bound: cardinal times + | UNBOUNDED times { $$ = -1; } + ; + +key_descrs: key_descr + | key_descrs key_descr + ; +key_descr: ordering key is key_fields + ; +ordering: ASCENDING + { + current_field()->occurs.key_alloc(true); + } + | DESCENDING + { + current_field()->occurs.key_alloc(false); + } + ; +key_fields: key_field1 + | key_fields key_field1 + ; +key_field1: name + { + current_field()->occurs.key_field_add($1); + } + ; + +indexed: %empty + | INDEXED by index_fields + ; +index_fields: index_field1 + | index_fields index_field1 + ; +index_field1: ctx_name[name] + { + static const cbl_field_data_t data { .capacity = 8, .digits = 0 }; + cbl_field_t field = { .type = FldIndex, + .parent = field_index(current_field()), + .data = data }; + if( !namcpy(@name, field.name, $name) ) YYERROR; + + auto symbol = symbol_field(PROGRAM, 0, $name); + if( symbol ) { + auto field( cbl_field_of(symbol) ); + error_msg(@name, "'%s' already defined on line %d", + field->name, field->line ); + YYERROR; + } + + auto index = field_add(@name, &field); + if( !index ) { + YYERROR; + } + + current_field()->occurs.index_add(index); + } + ; + +level_name: LEVEL ctx_name + { + switch($LEVEL) { + case 1 ... 49: + case 66: + case 77: + case 88: + break; + default: + error_msg(@LEVEL, "LEVEL %d not supported", $LEVEL); + YYERROR; + } + struct cbl_field_t field = { 0, + FldInvalid, FldInvalid, 0, 0, 0, capacity_cast($1), + nonarray, yylineno, "", + 0, cbl_field_t::linkage_t(), + { 0,0,0,0, NULL, NULL, { NULL }, { NULL } }, NULL }; + if( !namcpy(@ctx_name, field.name, $2) ) YYERROR; + + $$ = field_add(@$, &field); + if( !$$ ) { + YYERROR; + } + current_field($$); // make available for data_clauses + } + | LEVEL + { + switch($LEVEL) { + case 1 ... 49: + case 66: + case 77: + case 88: + break; + default: + error_msg(@LEVEL, "LEVEL %d not supported", $LEVEL); + YYERROR; + } + struct cbl_field_t field = { 0, + FldInvalid, FldInvalid, 0, 0, 0, capacity_cast($1), + nonarray, yylineno, "", + 0, {}, { 0,0,0,0, NULL, NULL, { NULL }, { NULL } }, NULL }; + + $$ = field_add(@1, &field); + if( !$$ ) { + YYERROR; + } + current_field($$); // make available for data_clauses + } + ; + +data_descr: data_descr1 + { + $$ = current_field($1); // make available for occurs, etc. + char *env = getenv("symbols_update"); + if( env && env[0] == 'P' ) { + dbgmsg("parse.y:%d: %-15s %s (%s)", __LINE__, + cbl_field_type_str($$->type) + 3, + field_str($$), + cbl_field_type_str($$->usage) + 3); + } + } + | error { static cbl_field_t none = {}; $$ = &none; } + ; + +const_value: cce_expr + | BYTE_LENGTH of name { $$ = $name->data.capacity; } + | LENGTH of name { $$ = $name->data.capacity; } + | LENGTH_OF of name { $$ = $name->data.capacity; } + ; + +value78: literalism + { + cbl_field_data_t + data = { .capacity = capacity_cast(strlen($1.data)), + .initial = $1.data }; + $$ = new cbl_field_data_t(data); + } + | const_value + { + cbl_field_data_t data = { .value = $1 }; + $$ = new cbl_field_data_t(data); + } + | true_false + { + cbl_unimplemented("Boolean constant"); + YYERROR; + } + ; + +data_descr1: level_name + { + assert($1 == current_field()); + if( $1->usage == FldIndex ) { + field_type_update($1, $1->usage, @1, true); + } + } + + | level_name CONSTANT is_global as const_value + { + cbl_field_t& field = *$1; + if( field.level != 1 ) { + error_msg(@1, "%s must be an 01-level data item", field.name); + YYERROR; + } + + field.attr |= constant_e; + if( $is_global ) field.attr |= global_e; + field.type = FldLiteralN; + field.data.value = $const_value; + field.data.initial = string_of($const_value); + + if( !cdf_value(field.name, static_cast<int64_t>($const_value)) ) { + error_msg(@1, "%s was defined by CDF", field.name); + } + } + | level_name CONSTANT is_global as literalism[lit] + { + cbl_field_t& field = *$1; + field.attr |= constant_e; + if( $is_global ) field.attr |= global_e; + field.type = FldLiteralA; + field.data.capacity = $lit.len; + field.data.initial = $lit.data; + field.attr |= literal_attr($lit.prefix); + if( field.level != 1 ) { + error_msg(@lit, "%s must be an 01-level data item", field.name); + YYERROR; + } + if( !cdf_value(field.name, $lit.data) ) { + error_msg(@1, "%s was defined by CDF", field.name); + } + value_encoding_check(@lit, $1); + } + | level_name CONSTANT is_global FROM NAME + { + assert($1 == current_field()); + const cdfval_t *cdfval = cdf_value($NAME); + if( !cdfval ) { + error_msg(@1, "%s was defined by CDF", $NAME); + YYERROR; + } + cbl_field_t& field = *$1; + field.attr |= ($is_global | constant_e); + field.data.capacity = cdfval->string ? strlen(cdfval->string) + : sizeof(field.data.value); + field.data.initial = cdfval->string; + field.data.value = cdfval->number; + if( !cdf_value(field.name, *cdfval) ) { + error_msg(@1, "%s was defined by CDF", field.name); + } + } + + | LEVEL78 NAME[name] VALUE is value78[data] + { + if( ! dialect_mf() ) { + dialect_error(@1, "level 78", "mf"); + YYERROR; + } + struct cbl_field_t field = { 0, FldLiteralA, FldInvalid, + constant_e, 0, 0, 78, nonarray, + yylineno, "", 0, {}, *$data, NULL }; + if( !namcpy(@name, field.name, $name) ) YYERROR; + if( field.data.initial ) { + field.attr |= quoted_e; + if( !cdf_value(field.name, field.data.initial) ) { + yywarn("%s was defined by CDF", field.name); + } + } else { + field.type = FldLiteralN; + field.data.initial = string_of(field.data.value); + if( !cdf_value(field.name, + static_cast<int64_t>(field.data.value)) ) { + yywarn("%s was defined by CDF", field.name); + } + } + if( ($$ = field_add(@name, &field)) == NULL ) { + error_msg(@name, "failed level 78"); + YYERROR; + } + } + + | LEVEL88 NAME /* VALUE */ NULLPTR + { + struct cbl_field_t field = { 0, + FldClass, FldInvalid, 0, 0, 0, 88, nonarray, yylineno, "", + 0, cbl_field_t::linkage_t(), + { 0,0,0,0, NULL, NULL, { NULL }, { NULL } }, NULL }; + if( !namcpy(@NAME, field.name, $2) ) YYERROR; + + auto fig = constant_of(constant_index(NULLS))->data.initial; + struct cbl_domain_t *domain = new cbl_domain_t[2]; + + domain[0] = cbl_domain_t(@NAME, false, strlen(fig), fig); + + field.data.domain = domain; + + if( ($$ = field_add(@2, &field)) == NULL ) { + error_msg(@NAME, "failed level 88"); + YYERROR; + } + auto parent = cbl_field_of(symbol_at($$->parent)); + if( parent->type != FldPointer ) { + error_msg(@NAME, "LEVEL 88 %s VALUE NULLS invalid for " + "%s %s, which is not a POINTER", + $$->name, parent->level_str(), parent->name); + } + } + | LEVEL88 NAME VALUE domains + { + struct cbl_field_t field = { 0, + FldClass, FldInvalid, 0, 0, 0, 88, nonarray, yylineno, "", + 0, cbl_field_t::linkage_t(), + { 0,0,0,0, NULL, NULL, { NULL }, { NULL } }, NULL }; + if( !namcpy(@NAME, field.name, $2) ) YYERROR; + + struct cbl_domain_t *domain = + new cbl_domain_t[ domains.size() + 1]; + + std::copy(domains.begin(), domains.end(), domain); + + field.data.domain = domain; + field.data.false_value = $domains; + domains.clear(); + + if( ($$ = field_add(@2, &field)) == NULL ) { + error_msg(@NAME, "failed level 88"); + YYERROR; + } + } + + | name66[alias] RENAMES name[orig] + { + symbol_field_alias_end(); + if( is_literal($orig) ) { + error_msg(@orig, "cannot RENAME '%s'", name_of($orig)); + YYERROR; + } + if( !immediately_follows($orig) ) { + error_msg(@orig, "%s must immediately follow %s to RENAME it", + $alias, name_of($orig)); + YYERROR; + } + if( $orig->occurs.ntimes() ) { + error_msg(@orig, "cannot RENAME table %s %s", + $orig->level_str(), name_of($orig)); + YYERROR; + } + auto table = occurs_in($orig); + if( table ) { + error_msg(@orig, "cannot RENAME '%s' OF %s", + name_of($orig), table->name); + YYERROR; + } + if( ! $orig->rename_level_ok() ) { + error_msg(@orig, "cannot RENAME %s %s", + $orig->level_str(), name_of($orig)); + YYERROR; + } + symbol_elem_t *orig = symbol_at(field_index($orig)); + $$ = cbl_field_of(symbol_field_alias(orig, $alias)); + symbol_field_location(field_index($$), @alias); + } + + | name66[alias] RENAMES name[orig] THRU name[thru] + { + symbol_field_alias_end(); + if( !immediately_follows($orig) ) { + error_msg(@orig, "RENAMES: %s must immediately follow %s", + $alias, name_of($orig)); + YYERROR; + } + if( is_literal($orig) ) { + error_msg(@orig, "cannot RENAME '%s'", name_of($orig)); + YYERROR; + } + if( is_literal($thru) ) { + error_msg(@thru, "cannot RENAME '%s'", name_of($thru)); + YYERROR; + } + auto table = occurs_in($orig); + if( table ) { + error_msg(@orig, "cannot RENAME '%s' OF %s", + name_of($orig), table->name); + YYERROR; + } + table = occurs_in($thru); + if( table ) { + error_msg(@thru, "cannot RENAME '%s' OF %s", + name_of($thru), table->name); + YYERROR; + } + if( ! $orig->rename_level_ok() ) { + error_msg(@orig, "cannot RENAME %s %s", + $orig->level_str(), name_of($orig)); + YYERROR; + } + if( $orig->has_subordinate($thru) ) { + error_msg(@orig, "cannot RENAME %s %s THRU %s %s " + "because %s is subordinate to %s", + $orig->level_str(), name_of($orig), + $thru->level_str(), name_of($thru), + name_of($thru), name_of($orig)); + YYERROR; + } + auto not_ok = rename_not_ok($orig, $thru); + if( not_ok ) { + error_msg(@orig, "cannot RENAME %s %s THRU %s %s " + "because %s %s cannot be renamed", + $orig->level_str(), name_of($orig), + $thru->level_str(), name_of($thru), + not_ok->level_str(), name_of(not_ok)); + YYERROR; + } + if( field_index($thru) <= field_index($orig) ) { + error_msg(@orig, "cannot RENAME %s %s THRU %s %s " + "because they're in the wrong order", + $orig->level_str(), name_of($orig), + $thru->level_str(), name_of($thru)); + YYERROR; + } + symbol_elem_t *orig = symbol_at(field_index($orig)); + symbol_elem_t *last = symbol_at(field_index($thru)); + $$ = cbl_field_of(symbol_field_alias2(orig, last, $alias)); + symbol_field_location(field_index($$), @alias); + } + + | level_name[field] data_clauses + { + gcc_assert($field == current_field()); + if( $data_clauses == value_clause_e ) { // only VALUE, no PIC + // Error unless VALUE is a figurative constant or (quoted) string. + if( $field->type != FldPointer && + ! $field->has_attr(quoted_e) && + normal_value_e == cbl_figconst_of($field->data.initial) ) + { + error_msg(@field, "%s numeric VALUE %s requires PICTURE", + $field->name, $field->data.initial); + } + if( null_value_e == cbl_figconst_of($field->data.initial) ) { + // don't change the type + assert(FldPointer == $field->type); + } else { + // alphanumeric VALUE by itself implies alphanumeric type + assert(FldPointer != $field->type); + $field->type = FldAlphanumeric; + if( $field->data.initial ) { + $field->data.capacity = strlen($field->data.initial); + } + } + } + + // Verify BLANK WHEN ZERO + if( $field->has_attr(blank_zero_e) ) { + switch($field->type) { + case FldNumericEdited: + if( $field->has_attr(signable_e) ) { + error_msg(@2, "%s has 'S' in PICTURE, cannot be BLANK WHEN ZERO", + $field->name, cbl_field_type_str($field->type) ); + } + break; + default: + error_msg(@2, "%s must be " + "NUMERIC DISPLAY or NUMERIC-EDITED, not %s", + $field->name, cbl_field_type_str($field->type) ); + } + $field->data.picture = original_picture(); + } + + // SIGN clause valid only with "S" in picture + if( $field->type == FldNumericDisplay && !is_signable($field) ) { + static const size_t sign_attrs = leading_e | separate_e; + static_assert(sizeof(sign_attrs) == sizeof($field->attr), + "size matters"); + + // remove inapplicable inherited sign attributes + size_t group_sign = group_attr($field) & sign_attrs; + $field->attr &= ~group_sign; + + if( $field->attr & sign_attrs ) { + dbgmsg("%s:%d: %s", __func__, __LINE__, field_str($field)); + error_msg(@field, "%s must be signed for SIGN IS", + $field->name ); + YYERROR; + } + } + + // Increase numeric display capacity by 1 for SIGN SEPARATE. + if( $field->type == FldNumericDisplay && + is_signable($field) && + $field->has_attr(separate_e) ){ + $field->data.capacity++; + } + + // Set Packed-Decimal capacity + if( $field->type == FldPacked ) { + $field->data.capacity = type_capacity($field->type, + $field->data.digits); + if( $field->attr & separate_e ) + { + // This is a gentle kludge required by the the belated + // introduction of COMP-6, which is like COMP-3 but with no + // sign nybble. The code in type_capacity assumes a sign + // nybble. + $field->data.capacity = ($field->data.digits+1)/2; + } + } + + // Check COMP-5 capacity + // No capacity means no PICTURE, valid only for a (potential) group + if( $field->type == FldNumericBin5 && $field->data.capacity == 0 ) { + if( has_clause ($data_clauses, usage_clause_e) && + !has_clause ($data_clauses, picture_clause_e) ) { + // invalidate until a child is born + $field->type = FldInvalid; + } + } + + // Ensure signed initial VALUE is for signed numeric type + if( is_numeric($field) && + $field->data.initial && + $field->type != FldFloat ) + { + switch( $field->data.initial[0] ) { + case '-': + if( !$field->has_attr(signable_e) ) { + error_msg(@field, "%s is unsigned but has signed VALUE '%s'", + $field->name, $field->data.initial); + } + } + } + + // Verify VALUE + $field->report_invalid_initial_value(@data_clauses); + + // verify REDEFINES + auto parent = parent_of($field); + if( parent && $field->level == parent->level ) { + valid_redefine(@field, $field, parent); // calls yyerror + } + } + ; + +literalism: LITERAL { $$ = $1; } + | literalism[first] '&' LITERAL[second] + { + $$ = $first; + literal_t& output($$); + + output.len += $second.len; + output.data = reinterpret_cast<char*>(xrealloc(output.data, + output.len + 1)); + memcpy( output.data + $first.len, $second.data, $second.len ); + output.data[output.len] = '\0'; + + if( $second.prefix[0] ) { strcpy(output.prefix, $second.prefix); } + if( ! $first.compatible_prefix($second) ) { + yywarn("dissimilar literals, '%s' prevails", + output.prefix); + } + } + ; + +name66: LEVEL66 NAME[alias] + { + build_symbol_map(); + if( ! symbol_field_alias_begin() ) { + error_msg(@alias, "no Level 01 record exists " + "for %s to redefine", $alias); + } + $$ = $alias; + } + ; + +data_clauses: data_clause + { + if( $data_clause == redefines_clause_e ) { + auto parent = parent_of(current_field()); + if( !parent ) { + error_msg(@1, "%s invalid REDEFINES", + current_field()->name); + YYERROR; + } + if( parent->occurs.ntimes() > 0 ) { + error_msg(@1, "%s cannot REDEFINE table %s", + current_field()->name, + parent->name); + YYERROR; + } + } + } + | data_clauses data_clause { + const char *clause = "data"; + switch($2) { + case occurs_clause_e: clause = "OCCURS"; break; + case picture_clause_e: clause = "PIC"; break; + case usage_clause_e: clause = "USAGE"; break; + case value_clause_e: clause = "VALUE"; break; + case global_clause_e: clause = "GLOBAL"; break; + case external_clause_e: clause = "EXTERNAL"; break; + case justified_clause_e: clause = "JUSTIFIED"; break; + case redefines_clause_e: clause = "REDEFINES"; break; + case blank_zero_clause_e: clause = "BLANK WHEN ZERO"; break; + case synched_clause_e: clause = "SYNCHRONIZED"; break; + case sign_clause_e: clause = "SIGN"; break; + case based_clause_e: clause = "BASED"; break; + case same_clause_e: clause = "SAME AS"; break; + case volatile_clause_e: clause = "VOLATILE"; break; + case type_clause_e: clause = "TYPE"; break; + case typedef_clause_e: clause = "TYPEDEF"; break; + } + if( ($$ & $2) == $2 ) { + error_msg(@2, "%s clause repeated", clause); + YYERROR; + } + + if( $data_clause == redefines_clause_e ) { + error_msg(@2, "REDEFINES must appear " + "immediately after LEVEL and NAME"); + YYERROR; + } + cbl_field_t *field = current_field(); + const int globex = (global_e | external_e); + if( (($$ | $2) & globex) == globex ) { + error_msg(@2, "GLOBAL and EXTERNAL specified"); + YYERROR; + } + + $$ |= $2; + + // If any implied TYPE bits are on in addition to + // type_clause_e, they're in conflict. + static const size_t type_implies = + // ALIGNED clause not implemented + blank_zero_clause_e | justified_clause_e | picture_clause_e + | sign_clause_e | synched_clause_e | usage_clause_e; + + if( type_clause_e < ($$ & (type_clause_e | type_implies)) ) { + if( $2 == type_clause_e ) { + error_msg(@2, "TYPE TO incompatible with ALIGNED, " + "BLANK WHEN ZERO, JUSTIFIED, PICTURE, SIGN, " + "SYNCHRONIZED, and USAGE"); + } else { + error_msg(@2, "%s incompatible with TYPE TO", clause); + } + YYERROR; + } + + if( ($$ & same_clause_e) == same_clause_e ) { + if( 0 < ($$ & ~same_clause_e) ) { + error_msg(@2, "%s %s SAME AS " + "precludes other DATA DIVISION clauses", + field->level_str(), field->name); + YYERROR; + } + } + + if( is_numeric(field->type) && field->type != FldNumericDisplay ) { + if( $$ & sign_clause_e ) { + error_msg(@2, "%s is binary NUMERIC type, " + "incompatible with SIGN IS", field->name); + } + } + + if( gcobol_feature_embiggen() ) { + if( field->is_binary_integer() && field->data.capacity == 4) { + auto redefined = symbol_redefines(field); + if( redefined && redefined->type == FldPointer ) { + if( yydebug ) { + yywarn("expanding %s size from %u bytes to %zu " + "because it redefines %s with USAGE POINTER", + field->name, field->size(), sizeof(void*), + redefined->name); + } + field->embiggen(); + } + } + } + + switch( field->type ) { + case FldFloat: + if( ($$ & picture_clause_e) == picture_clause_e ) { + error_msg(@2, "%s: FLOAT types do not allow PICTURE", + field->name); + } + break; + default: + break; + } + + if( ! field->is_justifiable() ) { + error_msg(@2, "%s: %s is incompatible with JUSTIFIED", + field->name, 3 + cbl_field_type_str(field->type)); + } + } + ; + +data_clause: any_length { $$ = any_length_e; } + | based_clause { $$ = based_clause_e; } + | blank_zero_clause { $$ = blank_zero_clause_e; } + | external_clause { $$ = external_clause_e; } + | global_clause { $$ = global_clause_e; } + | justified_clause { $$ = justified_clause_e; } + | occurs_clause { $$ = occurs_clause_e; + cbl_field_t *field = current_field(); + switch( field->level ) { + case 1: + if( dialect_mf() ) break; + __attribute__((fallthrough)); + case 77: + case 88: + error_msg(@$, "%s %s: invalid LEVEL for OCCURS", + field->level_str(), field->name ); + break; + default: + assert( field->parent > 0 ); + } + } + | picture_clause { $$ = picture_clause_e; } + | redefines_clause { $$ = redefines_clause_e; } + | same_clause { $$ = same_clause_e; } + | sign_clause { $$ = sign_clause_e; } + | synched_clause { $$ = synched_clause_e; } + | type_clause { $$ = type_clause_e; } + | typedef_clause { $$ = typedef_clause_e; } + | usage_clause { $$ = usage_clause_e; } + | value_clause { $$ = value_clause_e; + cbl_field_t *field = current_field(); + + if( field->type != FldAlphanumeric && + field->data.initial && field->data.initial[0] ) + { + // Embedded NULs are valid only in FldAlphanumeric, and are + // already handled. + if( strlen(field->data.initial) < field->data.capacity ) { + auto p = blank_pad_initial( field->data.initial, + strlen(field->data.initial), + field->data.capacity ); + if( !p ) YYERROR; + field->data.initial = p; + } + } + const cbl_field_t *parent; + if( (parent = parent_has_value(field)) != NULL ) { + error_msg(@1, "VALUE invalid because group %s has VALUE clause", + parent->name); + } + } + | volatile_clause { $$ = volatile_clause_e; } + ; + +picture_clause: PIC signed nps[fore] nines nps[aft] + { + cbl_field_t *field = current_field(); + if( !field_type_update(field, FldNumericDisplay, @$) ) { + YYERROR; + } + ERROR_IF_CAPACITY(@PIC, field); + field->attr |= $signed; + field->data.capacity = type_capacity(field->type, $4); + field->data.digits = $4; + if( long(field->data.digits) != $4 ) { + error_msg(@2, "indicated size would be %ld bytes, " + "maximum data item size is %u", + $4, UINT32_MAX); + } + + if( $fore && $aft ) { // leading and trailing P's + error_msg(@2, "PIC cannot have both leading and trailing P"); + YYERROR; + } + if( $fore || $aft ) { + field->attr |= scaled_e; + field->data.rdigits = $fore? $fore : -$aft; + } + if( ! field->reasonable_capacity() ) { + error_msg(@2, "%s limited to capacity of %d (would need %u)", + field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity); + } + } + + | PIC signed NINEV[left] nine[rdigits] + { + cbl_field_t *field = current_field(); + field->data.digits = $left + $rdigits; + + if( field->is_binary_integer() ) { + field->data.capacity = type_capacity(field->type, + field->data.digits); + } else { + if( !field_type_update(field, FldNumericDisplay, @$) ) { + YYERROR; + } + ERROR_IF_CAPACITY(@PIC, field); + field->attr |= $signed; + field->data.capacity = field->data.digits; + field->data.rdigits = $rdigits; + } + if( ! field->reasonable_capacity() ) { + error_msg(@2, "%s limited to capacity of %d (would need %u)", + field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity); + } + } + | PIC signed NINEDOT[left] nine[rdigits] + { + uint32_t size = $left + $rdigits; + + cbl_field_t *field = current_field(); + if( !field_type_update(field, FldNumericEdited, @$) ) { + YYERROR; + } + ERROR_IF_CAPACITY(@PIC, field); + field->attr |= $signed; + field->data.digits = size; + field->data.capacity = ++size; + field->data.rdigits = $rdigits; + + if( ! field->reasonable_capacity() ) { + error_msg(@2, "%s limited to capacity of %d (would need %u)", + field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity); + } + } + + | PIC alphanum_pic[size] + { + cbl_field_t *field = current_field(); + + if( field->type == FldNumericBin5 && + field->data.capacity == 0 && + dialect_mf() ) + { // PIC X COMP-X or COMP-9 + if( ! field->has_attr(all_x_e) ) { + error_msg(@2, "COMP PICTURE requires all X's or all 9's"); + YYERROR; + } + } else { + if( !field_type_update(field, FldAlphanumeric, @$) ) { + YYERROR; + } + } + assert(0 < $size); + if( field->data.initial != NULL ) { + if( 0 < field->data.capacity && + field->data.capacity < uint32_t($size) ) { + auto p = blank_pad_initial( field->data.initial, + field->data.capacity, $size ); + if( !p ) YYERROR; + field->data.initial = p; + } + } + + field->data.capacity = $size; + field->data.picture = NULL; + + if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s", + field->line, field_str(field)); + } + + | PIC numed[picture] + { + cbl_field_t *field = current_field(); + if( !field_type_update(field, FldNumericEdited, @$) ) { + YYERROR; + } + ERROR_IF_CAPACITY(@PIC, field); + if( !is_numeric_edited($picture) ) { + error_msg(@picture, numed_message); + YYERROR; + } + field->data.picture = $picture; + field->data.capacity = length_of_picture($picture); + field->data.digits = digits_of_picture($picture, false); + field->data.rdigits = rdigits_of_picture($picture); + if( is_picture_scaled($picture) ) field->attr |= scaled_e; + } + + | PIC ALPHED[picture] + { + bool is_alpha_edited( const char picture[] ); + + cbl_field_t *field = current_field(); + ERROR_IF_CAPACITY(@PIC, field); + field->data.capacity = length_of_picture($picture); + field->data.picture = $picture; + + // In case the lexer guesses wrong. + cbl_field_type_t type = is_numeric_edited($picture)? + FldNumericEdited : FldAlphaEdited; + if( !field_type_update(field, type, @$) ) { + YYERROR; + } + + switch( type ) { + case FldNumericEdited: + field->data.digits = digits_of_picture($picture, false); + field->data.rdigits = rdigits_of_picture($picture); + if( is_picture_scaled($picture) ) field->attr |= scaled_e; + break; + case FldAlphaEdited: + if( !is_alpha_edited(field->data.picture) ) { + error_msg(@picture, "invalid picture for Alphanumeric-edited"); + YYERROR; + } + break; + default: + gcc_unreachable(); + } + } + ; + +alphanum_pic: alphanum_part { + current_field()->set_attr($1.attr); + $$ = $1.nbyte; + } + | alphanum_pic alphanum_part + { + auto field = current_field(); + dbgmsg("%s has %s against %s", + field->name, field_attr_str(field), + cbl_field_attr_str($2.attr)); + + if( ! field->has_attr($2.attr) ) { + field->clear_attr(all_ax_e); // clears 2 bits + } + $$ += $2.nbyte; + + dbgmsg("%s attrs: %s", field->name, field_attr_str(field)); + } + ; +alphanum_part: ALNUM[picture] count + { + $$.attr = uniform_picture($picture); + $$.nbyte = strlen($picture); + auto count($count); + if( count > 0 ) { + --count; + $$.nbyte += count; // AX9(3) has count 5 + } + if( count < 0 ) { + error_msg(@2, "PICTURE count '(%d)' is negative", count ); + YYERROR; + } + } + ; + +signed: %empty { $$ = 0; } + | 'S' { $$ = signable_e; } + ; + +nps: %empty { $$ = 0; } + | PIC_P { $$ = $1; } + ; + +nine: %empty { $$ = 0; } + | nines + { + $$ = $1; + if( $$ == 0 ) { + error_msg(@1, "'(0)' invalid in PICTURE (ISO 2023 13.18.40.3)"); + } + } + ; +nines: NINES + | nines NINES { $$ = $1 + $2; } + ; + +count: %empty { $$ = 0; } + | '(' NUMSTR ')' + { + $$ = numstr2i( $NUMSTR.string, $NUMSTR.radix ); + if( $$ == 0 ) { + error_msg(@2, "'(0)' invalid in PICTURE (ISO 2023 13.18.40.3)"); + } + } + | '(' NAME ')' + { + auto value = cdf_value($NAME); + if( ! (value && value->is_numeric()) ) { + error_msg(@NAME, "PICTURE '(%s)' requires a CONSTANT value", $NAME ); + YYERROR; + } + int nmsg = 0; + auto e = symbol_field(PROGRAM, 0, $NAME); + if( e ) { // verify not floating point with nonzero fraction + auto field = cbl_field_of(e); + assert(is_literal(field)); + if( field->data.value != size_t(field->data.value) ) { + nmsg++; + error_msg(@NAME, "invalid PICTURE count '(%s)'", + field->data.initial ); + } + } + $$ = value->as_number(); + if( $$ <= 0 && !nmsg) { + error_msg(@NAME, "invalid PICTURE count '(%s)'", $NAME ); + } + } + ; + +numed: NUMED + | NUMED_CR + | NUMED_DB + ; + +usage_clause: usage_clause1[type] + { + cbl_field_t *field = current_field(); + cbl_field_type_t type = static_cast<cbl_field_type_t>($type); + if( ! field_type_update(field, type, @$, true) ) { + YYERROR; + } + } + ; +usage_clause1: usage COMPUTATIONAL[comp] native + { + bool infer = true; + cbl_field_t *field = current_field(); + + // Some binary types have defined capacity; + switch($comp.type) { + // COMPUTATIONAL and COMP-5 rely on PICTURE. + case FldNumericBinary: + field->attr |= big_endian_e; + __attribute__((fallthrough)); + case FldNumericBin5: + // If no capacity yet, then no picture, infer $comp.capacity. + // If field has capacity, ensure USAGE is compatible. + if( field->data.capacity > 0 ) { // PICTURE before USAGE + infer = false; + switch( field->type ) { + case FldAlphanumeric: // PIC X COMP-5 or COMP-X + assert( field->data.digits == 0 ); + assert( field->data.rdigits == 0 ); + if( dialect_mf() ) { + field->type = $comp.type; + field->clear_attr(signable_e); + } else { + error_msg(@comp, "numeric USAGE invalid " + "with Alpnanumeric PICTURE"); + YYERROR; + } + break; + case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X + if( $comp.capacity == 0xFF ) { // comp-x is a bit like comp-5 + assert( field->data.digits == field->data.capacity ); + if( ! dialect_mf() ) { + dialect_error(@1, "COMP-X", "mf"); + } + } + field->type = $comp.type; + field->data.capacity = type_capacity(field->type, + field->data.digits); + break; + default: break; + } + } + break; + case FldPacked: // comp-6 is unsigned comp-3 + assert(! $comp.signable); // else PACKED_DECIMAL from scanner + field->attr |= separate_e; + if( ! dialect_mf() ) { + dialect_error(@1, "COMP-6", "mf"); + } + if( field->type == FldNumericDisplay ) {// PICTURE before USAGE + infer = false; + assert(field->data.capacity > 0); + field->type = $comp.type; + field->data.capacity = type_capacity(field->type, + field->data.digits); + } + break; + default: + break; + } + + if( infer ) { + if( $comp.capacity > 0 ) { + if( field->data.capacity > 0 ) { + error_msg(@comp, "%s is BINARY type, incompatible with PICTURE", + field->name); + YYERROR; + } + field->data.capacity = $comp.capacity; + field->type = $comp.type; + if( $comp.signable ) { + field->attr = (field->attr | signable_e); + } + } + } + $$ = $comp.type; + } + | usage DISPLAY native { $$ = FldDisplay; } + | usage PACKED_DECIMAL native { $$ = FldPacked; } + | usage PACKED_DECIMAL with NO SIGN + { + cbl_field_t *field = current_field(); + if( field->data.capacity > 0 && + field->type != FldNumericDisplay) { + error_msg(@2, "%s PICTURE is incompatible with USAGE PACKED DECIMAL", + field->name); + YYERROR; + } + field->clear_attr(separate_e); + field->clear_attr(signable_e); + if( field->type == FldNumericDisplay ) {// PICTURE before USAGE + assert(field->data.capacity > 0); + field->data.capacity = type_capacity(FldPacked, + field->data.digits); + } + $$ = field->type = FldPacked; + } + | usage INDEX { + $$ = symbol_field_index_set( current_field() )->type; + } + // We should enforce data/code pointers with a different type. + | usage POINTER + { + $$ = FldPointer; + auto field = current_field(); + auto redefined = symbol_redefines(field); + + if( $POINTER ) { + field->set_attr($POINTER); + } + if( gcobol_feature_embiggen() && redefined && + is_numeric(redefined->type) && redefined->size() == 4) { + // For now, we allow POINTER to expand a 32-bit item to 64 bits. + field->data.capacity = sizeof(void *); + dbgmsg("%s: expanding #%zu %s capacity %u => %u", __func__, + field_index(redefined), redefined->name, + redefined->data.capacity, field->data.capacity); + + redefined->embiggen(); + + if( redefined->data.initial ) { + auto s = xasprintf( "%s ", redefined->data.initial); + std::replace(s, s + strlen(s), '!', char(0x20)); + redefined->data.initial = s; + } + } + } + | usage POINTER TO error + { + cbl_unimplemented("POINTER TO"); + $$ = FldPointer; + } + ; + +value_clause: VALUE all LITERAL[lit] { + cbl_field_t *field = current_field(); + field->data.initial = $lit.data; + field->attr |= literal_attr($lit.prefix); + // The __gg__initialize_data routine needs to know that VALUE is a + // quoted literal. This is critical for NumericEdited variables + field->attr |= quoted_e; + + if( field->data.capacity == 0 ) { + field->data.capacity = $lit.len; + } else { + if( $all ) { + field_value_all(field); + } else { + if( $lit.len < field->data.capacity ) { + auto p = blank_pad_initial( $lit.data, $lit.len, + field->data.capacity ); + if( !p ) YYERROR; + field->data.initial = p; + } + } + } + value_encoding_check(@lit, field); + } + | VALUE all cce_expr[value] { + cbl_field_t *field = current_field(); + auto orig_str = original_number(); + auto orig_val = numstr2i(orig_str, decimal_e); + char *initial = NULL; + + if( orig_val == $value ) { + initial = orig_str; + pristine_values.insert(initial); + } else { + initial = string_of($value); + gcc_assert(initial); + } + + char decimal = symbol_decimal_point(); + std::replace(initial, initial + strlen(initial), '.', decimal); + + field->data.initial = initial; + field->data.value = $value; + + if( $all ) field_value_all(field); + } + | VALUE all reserved_value[value] + { + if( $value != NULLS ) { + auto fig = constant_of(constant_index($value)); + current_field()->data.initial = fig->data.initial; + } + } + | /* VALUE is */ NULLPTR + { + auto fig = constant_of(constant_index(NULLS)); + current_field()->data.initial = fig->data.initial; + } + | VALUE error + { + error_msg(@2, "no valid VALUE supplied"); + } + ; + +global_clause: is GLOBAL + { + cbl_field_t *field = current_field(); + field->attr |= (field->attr | global_e); + } + ; +external_clause: is EXTERNAL + { + cbl_field_t *field = current_field(); + field->attr |= (field->attr | external_e); + } + ; + +justified_clause: is JUSTIFIED + { + cbl_field_t *field = current_field(); + field->attr |= rjust_e; + } + ; + +redefines_clause: REDEFINES NAME[orig] + { + struct symbol_elem_t *e = field_of($orig); + if( !e ) { + error_msg(@2, "REDEFINES target not defined"); + YYERROR; + } + cbl_field_t *field = current_field(); + cbl_field_t *orig = cbl_field_of(e); + if( orig->has_attr(filler_e) ) { + error_msg(@2, "%s may not REDEFINE %s", + field->name, orig->name); + } + cbl_field_t *super = symbol_redefines(orig); + if( super ) { + error_msg(@2, "%s may not REDEFINE %s, " + "which redefines %s", + field->name, orig->name, super->name); + } + if( field->level != orig->level ) { + error_msg(@2, "cannot redefine %s %s as %s %s " + "because they have different levels", + orig->level_str(), name_of(orig), + field->level_str(), name_of(field)); + } + // ISO 13.18.44.3 + auto parent( symbol_index(e) ); + auto p = std::find_if( symbol_elem_of(orig) + 1, + symbol_elem_of(field), + [parent, level = field->level]( const auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + return + f->level == level && + f->parent != parent; + } + return false; + } ); + if( p != symbol_elem_of(field) ) { + auto mid( cbl_field_of(p) ); + error_msg(@2, "cannot redefine %s %s as %s %s " + "because %s %s intervenes", + orig->level_str(), name_of(orig), + field->level_str(), name_of(field), + mid->level_str(), name_of(mid)); + } + + if( valid_redefine(@2, field, orig) ) { + /* + * Defer "inheriting" the parent's description until the + * redefine is complete. + */ + current_field()->parent = symbol_index(e); + } + } + ; + +any_length: ANY LENGTH + { cbl_field_t *field = current_field(); + if( field->attr & any_length_e ) { + error_msg(@1, "ANY LENGTH already set"); + } + if( ! (field->level == 1 && + current_data_section == linkage_datasect_e && + (1 < current.program_level() || + current.program()->is_function())) ) { + error_msg(@1, "ANY LENGTH valid only for 01 " + "in LINKAGE SECTION of a function or contained program"); + YYERROR; + } + field->attr |= any_length_e; + } + ; + +based_clause: BASED + { cbl_field_t *field = current_field(); + if( field->attr & based_e ) { + error_msg(@1, "BASED already set"); + } + field->attr |= based_e; + } + ; + +blank_zero_clause: blank_when_zero + { cbl_field_t *field = current_field(); + // the BLANK WHEN ZERO clause defines the item as numeric-edited. + if( !field_type_update(field, FldNumericEdited, @1) ) { + YYERROR; + } + field->attr |= blank_zero_e; + } + ; +blank_when_zero: + BLANK WHEN ZERO + | BLANK ZERO + ; + +synched_clause: SYNCHRONIZED + | SYNCHRONIZED LEFT + | SYNCHRONIZED RIGHT + ; + +same_clause: SAME AS name + { + cbl_field_t *field = current_field(), *other = $name; + if( other->occurs.ntimes() > 0 ) { + error_msg(@name, "SAME AS %s: cannot have OCCURS", + other->name); // 13.18.49.2,P5 + YYERROR; + } + if( field->level == 77 and !is_elementary(other->type) ) { + // ISO 2023 13.18.49.2,P8 + error_msg(@name, "%s %s SAME AS %s: must be elementary", + field->level_str(), field->name, other->name); + YYERROR; + } + + if( (other->attr & (sign_clause_e | usage_clause_e)) > 0 ) { + error_msg(@name, "%s: source of SAME AS cannot have " + "SIGN or USAGE clause", other->name); + YYERROR; + } + if( other->usage == FldGroup ) { + error_msg(@name, "%s: source of SAME AS cannot have " + "GROUP-USAGE clause", other->name); + YYERROR; + } + if( other->has_attr(constant_e ) ) { + error_msg(@name, "%s: source of SAME AS cannot " + "be constant", other->name); + YYERROR; + } + if( field->parent == field_index(other) ) { + error_msg(@name, "%s: SAME AS uses " + "its own parent %s", field->name, other->name); + YYERROR; + } + + auto e = symbol_field_same_as( field, other ); + symbol_field_location( symbol_index(e), @name ); + } + ; + +sign_clause: sign_is sign_leading sign_separate + { + cbl_field_t *field = current_field(); + if( $sign_leading ) { + field->attr |= leading_e; + } else { + field->attr &= ~size_t(leading_e); // turn off in case inherited + field->attr |= signable_e; + } + if( $sign_separate ) field->attr |= separate_e; + } + ; +sign_is: %empty + | SIGN is + ; +sign_leading: LEADING { $$ = true; } + | TRAILING { $$ = false; } + ; +sign_separate: %empty { $$ = false; } + | SEPARATE CHARACTER { $$ = true; } + | SEPARATE { $$ = true; } + ; + +/* + * "The effect of the TYPE clause is as though the data description identified + * by type-name-1 had been coded in place of the TYPE clause, excluding the + * level-number, name, alignment, and the GLOBAL, SELECT WHEN, and TYPEDEF + * clauses specified for type-name-1;" + * + * The essential characteristics of a type, which is identified by its + * type-name, are the: + * — relative positions and lengths of the elementary items + * — ALIGNED clause + * — BLANK WHEN ZERO clause + * — JUSTIFIED clause + * — PICTURE clause + * — SIGN clause + * — SYNCHRONIZED clause + * — USAGE clause + */ +type_clause: TYPE to typename + { + cbl_field_t *field = current_field(); + if( $typename ) { + auto e = symbol_field_same_as(field, $typename); + symbol_field_location( symbol_index(e), @typename ); + } + } + | USAGE is typename + { + if( ! dialect_mf() ) { + dialect_error(@typename, "USAGE TYPENAME", "mf"); + YYERROR; + } + cbl_field_t *field = current_field(); + if( $typename ) { + auto e = symbol_field_same_as(field, $typename); + symbol_field_location( symbol_index(e), @typename ); + } + } + ; + +typedef_clause: is TYPEDEF strong + { + cbl_field_t *field = current_field(); + switch( field->level ) { + case 1: case 77: break; + default: + error_msg(@2, "%s %s IS TYPEDEF must be level 01", + field->level_str(), field->name); + } + field->attr |= typedef_e; + if( $strong ) field->attr |= strongdef_e; + if( ! current.typedef_add(field) ) { + auto prior = current.has_typedef(field); + assert(prior); + error_msg(@2, "%s %s IS TYPEDEF is not unique " + "(see %s, line %d)", + field->level_str(), field->name, + prior->name, prior->line); + } + } + ; + +volatile_clause: + VOLATILE + { + if( dialect_ibm() ) { + yywarn("VOLATILE has no effect"); + } else { + dialect_error(@1, "VOLATILE", "ibm"); + } + } + ; + +procedure_div: %empty { + if( !procedure_division_ready(@$, NULL, NULL) ) YYABORT; + } + | PROCEDURE_DIV '.' { + if( !procedure_division_ready(@$, NULL, NULL) ) YYABORT; + } declaratives sentences + | PROCEDURE_DIV procedure_args '.' declaratives sentences + | PROCEDURE_DIV procedure_args '.' + ; + +procedure_args: USING procedure_uses[args] + { + if( !procedure_division_ready(@args, NULL, $args) ) YYABORT; + } + | USING procedure_uses[args] RETURNING name[ret] + { + if( !procedure_division_ready(@ret, $ret, $args) ) YYABORT; + if( ! $ret->has_attr(linkage_e) ) { + error_msg(@ret, "RETURNING %s is not defined in LINKAGE SECTION", + $ret->name); + } + } + | RETURNING name[ret] + { + if( !procedure_division_ready(@ret, $ret, NULL) ) YYABORT; + if( ! $ret->has_attr(linkage_e) ) { + error_msg(@ret, "RETURNING %s is not defined in LINKAGE SECTION", + $ret->name); + } + } + ; +procedure_uses: procedure_use { $$ = new ffi_args_t($1); } + | procedure_uses procedure_use { $$->push_back($2); } + ; +procedure_use: optional scalar { + $$ = new cbl_ffi_arg_t(by_default_e, $scalar); + $$->optional = $optional; + $$->validate(); // produces message + } + | by REFERENCE optional scalar { + $$ = new cbl_ffi_arg_t(by_reference_e, $scalar); + $$->optional = $optional; + $$->validate(); // produces message + } + | by CONTENT error { // no "by content" in procedure definition + $$ = new cbl_ffi_arg_t(by_content_e, + new_reference(literally_zero)); + } + | by VALUE by_value_arg[arg] { + $$ = new cbl_ffi_arg_t(by_value_e, $arg); + $$->validate(); // produces message + } + ; +by_value_arg: scalar + | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + | reserved_value + { + $$ = new_reference(constant_of(constant_index($1))); + } + ; + +declaratives: %empty + | DECLARATIVES '.' + <label>{ + current.enabled_exception_cache = enabled_exceptions; + enabled_exceptions.clear(); + current.doing_declaratives(true); + $$ = label_add(LblString, "_end_declaratives", 0); + assert($$); + parser_label_goto($$); + } [label] + sentences END DECLARATIVES '.' + { + size_t ndecl = current.declaratives.as_list().size(); + cbl_declarative_t decls[ ndecl ]; + auto decl_list = current.declaratives.as_list(); + std::copy( decl_list.begin(), decl_list.end(), decls ); + std::sort( decls, decls + ndecl ); + current.doing_declaratives(false); + /* TODO: if( intradeclarative_reference() ) yyerror; + * Test also at paragraph_reference, for non-forward + * reference with good line numbers. See + * utilcc::procedures_t and ambiguous_reference(). At this + * point, no reference should pick up anything except a + * forward reference, because we haven't yet begun to parse + * nondeclarative procedures. + */ + parser_label_label($label); + enabled_exceptions = current.enabled_exception_cache; + current.enabled_exception_cache.clear(); + ast_enter_section(implicit_section()); + } + ; + +sentences: sentence { + ast_first_statement(@1); + symbol_temporaries_free(); + } + | section_name + | paragraph_name[para] '.' + { + location_set(@para); + cbl_label_t *label = label_add(@para, LblParagraph, $para); + if( !label ) { + YYERROR; + } + ast_enter_paragraph(label); + current.new_paragraph(label); + apply_declaratives(); + } + | sentences sentence + { // sentences might not be sentence + ast_first_statement(@2); + symbol_temporaries_free(); + } + | sentences section_name + | sentences paragraph_name[para] '.' + { + location_set(@para); + cbl_label_t *label = label_add(@para, LblParagraph, $para); + if( !label ) { + YYERROR; + } + ast_enter_paragraph(label); + current.new_paragraph(label); + apply_declaratives(); + } + ; +paragraph_name: NAME + | NUMSTR { $$ = $1.string; } + ; + +sentence: statements '.' + | statements YYEOF + { + if( ! goodnight_gracie() ) { + YYABORT; + } + if( nparse_error > 0 ) YYABORT; + YYACCEPT; + } + | program END_SUBPROGRAM namestr[name] '.' + { // a contained program (no prior END PROGRAM) is a "sentence" + const cbl_label_t *prog = current.program(); + assert(prog); + const char *name = string_of($name); + if( !name || 0 != strcasecmp(prog->name, name) ) { + error_msg(@name, "END PROGRAM '%s' does not match PROGRAM-ID '%s'", + name? name : $name.data, prog->name); + YYERROR; + } + + std::set<std::string> externals = current.end_program(); + if( !externals.empty() ) { + for( const auto& name : externals ) { + yywarn("%s calls external symbol '%s'", + prog->name, name.c_str()); + } + YYERROR; + } + // pointer still valid because name is in symbol table + ast_end_program(prog->name); + } + | program YYEOF + { // a contained program (no prior END PROGRAM) is a "sentence" + if( nparse_error > 0 ) YYABORT; + do { + if( ! goodnight_gracie() ) YYABORT; // no recovery + } while( current.program_level() > 0 ); + YYACCEPT; + } + ; + +statements: statement { $$ = $1; } + | statements statement { $$ = $2; } + ; + +statement: error { + if( current.declarative_section_name() ) { + error_msg(@1, "missing END DECLARATIVES or SECTION name", + nparse_error); + YYABORT; + } + if( max_errors_exceeded(nparse_error) ) { + error_msg(@1, "max errors %d reached", nparse_error); + YYABORT; + } + } + | accept { $$ = ACCEPT; } + | add { $$ = ADD; } + | allocate { $$ = ALLOCATE; } + | alter { $$ = ALTER; } + | call { $$ = CALL; } + | cancel { $$ = CANCEL; } + | close { $$ = CLOSE; } + | compute { $$ = COMPUTE; } + | continue_stmt { $$ = CONTINUE; } + | delete { $$ = DELETE; } + | display { $$ = DISPLAY; } + | divide { $$ = DIVIDE; } + | entry { $$ = ENTRY; } + | evaluate { $$ = EVALUATE; } + | exit { $$ = EXIT; } + | free { $$ = FREE; } + | go_to { $$ = GOTO; } + | if_stmt { $$ = IF; } + | initialize { $$ = INITIALIZE; } + | inspect { $$ = INSPECT; } + | merge { $$ = MERGE; } + | move { $$ = MOVE; } + | multiply { $$ = MULTIPLY; } + | open { $$ = OPEN; } + | return_stmt { $$ = RETURN; } + | perform { $$ = PERFORM; } + | raise { $$ = RAISE; } + | read { $$ = READ; } + | release { $$ = RELEASE; } + | resume { $$ = RESUME; } + | rewrite { $$ = REWRITE; } + | search { $$ = SEARCH; } + | set { $$ = SET; } + | sort { $$ = SORT; } + | start { $$ = START; } + | stop { $$ = STOP; } + | string { $$ = STRING_kw; } + | subtract { $$ = SUBTRACT; } + | unstring { $$ = UNSTRING; } + | write { $$ = WRITE; } + ; + + /* + * ISO defines ON EXCEPTION only for Format 3 (screen). We + * implement extensions defined by MF and Fujitsu (and us) to + * use ACCEPT to interact with the command line and the + * environment. + * + * ISO ACCEPT and some others are implemented in accept_body, + * before the parser sees any ON EXCEPTION. In those cases + * accept_body returns accept_done_e to denote that the + * statement has been handled. If ON EXCEPTION is then parsed, + * it's an error. Otherwise, accept_body returns something + * else, and the relevant parser_accept_foo function is called + * in the "accept" action. + */ +accept: accept_body end_accept { + cbl_field_t *argi = register_find("_ARGI"); + switch( $accept_body.func ) { + case accept_done_e: + break; + case accept_command_line_e: + if( $1.from->field == NULL ) { // take next command-line arg + parser_accept_command_line(*$1.into, argi, NULL, NULL); + cbl_num_result_t tgt { truncation_e, argi }; + parser_add2(tgt, literally_one); // increment argi + } else if( $1.from->field == argi ) { + parser_move(*$1.into, *$1.from); + } else { + parser_accept_command_line(*$1.into, *$1.from, NULL, NULL); + } + break; + case accept_envar_e: + parser_accept_envar(*$1.into, *$1.from, NULL, NULL); + break; + } + } + | accept_body accept_excepts[ec] end_accept { + cbl_field_t *argi = register_find("_ARGI"); + switch( $accept_body.func ) { + case accept_done_e: + error_msg(@ec, "ON EXCEPTION valid only " + "with ENVIRONMENT or COMAMND-LINE(n)"); + break; + case accept_command_line_e: + if( $1.from->field == NULL ) { // take next command-line arg + parser_accept_command_line(*$1.into, argi, + $ec.on_error, $ec.not_error); + cbl_num_result_t tgt { truncation_e, argi }; + parser_add2(tgt, literally_one); // increment argi + } else if( $1.from->field == argi ) { + parser_move(*$1.into, *$1.from); + if( $ec.on_error || $ec.not_error ) { + error_msg(@ec, "ON EXCEPTION valid only " + "with ENVIRONMENT or COMAMND-LINE(n)"); + } + } else { + parser_accept_command_line(*$1.into, *$1.from, + $ec.on_error, $ec.not_error); + } + break; + case accept_envar_e: + parser_accept_envar(*$1.into, *$1.from, + $ec.on_error, $ec.not_error); + break; + } + } + ; +end_accept: %empty %prec ACCEPT + | END_ACCEPT + ; + +accept_body: accept_refer + { + $$.func = accept_done_e; + parser_accept(*$1, CONSOLE_e); + } + | accept_refer FROM DATE + { + $$.func = accept_done_e; + if( $1->is_reference() ) { + error_msg(@1, "subscripts are unsupported here"); + YYERROR; + } + parser_accept_date_yymmdd($1->field); + } + | accept_refer FROM DATE YYYYMMDD + { + $$.func = accept_done_e; + if( $1->is_reference() ) { + error_msg(@1, "subscripts are unsupported here"); + YYERROR; + } + parser_accept_date_yyyymmdd($1->field); + } + | accept_refer FROM DAY + { + $$.func = accept_done_e; + if( $1->is_reference() ) { + error_msg(@1, "subscripts are unsupported here"); + YYERROR; + } + parser_accept_date_yyddd($1->field); + } + | accept_refer FROM DAY YYYYDDD + { + $$.func = accept_done_e; + if( $1->is_reference() ) { + error_msg(@1, "subscripts are unsupported here"); + YYERROR; + } + parser_accept_date_yyyyddd($1->field); + } + | accept_refer FROM DAY_OF_WEEK + { + $$.func = accept_done_e; + if( $1->is_reference() ) { + error_msg(@1, "subscripts are unsupported here"); + YYERROR; + } + parser_accept_date_dow($1->field); + } + + | accept_refer FROM TIME + { + $$.func = accept_done_e; + if( $1->is_reference() ) { + error_msg(@1, "subscripts are unsupported here"); + YYERROR; + } + parser_accept_date_hhmmssff($1->field); + } + | accept_refer FROM acceptable + { + cbl_field_t *argc = register_find("_ARGI"); + switch( $acceptable->id ) { + case ARG_NUM_e: + $$.func = accept_command_line_e; + $$.into = $1; + $$.from = new_reference(argc); + break; + case ARG_VALUE_e: + $$.func = accept_command_line_e; + $$.into = $1; + $$.from = cbl_refer_t::empty(); + break; + default: + $$.func = accept_done_e; + parser_accept( *$1, $acceptable->id ); + } + } + | accept_refer FROM ENVIRONMENT envar + { + $$.func = accept_envar_e; + $$.into = $1; + $$.from = $envar; + //// parser_accept_envar( *$1, *$envar ); + } + | accept_refer FROM COMMAND_LINE + { + $$.func = accept_done_e; + parser_accept_command_line(*$1, NULL, NULL, NULL ); + } + | accept_refer FROM COMMAND_LINE '(' expr ')' + { + $$.func = accept_command_line_e; + $$.into = $1; + $$.from = $expr; + //// parser_accept_command_line(*$1, $expr->field ); + } + | accept_refer FROM COMMAND_LINE_COUNT { + $$.func = accept_done_e; + parser_accept_command_line_count(*$1); + } + ; + +accept_refer: ACCEPT scalar { statement_begin(@1, ACCEPT); $$ = $2; } + ; + +accept_excepts: accept_excepts[a] accept_except[b] statements %prec ACCEPT + { + if( $a.on_error && $a.not_error ) { + error_msg(@b, "too many ON EXCEPTION clauses"); + YYERROR; + } + // "ON" and "NOT ON" could be reversed, but not duplicated. + if( $a.on_error && $b.on_error ) { + error_msg(@b, "duplicate ON EXCEPTION clauses"); + YYERROR; + } + if( $a.not_error && $b.not_error ) { + error_msg(@b, "duplicate NOT ON EXCEPTION clauses"); + YYERROR; + } + $$ = $a; + if( $b.on_error ) { + $$.on_error = $b.on_error; + assert($a.not_error); + } else { + $$.not_error = $b.not_error; + assert($a.on_error); + } + assert( $b.on_error || $b.not_error ); + assert( ! ($b.on_error && $b.not_error) ); + cbl_label_t *tgt = $b.on_error? $b.on_error : $b.not_error; + parser_accept_exception_end(tgt); + } + | accept_except[a] statements %prec ACCEPT + { + $$ = $a; + assert( $a.on_error || $a.not_error ); + assert( ! ($a.on_error && $a.not_error) ); + cbl_label_t *tgt = $a.on_error? $a.on_error : $a.not_error; + parser_accept_exception_end(tgt); + } + ; + +accept_except: EXCEPTION + { + $$.not_error = NULL; + $$.on_error = label_add(LblArith, + uniq_label("accept"), yylineno); + if( !$$.on_error ) YYERROR; + parser_accept_exception( $$.on_error ); + + assert( $1 == EXCEPTION || $1 == NOT ); + if( $1 == NOT ) { + std::swap($$.on_error, $$.not_error); + } + } + ; + +envar: scalar { $$ = $1; $$->field->attr |= envar_e; } + | LITERAL { + $$ = new_reference(new_literal($1, quoted_e)); + $$->field->attr |= envar_e; + } + ; + +acceptable: device_name + { + $$ = symbol_special( $1.id ); + if( !$$ ) { + error_msg(@1, "no such environment name"); + YYERROR; + } + } + | NAME + { + $$ = special_of($1); + if( !$$ ) { + error_msg(@NAME, "no such environment mnemonic name: %s", $NAME); + YYERROR; + } + } + ; + +add: add_impl end_add { ast_add($1); } + | add_cond end_add { ast_add($1); } + ; +add_impl: ADD add_body + { + statement_begin(@1, ADD); + $$ = $2; + } + ; +add_cond: ADD add_body[body] arith_errs[err] + { + statement_begin(@1, ADD); + $body->on_error = $err.on_error; + $body->not_error = $err.not_error; + $$ = $body; + } + ; +end_add: %empty %prec ADD + | END_ADD + ; + +add_body: sum TO rnames + { + $$ = new arith_t(no_giving_e, $sum); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | sum TO num_operand[value] GIVING rnames + { + $$ = new arith_t(giving_e, $sum); + $$->A.push_back(*$value); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | sum GIVING rnames + { // implicit TO + $$ = new arith_t(giving_e, $sum); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | CORRESPONDING sum TO rnames + { + corresponding_fields_t pairs = + corresponding_arith_fields( $sum->refers.front().field, + rhs.front().refer.field ); + if( pairs.empty() ) { + yywarn( "%s and %s have no corresponding fields", + $sum->refers.front().field->name, + rhs.front().refer.field->name ); + } + // First src/tgt elements are templates. + // Their subscripts apply to the correspondents. + $$ = new arith_t(corresponding_e, $sum); + $$->tgts.push_front(rhs.front()); + // use arith_t functor to populate A and tgts + *$$ = std::for_each( pairs.begin(), pairs.end(), *$$ ); + $$->A.pop_front(); + $$->tgts.pop_front(); + rhs.clear(); + } + ; + +rounded: %empty { $$ = truncation_e; } + | ROUNDED { $$ = current_rounded_mode(); } + | ROUNDED rounded_mode { $$ = rounded_of($rounded_mode); } + ; +rounded_mode: MODE is rounded_type { $$ = $rounded_type; } + ; +rounded_type: AWAY_FROM_ZERO { $$ = away_from_zero_e; } + | NEAREST_TOWARD_ZERO { $$ = nearest_toward_zero_e; } + | TOWARD_GREATER { $$ = toward_greater_e; } + | TOWARD_LESSER { $$ = toward_lesser_e; } + | round_between + ; +round_between: NEAREST_AWAY_FROM_ZERO { $$ = nearest_away_from_zero_e; } + | NEAREST_EVEN { $$ = nearest_even_e; } + | PROHIBITED { $$ = prohibited_e; } + | TRUNCATION { $$ = truncation_e; } + ; + +might_be: %empty { $$ = IS; } + | MIGHT_BE + ; + +posneg: POSITIVE { $$ = $1 == NOT? le_op : gt_op; } + | NEGATIVE { $$ = $1 == NOT? ge_op : lt_op; } + | ZERO { $$ = $1 == NOT? ne_op : eq_op; } + ; + +scalar88s: scalar88 { $$ = new refer_list_t($1); } + | scalar88s scalar88 { $1->push_back($2); } + ; + +name88: NAME88 { + name_queue.qualify(@1, $1); + auto namelocs( name_queue.pop() ); + auto names( name_queue.namelist_of(namelocs) ); + if( ($$ = field_find(names)) == NULL ) { + if( procedure_div_e == current_division ) { + error_msg(namelocs.back().loc, + "DATA-ITEM '%s' not found", names.back() ); + YYERROR; + } + } + assert($$->level == 88); + } + ; + +scalar88: name88 subscripts[subs] refmod[ref] + { + size_t n = $subs->size(); + auto subscripts = new cbl_refer_t[n]; + $subs->use_list(subscripts); + if( $ref.from->is_reference() || $ref.len->is_reference() ) { + error_msg(@subs, "subscripts on start:len refmod " + "parameters are unsupported"); + YYERROR; + } + cbl_span_t span( $ref.from, $ref.len ); + $$ = new cbl_refer_t($1, n, subscripts, span); + } + | name88 refmod[ref] + { + if( $ref.from->is_reference() || $ref.len->is_reference() ) { + error_msg(@ref, "subscripts on start:len refmod " + "parameters are unsupported"); + YYERROR; + } + cbl_span_t span( $ref.from, $ref.len ); + $$ = new cbl_refer_t($1, span); + } + | name88 subscripts[subs] + { + $$ = new cbl_refer_t($1); + if( $subs->refers.size() != $$->subscripts_set($subs->refers) ) { + subscript_dimension_error(@subs, $subs->refers.size(), $$); + } + } + | name88 + { + $$ = new_reference($1); + } + ; + +allocate: ALLOCATE expr[size] CHARACTERS initialized RETURNING scalar[returning] + { + statement_begin(@1, ALLOCATE); + if( $size->field->type == FldLiteralN ) { + if( $size->field->data.value <= 0 ) { + error_msg(@size, "size must be greater than 0"); + YYERROR; + } + } + reject_refmod( @returning, *$returning ); + if( ! require_pointer(@returning, *$returning) ) YYERROR; + parser_allocate( *$size, *$returning, $initialized ); + } + | ALLOCATE scalar[based] initialized alloc_ret[returning] + { + statement_begin(@1, ALLOCATE); + if( ! $based->field->has_attr(based_e) ) { + error_msg(@based, "%s must be BASED", $based->name()); + YYERROR; + } + reject_refmod( @based, *$based ); + reject_refmod( @returning, *$returning ); + if( $returning->field && + ! require_pointer(@returning, *$returning) ) YYERROR; + parser_allocate( *$based, *$returning, $initialized ); + if( $initialized ) { + initialize_allocated(*$based); + } + } + ; +initialized: %empty { $$ = false; } + | INITIALIZED { $$ = true; } + ; +alloc_ret: %empty { static cbl_refer_t empty; $$ = ∅ } + | RETURNING scalar[name] { $$ = $name; } + ; + +compute: compute_impl end_compute { current.compute_end(); } + | compute_cond end_compute { current.compute_end(); } + ; +compute_impl: COMPUTE compute_body[body] + { + parser_assign( $body.ntgt, $body.tgts, *$body.expr, + NULL, NULL, current.compute_label() ); + current.declaratives_evaluate(ec_none_e); + } + ; +compute_cond: COMPUTE compute_body[body] arith_errs[err] + { + parser_assign( $body.ntgt, $body.tgts, *$body.expr, + $err.on_error, $err.not_error, + current.compute_label() ); + current.declaratives_evaluate(ec_size_e); + } + ; +end_compute: %empty %prec COMPUTE + | END_COMPUTE + ; + +compute_body: rnames { statement_begin(@$, COMPUTE); } compute_expr[expr] { + $$.ntgt = rhs.size(); + auto C = new cbl_num_result_t[$$.ntgt]; + $$.tgts = use_any(rhs, C); + $$.expr = $expr; + } + ; +compute_expr: '=' { + current.compute_begin(); + } expr { + $$ = $expr; + } + ; + | EQUAL { + if( ! dialect_ibm() ) { + dialect_error(@1, "EQUAL invalid as assignment operator", "ibm"); + } + current.compute_begin(); + } expr { + $$ = $expr; + } + ; + +display: disp_body end_display + { + size_t len = $1.vargs->args.size(); + struct cbl_refer_t args[len]; + + if( $1.special && $1.special->id == ARG_NUM_e ) { + if( $1.vargs->args.size() != 1 ) { + error_msg(@1, "ARGUMENT-NUMBER can be set to only one value"); + } + cbl_refer_t& src( $1.vargs->args.front() ); + cbl_field_t *dst = register_find("_ARGI"); + parser_move( dst, src ); + } else { + parser_display($1.special, use_vargs($1.vargs, args), len, + DISPLAY_ADVANCE); + } + current.declaratives_evaluate(ec_none_e); + } + | disp_body NO ADVANCING end_display + { + size_t len = $1.vargs->args.size(); + struct cbl_refer_t args[len]; + + if( $1.special && $1.special->id == ARG_NUM_e ) { + if( $1.vargs->args.size() != 1 ) { + error_msg(@1, "ARGUMENT-NUMBER can be set to only one value"); + } + cbl_refer_t& src( $1.vargs->args.front() ); + cbl_field_t *dst = register_find("_ARGI"); + parser_move( dst, src ); + } else { + parser_display($1.special, use_vargs($1.vargs, args), len, + DISPLAY_NO_ADVANCE); + } + current.declaratives_evaluate(ec_none_e); + } + ; +end_display: %empty + | END_DISPLAY + ; +disp_body: disp_vargs[vargs] + { + $$.special = NULL; + $$.vargs = $vargs; + } + | disp_vargs[vargs] UPON disp_target[special] + { + $$.special = $special; + $$.vargs = $vargs; + } + ; +disp_vargs: DISPLAY vargs { + statement_begin(@1, DISPLAY); + $$ = $vargs; + } + ; + +disp_target: device_name { + $$ = symbol_special($1.id); + } + | NAME + { + symbol_elem_t *e = symbol_special(PROGRAM, $1); + if( !e ) { + error_msg(@NAME, "no such special name '%s'", $NAME); + YYERROR; + } + $$ = cbl_special_name_of(e); + } + ; + +divide: divide_impl end_divide { ast_divide($1); } + | divide_cond end_divide { ast_divide($1); } + ; + +divide_impl: DIVIDE divide_body[body] + { + statement_begin(@1, DIVIDE); + $$ = $body; + } + ; +divide_cond: DIVIDE divide_body[body] arith_errs[err] + { + statement_begin(@1, DIVIDE); + $$ = $body; + $$->on_error = $err.on_error; + $$->not_error = $err.not_error; + } + ; +end_divide: %empty %prec DIVIDE + | END_DIVIDE + ; + +divide_body: num_operand INTO rnames + { /* format 1 */ + $$ = new arith_t(no_giving_e); + $$->A.push_back(*$num_operand); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | divide_into + | divide_into REMAINDER scalar[rem] + { + if( $1->tgts.size() != 1 ) { + error_msg(@1, "only 1 (not %zu) " + "GIVING with REMAINDER", $1->tgts.size()); + YYERROR; + } + $$ = $1; + $$->remainder = *$rem; + } + | divide_by + | divide_by REMAINDER scalar[rem] + { + if( $1->tgts.size() != 1 ) { + error_msg(@1, "only 1 (not %zu) " + "GIVING with REMAINDER", $1->tgts.size()); + YYERROR; + } + $$ = $1; + $$->remainder = *$rem; + } + ; + +divide_into: num_operand[b] INTO num_operand[a] GIVING rnames + { // format 2 & 4 + $$ = new arith_t(giving_e); + $$->A.push_back(*$a); + $$->B.push_back(*$b); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + ; +divide_by: num_operand[a] BY num_operand[b] GIVING rnames + { // format 3 & 5 + $$ = new arith_t(giving_e); + $$->A.push_back(*$a); + $$->B.push_back(*$b); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + ; + +end_program: end_program1[end] '.' + { + const cbl_label_t *prog = current.program(); + assert(prog); + const char *name = string_of($end.name); + + bool matches = false; + const char *token_name = keyword_str($end.token) + 4; + switch($end.token) { + case END_PROGRAM: + matches = prog->type == LblProgram; + break; + case END_FUNCTION: + matches = prog->type == LblFunction; + break; + default: + error_msg(@end, "logic error: END token invalid '%s'", name); + gcc_unreachable(); + } + if( !matches ) { + error_msg(@end, "END %s %s' does not match IDENTIFICATION DIVISION '%s'", + token_name, name, prog->name); + YYERROR; + } + + if( 0 != strcasecmp(prog->name, name) ) { + error_msg(@end, "END PROGRAM '%s' does not match PROGRAM-ID '%s'", + name, prog->name); + YYERROR; + } + std::set<std::string> externals = current.end_program(); + if( !externals.empty() ) { + for( auto name : externals ) { + yywarn("%s calls external symbol '%s'", prog->name, name.c_str()); + } + YYERROR; + } + // pointer still valid because name is in symbol table + ast_end_program(prog->name); + } + | end_program1[end] error + { + const char *token_name = "???"; + switch($end.token) { + case END_PROGRAM: + token_name = "PROGRAM"; + break; + case END_FUNCTION: + token_name = "FUNCTION"; + break; + default: + cbl_internal_error( "END token invalid"); + } + error_msg(@end, "END %s requires NAME before '.'", token_name); + YYERROR; + } + ; +end_program1: END_PROGRAM namestr[name] + { + $$.token = END_PROGRAM; + $$.name = $name; + } + | END_FUNCTION namestr[name] + { + $$.token = END_FUNCTION; + $$.name = $name; + } + | END_PROGRAM '.' // error + { + $$.token = END_PROGRAM; + } + | END_FUNCTION '.' // error + { + $$.token = END_FUNCTION; + } + ; + +continue_stmt: CONTINUE { + statement_begin(@1, CONTINUE); + parser_sleep(*cbl_refer_t::empty()); + } + | CONTINUE AFTER expr SECONDS { + statement_begin(@1, CONTINUE); + parser_sleep(*$expr); + } + ; + +exit: GOBACK exit_with[status] + { + statement_begin(@1, GOBACK); + parser_exit(*$status); + } + | GOBACK exit_raising[ec] + { + statement_begin(@1, GOBACK); + parser_exit(*cbl_refer_t::empty(), $ec); + } + | EXIT { statement_begin(@1, EXIT); } exit_what + | SIMPLE_EXIT + { + error_msg(@1, "EXIT is invalid here"); + } + ; + /* Valid "simple" EXIT (Format 1) swallowed by lexer */ + + /* + * If the EXIT PROGRAM statement is executed in a program that + * is not under the control of a calling runtime element, the + * EXIT PROGRAM statement is treated as if it were a CONTINUE + * statement. + * To indicate this, We pass a "magic" refer with prog_func set. + */ +exit_with: %empty + { + /* "If a RETURNING phrase is specified in the procedure + * division header of the program containing the GOBACK + * statement, the value in the data item referenced by that + * RETURNING phrase becomes the result of the program + * activation. Execution continues in the calling element + * as specified in the rules." + */ + $$ = cbl_refer_t::empty(); + if( dialect_ibm() ) { + static auto rt = cbl_field_of(symbol_at(return_code_register())); + static cbl_refer_t status(rt); + $$ = &status; + } + auto prog = cbl_label_of(symbol_at(current_program_index())); + if( prog->returning ) { + $$ = new cbl_refer_t( cbl_field_of(symbol_at(prog->returning)) ); + } + } + | with NORMAL stop_status + { + $$ = $stop_status? $stop_status : new_reference(literally_zero); + } + | with ERROR stop_status + { + $$ = $stop_status? $stop_status : new_reference(literally_one); + } + | RETURNING stop_status + { + if( ! dialect_mf() ) { + dialect_error(@2, "RETURNING <number>", "mf"); + } + $$ = $stop_status? $stop_status : new_reference(literally_one); + } + ; +exit_what: PROGRAM_kw { parser_exit_program(); } + | PROGRAM_kw exit_raising[ec] { parser_exit_program(); } + | SECTION { parser_exit_section(); } + | PARAGRAPH { parser_exit_paragraph(); } + | PERFORM { + if( performs.empty() ) { + error_msg(@$, "EXIT PERFORM valid only " + "within inline PERFORM procedure" ); + YYERROR; + } + parser_exit_perform(&perform_current()->tgt, $1); + } + ; + +exit_raising: RAISING EXCEPTION EXCEPTION_NAME[ec] + { + $$ = $ec; + } + | RAISING error { + cbl_unimplemented("RAISING exception-object"); + $$ = ec_none_e; + } + | RAISING LAST /* lexer swallows EXCEPTION */ + { + $$ = ec_all_e; + } + ; + +free: FREE free_tgts + { + size_t n = $free_tgts->size(); + assert( n > 0 ); + auto tgts = new cbl_refer_t[n]; + parser_free( n, $free_tgts->use_list(tgts) ); + } + ; +free_tgts: free_tgt { $$ = new refer_list_t($1); } + | free_tgts free_tgt { $$->push_back($2); } + ; +free_tgt: scalar { + $$ = $1; + reject_refmod(@scalar, *$1); + } + | ADDRESS OF scalar[name] + { + $$ = $name; + $$->addr_of = true; + reject_refmod(@name, *$name); + } + ; + + /* + * Conditional Expressions + */ +simple_cond: kind_of_name + { + $$ = new_reference($1); + } + | SWITCH + { + $$ = new_reference(new_temporary(FldConditional)); + cbl_field_t *field = cbl_field_of(symbol_find(@1, $1)); + assert(field->type == FldSwitch); + cbl_field_t *parent = parent_of(field); + size_t value = field->data.upsi_mask->value; + bitop_t op = field->data.upsi_mask->on_off? + bit_on_op : bit_off_op; + parser_bitop($$->cond(), parent, op, value ); + } + | expr is CLASS_NAME[domain] + { + $$ = new_reference(new_temporary(FldConditional)); + // symbol_find does not find FldClass symbols + struct symbol_elem_t *e = symbol_field(PROGRAM, 0, $domain); + parser_setop($$->cond(), $1->field, is_op, cbl_field_of(e)); + } + | expr NOT CLASS_NAME[domain] { + $$ = new_reference(new_temporary(FldConditional)); + // symbol_find does not find FldClass symbols + struct symbol_elem_t *e = symbol_field(PROGRAM, 0, $domain); + parser_setop($$->cond(), $1->field, is_op, cbl_field_of(e)); + parser_logop($$->cond(), NULL, not_op, $$->cond()); + } + | expr is OMITTED + { + auto lhs = cbl_refer_t($expr->field); + lhs.addr_of = true; + auto rhs = cbl_field_of(symbol_field(0,0, "NULLS")); + $$ = new_reference(new_temporary(FldConditional)); + parser_relop($$->field, lhs, eq_op, rhs); + } + | expr NOT OMITTED + { + auto lhs = cbl_refer_t($expr->field); + lhs.addr_of = true; + auto rhs = cbl_field_of(symbol_field(0,0, "NULLS")); + $$ = new_reference(new_temporary(FldConditional)); + parser_relop($$->field, lhs, ne_op, rhs); + } + | expr posneg[op] { + $$ = new_reference(new_temporary(FldConditional)); + relop_t op = static_cast<relop_t>($op); + cbl_field_t *zero = constant_of(constant_index(ZERO)); + parser_relop($$->cond(), *$1, op, zero); + } + | scalar88 { + // copy the subscripts and set the parent field + cbl_refer_t parent = *$scalar88; + parent.field = parent_of($scalar88->field); + if( !parent.field ) { + cbl_internal_error("Type 88 has no referent"); + YYERROR; + } + $$ = new_reference(new_temporary(FldConditional)); + $$->field->parent = field_index($scalar88->field); + parser_relop($$->cond(), parent, eq_op, *$scalar88); + } + ; + +kind_of_name: expr might_be variable_type + { + $$ = new_temporary(FldConditional); + enum classify_t type = classify_of($3); + assert(type != ClassInvalidType ); + + parser_classify( $$, *$1, type ); + if( $2 == NOT ) { + parser_logop($$, NULL, not_op, $$); + } + } + ; + +bool_expr: log_expr { $$ = new_reference($1->resolve()); } + ; + +log_expr: log_term { $$ = new log_expr_t($1); } %prec AND + | log_expr[lhs] OR rel_abbr[rhs] + { + $$ = $1; + $$->or_term($rhs); + } + | log_expr[lhs] OR log_expr[rhs] + { + $$ = $lhs; + assert( ! $rhs->unresolved() ); // what to do? + $$->or_term($rhs->and_term()); + } + | log_expr[lhs] AND rel_abbr[rhs] + { + $$ = $1; + $$->and_term($rhs); + } + | log_expr[lhs] AND log_expr[rhs] + { + $$ = $lhs; + assert( ! $rhs->unresolved() ); // what to do? + $$->and_term($rhs->and_term()); + } + ; + +log_term: '(' log_expr ')' { + current.antecedent_reset(); + $$ = $log_expr->resolve(); + } + | NOT '(' log_expr ')' { + current.antecedent_reset(); + $$ = $log_expr->resolve(); + parser_logop($$, NULL, not_op, $$); + } + | rel_expr + | simple_cond { + current.antecedent_reset(); + $$ = $1->cond(); + } + | NOT simple_cond { + current.antecedent_reset(); + $$ = $2->cond(); + parser_logop($$, NULL, not_op, $$); + } + ; + +rel_expr: rel_lhs rel_term[rhs] + { + rel_part_t& ante = current.antecedent(); + if( $rhs.invert ) { + error_msg(@rhs, "NOT %s is invalid, cannot negate RHS", + ante.operand->field->name); + } + auto op = ante.relop; + if( ante.invert ) { + op = relop_invert(op); + ante.invert = false; + } + auto cond = new_temporary(FldConditional); + parser_relop( cond, *ante.operand, op, *$rhs.term ); + $$ = cond; + } + | rel_lhs[lhs] '(' rel_abbrs ')' { + $$ = $rel_abbrs->resolve(); + } + ; + +rel_abbrs: rel_abbr { $$ = new log_expr_t($1); } + | '(' rel_abbrs ')' { + $$ = $2; + $$->resolve(); + + } + | rel_abbrs OR rel_abbr[rhs] { + $$ = $1; + $$->or_term($rhs); + } + | rel_abbrs OR '(' rel_abbr[rhs] ')' { + $$ = $1; + $$->or_term($rhs); + } + | rel_abbrs AND rel_abbr[rhs] { + $$ = $1; + $$->and_term($rhs); + } + | rel_abbrs AND '(' rel_abbr[rhs] ')' { + $$ = $1; + $$->and_term($rhs); + } + ; + +rel_lhs: rel_term[lhs] relop { + // no value, just set current antecedent + auto op = relop_of($relop); + auto ante = new rel_part_t($lhs.term, op, $lhs.invert); + current.antecedent(*ante); + } + ; + +rel_abbr: rel_term { + static rel_part_t ante; + ante = current.antecedent(); + if( ! ante.operand ) { + error_msg(@1, "'AND %s' invalid because " + "LHS is not a relation condition", + name_of($rel_term.term->field) ); + YYERROR; + } + assert(ante.has_relop); + if( $rel_term.invert ) ante.relop = relop_invert(ante.relop); + auto cond = new_temporary(FldConditional); + parser_relop(cond, *ante.operand, ante.relop, *$rel_term.term); + $$ = cond; + } + | relop rel_term { + static rel_part_t ante; + if( $rel_term.invert ) { + error_msg(@2, "%s NOT %s is invalid", + keyword_str($relop), + name_of($rel_term.term->field)); + } + auto op( relop_of($relop) ); + ante = current.antecedent().relop_set(op); + if( ! ante.operand ) { + error_msg(@1, "AND %s invalid because " + "LHS is not a relation condition", + name_of($rel_term.term->field) ); + YYERROR; + } + auto cond = new_temporary(FldConditional); + parser_relop(cond, *ante.operand, ante.relop, *$rel_term.term); + $$ = cond; + } + ; + +rel_term: rel_term1 + ; + +rel_term1: all LITERAL + { + $$.invert = false; + $$.term = new_reference(new_literal($2, quoted_e)); + $$.term->all = $all; + } + | all spaces_etc[value] + { + $$.invert = false; + $$.term = new_reference(constant_of(constant_index($value))); + $$.term->all = $all; + } + | all NULLS + { + $$.invert = false; + $$.term = new_reference(constant_of(constant_index(NULLS))); + $$.term->all = $all; + } + | ALL ZERO + { // ZERO without ALL comes from expr, from num_term. + $$.invert = false; + $$.term = new_reference(constant_of(constant_index(ZERO))); + $$.term->all = true; + } + | expr { + $$.invert = false; + $$.term = $1; + } + | NOT rel_term { + $$ = $2; + $$.invert = true; + } + ; + +expr: expr_term + ; +expr_term: expr_term '+' num_term + { + if( ($$ = ast_op($1, '+', $3)) == NULL ) YYERROR; + } + | expr_term '-' num_term + { + if( ($$ = ast_op($1, '-', $3)) == NULL ) YYERROR; + } + | num_term + ; + +num_term: num_term '*' value + { + if( ($$ = ast_op($1, '*', $3)) == NULL ) YYERROR; + } + | num_term '/' value + { + if( ($$ = ast_op($1, '/', $3)) == NULL ) YYERROR; + } + | value + ; + +value: value POW factor + { + if( ($$ = ast_op($1, '^', $3)) == NULL ) YYERROR; + } + | '-' value %prec NEG { $$ = negate( $2 );} + | '+' factor %prec NEG { $$ = $2;} + | factor[rhs] + ; + +factor: '(' expr ')' { $$ = $2; } + | num_value { $$ = $num_value; } + ; + +if_stmt: if_impl end_if + ; + +if_impl: if_verb if_test if_body + { + parser_fi(); + } + ; +if_verb: IF { statement_begin(@1, IF); } + ; +if_test: bool_expr then + { + if( ! is_conditional($bool_expr) ) { + error_msg(@1, "%s is not a Boolean expression", + name_of($bool_expr->field) ); + YYERROR; + } + parser_if( $bool_expr->cond() ); + } + ; + +if_body: next_statements + { + parser_else(); + } + | next_statements ELSE { + location_set(@2); + parser_else(); + } next_statements + ; + +next_statements: statements %prec ADD + | NEXT SENTENCE %prec ADD + { + next_sentence = label_add(LblNone, "next_sentence", 0); + parser_label_goto(next_sentence); + } + ; + +end_if: %empty %prec ADD + | END_IF + ; + +evaluate: eval_verb eval_subjects eval_switch end_evaluate { + auto& ev( eval_stack.current() ); + parser_label_label(ev.when()); + parser_label_label(ev.done()); + eval_stack.free(); + } + ; +eval_verb: EVALUATE { + statement_begin(@1, EVALUATE); + eval_stack.alloc(); + } + ; + +eval_subjects: eval_subject + | eval_subjects ALSO eval_subject + ; +eval_subject: eval_subject1 { + auto& ev( eval_stack.current() ); + ev.append(*$1); + } + ; +eval_subject1: bool_expr + | expr + | true_false + { + static cbl_field_t *zero = constant_of(constant_index(ZERO)); + enum relop_t op = $1 == TRUE_kw? eq_op : ne_op; + $$ = new cbl_refer_t( new_temporary(FldConditional) ); + parser_relop($$->field, zero, op, zero); + } + ; + +eval_switch: eval_cases + | eval_cases WHEN OTHER { + auto& ev( eval_stack.current() ); + ev.write_when_label(); + } + statements %prec ADD + ; + +eval_cases: eval_case + | eval_cases eval_case + ; + +eval_case: eval_objects statements %prec ADD { + auto& ev( eval_stack.current() ); + parser_label_goto( ev.done() ); + ev.rewind(); + } + | eval_objects NEXT SENTENCE %prec ADD + { + auto& ev( eval_stack.current() ); + ev.write_when_label(); + next_sentence = label_add(LblNone, "next_sentence", 0); + parser_label_goto(next_sentence); + } + ; + +eval_objects: eval_whens { + auto& ev( eval_stack.current() ); + // Place the object's Yeah label before the statements. + ev.write_yeah_label(); + } + ; +eval_whens: eval_when + | eval_whens eval_when + ; + +eval_when: WHEN { + auto& ev( eval_stack.current() ); + ev.write_when_label(); + } + eval_obj_cols %prec ADD { // all TRUE, go to statements + auto& ev( eval_stack.current() ); + parser_label_goto(ev.yeah()); + auto subj( ev.subject() ); + if( subj ) { + error_msg(@2, "WHEN clause incomplete, %zu of %zu evaluated", + ev.object_count(), ev.subject_count()); + } + ev.rewind(); + } + | WHEN error + ; + +eval_obj_cols: eval_obj_col + | eval_obj_cols ALSO eval_obj_col + ; + +eval_obj_col: ANY { + auto& ev( eval_stack.current() ); + if( ! ev.decide(ANY) ) { + error_msg(@1, "WHEN 'ANY' phrase exceeds subject set count of %zu", + ev.subject_count()); + YYERROR; + } + } + | true_false { + auto& ev( eval_stack.current() ); + auto subj( ev.subject() ); + if( !subj ) { + error_msg(@$, "WHEN '%s' phrase exceeds subject set count of %zu", + keyword_str($1), ev.subject_count()); + YYERROR; + } + if( ! is_conditional( subj ) ) { + error_msg(@1, "subject %s, type %s, " + "cannot be compared to TRUE/FALSE", + subj->name, 3 + cbl_field_type_str(subj->type) ); + } + ev.decide($1); + } + | eval_posneg[op] { + relop_t op = static_cast<relop_t>($op); + cbl_field_t *zero = constant_of(constant_index(ZERO)); + auto& ev( eval_stack.current() ); + auto subj( ev.subject() ); + if( !subj ) { + error_msg(@1, "WHEN '%s' phrase exceeds subject set count of %zu", + relop_str(op), ev.subject_count()); + YYERROR; + } + ev.decide(op, zero, false); + } + | bool_expr { + auto& ev( eval_stack.current() ); + auto subj( ev.subject() ); + if( !subj ) { + error_msg(@1, "WHEN CONDITIONAL phrase exceeds " + "subject set count of %zu", + ev.subject_count()); + YYERROR; + } + if( ! is_conditional( subj ) ) { + error_msg(@1, "subject %s, type %s, " + "cannot be compared to conditional expression", + subj->name, 3 + cbl_field_type_str(subj->type) ); + } + ev.decide(*$1, false); + } + | eval_abbrs { + auto& ev( eval_stack.current() ); + ev.decided( $1->resolve() ); + } + | rel_term[a] THRU rel_term[b] %prec THRU { + auto& ev( eval_stack.current() ); + auto subj( ev.subject() ); + if( !subj ) { + error_msg(@a, "WHEN %s THRU %s phrase exceeds " + "subject set count of %zu", + $a.term->name(), $b.term->name(), ev.subject_count()); + YYERROR; + } + if( is_conditional($a.term) || is_conditional($b.term) ) { + error_msg(@a, "THRU with boolean operand"); + } + if( $b.invert ) { + error_msg(@b, "NOT %s is invalid with THRU", + name_of($b.term->field)); + } + ev.decide(*$a.term, *$b.term, $a.invert); + } + | rel_term[a] ELSE + { + error_msg(@ELSE, "ELSE not valid in WHEN"); + YYERROR; + } + ; +eval_posneg: POSITIVE { $$ = $1 == NOT? le_op : gt_op; } + | NEGATIVE { $$ = $1 == NOT? ge_op : lt_op; } + ; + +eval_abbrs: rel_term[a] { + auto& ev( eval_stack.current() ); + auto subj( ev.subject() ); + if( !subj ) { + error_msg(@1, "WHEN %s phrase exceeds " + "subject set count of %zu", + $a.term->name(), ev.subject_count()); + YYERROR; + } + if( ! ev.compatible($a.term->field) ) { + auto obj($a.term->field); + error_msg(@1, "subject %s, type %s, " + "cannot be compared %s, type %s", + subj->name, 3 + cbl_field_type_str(subj->type), + obj->name, 3 + cbl_field_type_str(obj->type) ); + } + auto result = ev.compare(*$a.term); + if( ! result ) YYERROR; + if( $a.invert ) { + parser_logop(result, nullptr, not_op, result); + } + $$ = new log_expr_t(result); + } + | relop rel_term[a] { + auto& ev( eval_stack.current() ); + relop_t relop(relop_of($relop)); + ev.object_relop(relop); + auto subj( ev.subject() ); + if( !subj ) { + error_msg(@1, "WHEN %s %s phrase exceeds " + "subject set count of %zu", + relop_str(relop_of($relop)), $a.term->name(), ev.subject_count()); + YYERROR; + } + if( ! ev.compatible($a.term->field) ) { + auto obj($a.term->field); + error_msg(@1, "subject %s, type %s, " + "cannot be compared %s, type %s", + subj->name, 3 + cbl_field_type_str(subj->type), + obj->name, 3 + cbl_field_type_str(obj->type) ); + } + if( is_conditional(ev.subject()) ) { + auto obj($a.term->field); + error_msg(@1, "subject %s, type %s, " + "cannot be %s %s, type %s", + subj->name, 3 + cbl_field_type_str(subj->type), + relop_str(relop_of($relop)), + obj->name, 3 + cbl_field_type_str(obj->type) ); + } + auto result = ev.compare(relop, *$a.term); + if( ! result ) YYERROR; + if( $a.invert ) { + parser_logop(result, nullptr, not_op, result); + } + $$ = new log_expr_t(result); + } + | '(' eval_abbrs ')' { + $$ = $2; + $$->resolve(); + } + | eval_abbrs OR eval_abbr[rhs] { + $$ = $1; + $$->or_term($rhs); + } + | eval_abbrs OR '(' eval_abbr[rhs] ')' { + $$ = $1; + $$->or_term($rhs); + } + | eval_abbrs AND eval_abbr[rhs] { + $$ = $1; + $$->and_term($rhs); + } + | eval_abbrs AND '(' eval_abbr[rhs] ')' { + $$ = $1; + $$->and_term($rhs); + } + ; + +eval_abbr: rel_term[a] { + auto& ev( eval_stack.current() ); + relop_t relop(ev.object_relop()); + auto subj( ev.subject() ); + assert( subj ); + $$ = ev.compare(relop, *$a.term); + if( $a.invert ) { + parser_logop($$, nullptr, not_op, $$); + } + } + | relop rel_term[a] { + auto& ev( eval_stack.current() ); + relop_t relop(relop_of($relop)); + ev.object_relop(relop); + $$ = ev.compare(relop, *$a.term); + if( $a.invert ) { + parser_logop($$, nullptr, not_op, $$); + } + } + ; + +end_evaluate: %empty %prec EVALUATE + | END_EVALUATE + ; + +true_false: TRUE_kw { $$ = TRUE_kw; } + | FALSE_kw { $$ = FALSE_kw; } + ; + +scalar: tableref { + // Check for missing subscript; others already checked. + if( $1->nsubscript == 0 && 0 < dimensions($1->field) ) { + subscript_dimension_error(@1, 0, $$); + } + } + ; + +tableref: tableish { + // tableref is used by SORT. It may name a table without subscripts. + $$ = $1; + $$->loc = @1; + if( $$->is_table_reference() ) { + if( $$->nsubscript != dimensions($$->field) ) { + subscript_dimension_error(@1, $$->nsubscript, $$); + YYERROR; + } + } + } +tableish: name subscripts[subs] refmod[ref] %prec NAME + { + assert(yychar != LPAREN); + $$ = new cbl_refer_t($name); + $$->subscripts_set($subs->refers); + literal_subscripts_valid( @subs, *$$ ); + $$->refmod = cbl_span_t( $ref.from, + $ref.len ); + literal_refmod_valid( @ref, *$$ ); + } + | name refmod[ref] %prec NAME + { + $$ = new cbl_refer_t($name); + $$->refmod = cbl_span_t( $ref.from, + $ref.len ); + literal_refmod_valid( @ref, *$$ ); + } + | name subscripts[subs] %prec NAME + { + $$ = new cbl_refer_t($name); + $$->subscripts_set($subs->refers); + literal_subscripts_valid( @subs, *$$ ); + } + | name %prec NAME + { + $$ = new cbl_refer_t($name); + } + ; + +refmod: LPAREN expr[from] ':' expr[len] ')' %prec NAME + { + if( ! require_numeric(@from, *$from) ) YYERROR; + if( ! require_numeric(@len, *$len) ) YYERROR; + $$.from = $from; + $$.len = $len; + } + | LPAREN expr[from] ':' ')' %prec NAME + { + if( ! require_numeric(@from, *$from) ) YYERROR; + $$.from = $from; + $$.len = nullptr; + } + ; + +typename: NAME + { + auto e = symbol_typedef(PROGRAM, $NAME); + if( ! e ) { + error_msg(@1, "DATA-ITEM '%s' not found", $NAME ); + YYERROR; + } + $$ = cbl_field_of(e); + } + ; + +name: qname + { + build_symbol_map(); + auto namelocs( name_queue.pop() ); + auto names( name_queue.namelist_of(namelocs) ); + auto inner = namelocs.back(); + if( ($$ = field_find(names)) == NULL ) { + if( procedure_div_e == current_division ) { + error_msg(inner.loc, + "DATA-ITEM '%s' not found", inner.name ); + YYERROR; + } + /* + * Insert forward references, starting outermost. + */ + size_t parent = 0; + while( ! names.empty() ) { + auto name = names.front(); + names.pop_front(); + auto e = symbol_field_forward_add(PROGRAM, parent, + name, yylineno); + if( !e ) YYERROR; + symbol_field_location( symbol_index(e), @qname ); + parent = symbol_index(e); + $$ = cbl_field_of(e); + } + } + gcc_assert($$); + } + ; + +qname: ctx_name + { + name_queue.qualify(@1, $1); + } + | qname inof ctx_name + { + name_queue.qualify(@3, $3); + } + ; +inof: IN + | OF + ; + +ctx_name: NAME + | context_word + ; + +context_word: APPLY { static char s[] ="APPLY"; + $$ = s; } // screen description entry + | ARITHMETIC { static char s[] ="ARITHMETIC"; + $$ = s; } // OPTIONS paragraph + | ATTRIBUTE { static char s[] ="ATTRIBUTE"; + $$ = s; } // SET statement + | AUTO { static char s[] ="AUTO"; + $$ = s; } // screen description entry + | AUTOMATIC { static char s[] ="AUTOMATIC"; + $$ = s; } // LOCK MODE clause + | AWAY_FROM_ZERO { static char s[] ="AWAY-FROM-ZERO"; + $$ = s; } // ROUNDED phrase + | BACKGROUND_COLOR { static char s[] ="BACKGROUND-COLOR"; + $$ = s; } // screen description entry + | BELL { static char s[] ="BELL"; + $$ = s; } // screen description entry and SET attribute statement + | BINARY_ENCODING { static char s[] ="BINARY-ENCODING"; + $$ = s; } // USAGE clause and FLOAT-DECIMAL clause + | BLINK { static char s[] ="BLINK"; + $$ = s; } // screen description entry and SET attribute statement + | BYTE_LENGTH { static char s[] ="BYTE-LENGTH"; + $$ = s; } // constant entry + | CAPACITY { static char s[] ="CAPACITY"; + $$ = s; } // OCCURS clause + | CENTER { static char s[] ="CENTER"; + $$ = s; } // COLUMN clause + | CLASSIFICATION { static char s[] ="CLASSIFICATION"; + $$ = s; } // OBJECT-COMPUTER paragraph + | CYCLE { static char s[] ="CYCLE"; + $$ = s; } // EXIT statement + | DECIMAL_ENCODING { static char s[] ="DECIMAL-ENCODING"; + $$ = s; } // USAGE clause and FLOAT-DECIMAL clause + | EOL { static char s[] ="EOL"; + $$ = s; } // ERASE clause in a screen description entry + | EOS { static char s[] ="EOS"; + $$ = s; } // ERASE clause in a screen description entry + | ENTRY_CONVENTION { static char s[] ="ENTRY-CONVENTION"; + $$ = s; } // OPTIONS paragraph + | ERASE { static char s[] ="ERASE"; + $$ = s; } // screen description entry + | EXPANDS { static char s[] ="EXPANDS"; + $$ = s; } // class-specifier and interface-specifier of the REPOSITORY paragraph + | FEATURE { static char s[] ="FEATURE"; + $$ = s; } // gcobol CDF token + | FLOAT_BINARY { static char s[] ="FLOAT-BINARY"; + $$ = s; } // OPTIONS paragraph + | FLOAT_DECIMAL { static char s[] ="FLOAT-DECIMAL"; + $$ = s; } // OPTIONS paragraph + | FOREGROUND_COLOR { static char s[] ="FOREGROUND-COLOR"; + $$ = s; } // screen description entry + | FOREVER { static char s[] ="FOREVER"; + $$ = s; } // RETRY phrase + | FULL { static char s[] ="FULL"; + $$ = s; } // screen description entry + | HIGH_ORDER_LEFT { static char s[] ="HIGH-ORDER-LEFT"; + $$ = s; } // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause + | HIGH_ORDER_RIGHT { static char s[] ="HIGH-ORDER-RIGHT"; + $$ = s; } // FLOAT-BINARY clause, FLOAT-DECIMAL clause, and USAGE clause + | HIGHLIGHT { static char s[] ="HIGHLIGHT"; + $$ = s; } // screen description entry and SET attribute statement + | IGNORING { static char s[] ="IGNORING"; + $$ = s; } // READ statement + | IMPLEMENTS { static char s[] ="IMPLEMENTS"; + $$ = s; } // FACTORY paragraph and OBJECT paragraph + | INITIALIZED { static char s[] ="INITIALIZED"; + $$ = s; } // ALLOCATE statement and OCCURS clause + | INTERMEDIATE { static char s[] ="INTERMEDIATE"; + $$ = s; } // OPTIONS paragraph + | INTRINSIC { static char s[] ="INTRINSIC"; + $$ = s; } // function-specifier of the REPOSITORY paragraph + | LC_ALL_kw { static char s[] ="LC_ALL"; + $$ = s; } // SET statement + | LC_COLLATE_kw { static char s[] ="LC_COLLATE"; + $$ = s; } // SET statement + | LC_CTYPE_kw { static char s[] ="LC_CTYPE"; + $$ = s; } // SET statement + | LC_MESSAGES_kw { static char s[] ="LC_MESSAGES"; + $$ = s; } // SET statement + | LC_MONETARY_kw { static char s[] ="LC_MONETARY"; + $$ = s; } // SET statement + | LC_NUMERIC_kw { static char s[] ="LC_NUMERIC"; + $$ = s; } // SET statement + | LC_TIME_kw { static char s[] ="LC_TIME"; + $$ = s; } // SET statement + | LOWLIGHT { static char s[] ="LOWLIGHT"; + $$ = s; } // screen description entry and SET attribute statement + | MANUAL { static char s[] ="MANUAL"; + $$ = s; } // LOCK MODE clause + | MULTIPLE { static char s[] ="MULTIPLE"; + $$ = s; } // LOCK ON phrase + | NEAREST_AWAY_FROM_ZERO { static char s[] ="NEAREST-AWAY-FROM-ZERO"; + $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase + | NEAREST_EVEN { static char s[] ="NEAREST-EVEN"; + $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase + | NEAREST_TOWARD_ZERO { static char s[] ="NEAREST-TOWARD-ZERO"; + $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase + | NONE { static char s[] ="NONE"; + $$ = s; } // DEFAULT clause + | NORMAL { static char s[] ="NORMAL"; + $$ = s; } // STOP statement + | NUMBERS { static char s[] ="NUMBERS"; + $$ = s; } // COLUMN clause and LINE clause + | ONLY { static char s[] ="ONLY"; + $$ = s; } // Object-view, SHARING clause, SHARING phrase, and USAGE clause + | PREFIXED { static char s[] ="PREFIXED"; + $$ = s; } // DYNAMIC LENGTH STRUCTURE clause + | PREVIOUS { static char s[] ="PREVIOUS"; + $$ = s; } // READ statement + | PROHIBITED { static char s[] ="PROHIBITED"; + $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase + | RECURSIVE { static char s[] ="RECURSIVE"; + $$ = s; } // PROGRAM-ID paragraph + | RELATION { static char s[] ="RELATION"; + $$ = s; } // VALIDATE-STATUS clause + | REQUIRED { static char s[] ="REQUIRED"; + $$ = s; } // screen description entry + | REVERSE_VIDEO { static char s[] ="REVERSE-VIDEO"; + $$ = s; } // screen description entry and SET attribute statement + | ROUNDING { static char s[] ="ROUNDING"; + $$ = s; } // OPTIONS paragraph + | SECONDS { static char s[] ="SECONDS"; + $$ = s; } // RETRY phrase + | SECURE { static char s[] ="SECURE"; + $$ = s; } // screen description entry + | SHORT { static char s[] ="SHORT"; + $$ = s; } // DYNAMIC LENGTH STRUCTURE clause + | SIGNED { static char s[] ="SIGNED"; + $$ = s; } // DYNAMIC LENGTH STRUCTURE clause and USAGE clause + | STANDARD_BINARY { static char s[] ="STANDARD-BINARY"; + $$ = s; } // ARITHMETIC clause + | STANDARD_DECIMAL { static char s[] ="STANDARD-DECIMAL"; + $$ = s; } // ARITHMETIC clause + | STATEMENT { static char s[] ="STATEMENT"; + $$ = s; } // RESUME statement + | STEP { static char s[] ="STEP"; + $$ = s; } // OCCURS clause + | STRONG { static char s[] ="STRONG"; + $$ = s; } // TYPEDEF clause + | STRUCTURE { static char s[] ="STRUCTURE"; + $$ = s; } // DYNAMIC LENGTH STRUCTURE clause + | SYMBOL { static char s[] ="SYMBOL"; + $$ = s; } // CURRENCY clause + | TOWARD_GREATER { static char s[] ="TOWARD-GREATER"; + $$ = s; } // ROUNDED phrase + | TOWARD_LESSER { static char s[] ="TOWARD-LESSER"; + $$ = s; } // ROUNDED phrase + | TRUNCATION { static char s[] ="TRUNCATION"; + $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase + | UCS_4 { static char s[] ="UCS-4"; + $$ = s; } // ALPHABET clause + | UNDERLINE { static char s[] ="UNDERLINE"; + $$ = s; } // screen description entry and SET attribute statement + | UNSIGNED { static char s[] ="UNSIGNED"; + $$ = s; } // USAGE clause + | UTF_8 { static char s[] ="UTF-8"; + $$ = s; } // ALPHABET clause + | UTF_16 { static char s[] ="UTF-16"; + $$ = s; } // ALPHABET clause + | YYYYDDD { static char s[] ="YYYYDDD"; + $$ = s; } // ACCEPT statement + | YYYYMMDD { static char s[] ="YYYYMMDD"; + $$ = s; } // ACCEPT statement + ; + +move: MOVE scalar TO move_tgts[tgts] + { + statement_begin(@1, MOVE); + if( $scalar->field->type == FldIndex ) { + error_msg(@1, "'%s' cannot be MOVEd because it's an INDEX", + name_of($scalar->field) ); + YYERROR; + } + if( !parser_move2($tgts, *$scalar) ) { YYERROR; } + } + | MOVE all literalism[input] TO move_tgts[tgts] + { + statement_begin(@1, MOVE); + struct cbl_refer_t *src = new_reference(new_literal($input, + quoted_e)); + src->all = $all; + if( !parser_move2($tgts, *src) ) { YYERROR; } + } + | MOVE all spaces_etc[src] TO move_tgts[tgts] + { + statement_begin(@1, MOVE); + cbl_field_t *field; + auto p = std::find_if( $tgts->targets.begin(), + $tgts->targets.end(), + [&field]( const auto& num_result ) { + const cbl_refer_t& tgt = num_result.refer; + field = tgt.field; + return is_numeric(tgt.field); + } ); + + if( p != $tgts->targets.end() ) { + error_msg(@src, "cannot MOVE %s " + "to numeric receiving field %s", + constant_of(constant_index($src))->name, + field->name ); + YYERROR; + } + + struct cbl_field_t* src = constant_of(constant_index($src)); + if( !parser_move2($tgts, src) ) { YYERROR; } + } + | MOVE all signed_literal[lit] TO move_tgts[tgts] + { + statement_begin(@1, MOVE); + cbl_refer_t src( $lit, $all); + if( !parser_move2($tgts, src) ) { YYERROR; } + } + + | MOVE intrinsic_call TO move_tgts[tgts] + { + statement_begin(@1, MOVE); + if( !parser_move2($tgts, *$2) ) { YYERROR; } + } + + | MOVE CORRESPONDING scalar[from] TO scalar[to] + { + statement_begin(@1, MOVE); + if( $from->field->type != FldGroup ) { + error_msg(@from, "%s does not name a group", $from->name()); + YYERROR; + } + if( $to->field->type != FldGroup ) { + error_msg(@to, "%s does not name a group", $to->name()); + YYERROR; + } + + if( !move_corresponding(*$to, *$from) ) { + yywarn( "%s and %s have no corresponding fields", + $from->field->name, $to->field->name ); + } + } + ; + +move_tgts: move_tgt[tgt] { + $$ = new tgt_list_t; + if( $tgt ) list_add($$->targets, *$tgt, current_rounded_mode()); + } + | move_tgts move_tgt[tgt] + { + if( $tgt ) list_add($1->targets, *$tgt, current_rounded_mode()); + } + ; +move_tgt: scalar[tgt] { + if( is_literal($tgt->field) ) { + auto litcon = $tgt->field->name[0] == '_'? "literal" : "constant"; + error_msg(@1, "%s is a %s", name_of($tgt->field), litcon); + } + } + | literal { + const auto& field(*$1); + static char buf[32]; + const char *value_str( name_of($literal) ); + if( is_numeric($1) && float(field.data.value) == int(field.data.value) ) { + sprintf(buf, "%d", int(field.data.value)); + value_str = buf; + } + auto litcon = field.name[0] == '_'? "literal" : "constant"; + error_msg(@literal, "%s is a %s", value_str, litcon); + $$ = NULL; + } + | error + { + static const char * error_at; + if( error_at != yytext ) { // avoid repeated message + error_at = yytext; + error_msg(first_line_of(@1), "invalid receiving operand"); + } + $$ = NULL; + } + ; + +multiply: multiply_impl end_multiply { ast_multiply($1); } + | multiply_cond end_multiply { ast_multiply($1); } + ; +multiply_impl: MULTIPLY multiply_body + { + statement_begin(@1, MULTIPLY); + $$ = $2; + } + ; +multiply_cond: MULTIPLY multiply_body[body] arith_errs[err] + { + statement_begin(@1, MULTIPLY); + $$ = $body; + $$->on_error = $err.on_error; + $$->not_error = $err.not_error; + } + ; +end_multiply: %empty %prec MULTIPLY + | END_MULTIPLY + ; + +multiply_body: num_operand BY rnames + { + $$ = new arith_t(no_giving_e); + $$->A.push_back(*$num_operand); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | num_operand BY signed_literal[lit] + { + error_msg(@lit, "%s is not a receiving field", name_of($lit)); + YYERROR; + } + | num_operand[a] BY num_operand[b] GIVING rnames + { + $$ = new arith_t(giving_e); + $$->A.push_back(*$a); + $$->B.push_back(*$b); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | num_operand[a] BY num_operand[b] GIVING signed_literal[lit] + { + error_msg(@lit, "%s is not a receiving field", name_of($lit)); + YYERROR; + } + | LITERAL + { + error_msg(@1, "invalid string operand '%s'", $1.data); + YYERROR; + } + ; + +arith_errs: arith_err[a] statements %prec ADD + { + assert( $a.on_error || $a.not_error ); + assert( ! ($a.on_error && $a.not_error) ); + cbl_label_t *tgt = $a.on_error? $a.on_error : $a.not_error; + parser_arith_error_end(tgt); + } + | arith_errs[a] arith_err[b] statements %prec ADD + { + if( $a.on_error && $a.not_error ) { + error_msg(@1, "too many ON ERROR clauses"); + YYERROR; + } + // "ON" and "NOT ON" could be reversed, but not duplicated. + if( $a.on_error && $b.on_error ) { + error_msg(@1, "duplicate ON ERROR clauses"); + YYERROR; + } + if( $a.not_error && $b.not_error ) { + error_msg(@1, "duplicate NOT ON ERROR clauses"); + YYERROR; + } + $$ = $a; + if( $b.on_error ) { + $$.on_error = $b.on_error; + assert($a.not_error); + } else { + $$.not_error = $b.not_error; + assert($a.on_error); + } + assert( $b.on_error || $b.not_error ); + assert( ! ($b.on_error && $b.not_error) ); + cbl_label_t *tgt = $b.on_error? $b.on_error : $b.not_error; + parser_arith_error_end(tgt); + } + ; + +arith_err: SIZE_ERROR + { + assert( $1 == ERROR || $1 == NOT ); + $$.on_error = NULL; + $$.not_error = NULL; + cbl_label_t **ptgt = $1 == NOT? &$$.not_error : &$$.on_error; + if( current.in_compute() ) { + *ptgt = $1 == NOT? + current.compute_not_error() : current.compute_on_error(); + } else { + *ptgt = label_add(LblArith, uniq_label("arith"), yylineno); + } + (*ptgt)->lain = yylineno; + parser_arith_error( *ptgt ); + } + ; + + /* + * Relational operator Can be written + * IS GREATER THAN IS > + * IS NOT GREATER THAN IS NOT > + * IS LESS THAN IS < + * IS NOT LESS THAN IS NOT < + * IS EQUAL TO IS = + * IS NOT EQUAL TO IS NOT = + * IS GREATER THAN OR EQUAL TO IS >= + * IS LESS THAN OR EQUAL TO IS <= + * + * The lexer returns simple tokens. + */ + +relop: '<' { $$ = '<'; } + | LE { $$ = LE; } + | '=' { $$ = '='; } + | NE { $$ = NE; } + | GE { $$ = GE; } + | '>' { $$ = '>'; } + ; + +rnames: scalar rounded + { + list_add( rhs, *$scalar, $rounded ); + } + | rnames scalar rounded + { + cbl_num_result_t arg = { static_cast<cbl_round_t>($rounded), + *$scalar }; + rhs.push_back(arg); + } + ; + +sum: num_operand { $$ = new refer_list_t($num_operand); } + | sum num_operand { $$->push_back($num_operand); } + ; + +num_operand: scalar + | signed_literal { $$ = new_reference($1); } + | intrinsic_call + ; + +num_value: scalar // might actually be a string + | intrinsic_call + | num_literal { $$ = new_reference($1); } + | ADDRESS OF scalar {$$ = $scalar; $$->addr_of = true; } + | DETAIL OF scalar {$$ = $scalar; } + | LENGTH_OF name[val] { + location_set(@1); + $$ = new cbl_refer_t( new_tempnumeric() ); + $$->field->clear_attr(signable_e); + if( dialect_gcc() ) { + dialect_error(@1, "LENGTH OF", "ibm"); + } + parser_set_numeric($$->field, $val->data.capacity); + } + | LENGTH_OF name[val] subscripts[subs] { + location_set(@1); + $$ = new cbl_refer_t( new_tempnumeric() ); + $$->field->clear_attr(signable_e); + if( dialect_gcc() ) { + dialect_error(@1, "LENGTH OF", "ibm"); + } + if( 0 == dimensions($val) ) { + cbl_refer_t r1($val); + subscript_dimension_error( @subs, $subs->refers.size(), &r1 ); + } + parser_set_numeric($$->field, $val->data.capacity); + } + ; + + + /* + * Constant Compile-time Expressions + */ + +/* cce_cond_expr: cce_bool_expr { $$ = $1 == 0? false : true; } */ +/* ; */ +/* cce_bool_expr: cce_and */ +/* | cce_bool_expr OR cce_and { $$ = $1 || $3; } */ +/* ; */ +/* cce_and: cce_reloper */ +/* | cce_and AND cce_reloper { $$ = $1 && $3; } */ +/* ; */ +/* cce_reloper: cce_relexpr */ +/* | NOT cce_relexpr { $$ = $2 != 0; } */ +/* ; */ +/* cce_relexpr: cce_expr */ +/* | cce_relexpr '<' cce_expr { $$ = $1 < $3; } */ +/* | cce_relexpr LE cce_expr { $$ = $1 <= $3; } */ +/* | cce_relexpr '=' cce_expr { $$ = $1 == $3; } */ +/* | cce_relexpr NE cce_expr { $$ = $1 != $3; } */ +/* | cce_relexpr GE cce_expr { $$ = $1 >= $3; } */ +/* | cce_relexpr '>' cce_expr { $$ = $1 > $3; } */ +/* ; */ + +cce_expr: cce_factor + | cce_expr '+' cce_expr { $$ = $1 + $3; } + | cce_expr '-' cce_expr { $$ = $1 - $3; } + | cce_expr '*' cce_expr { $$ = $1 * $3; } + | cce_expr '/' cce_expr { $$ = $1 / $3; } + | '+' cce_expr %prec NEG { $$ = $2; } + | '-' cce_expr %prec NEG { $$ = -$2; } + | '(' cce_expr ')' { $$ = $2; } + ; + +cce_factor: NUMSTR { + /* + * As of March 2023, glibc printf does not deal with + * __int128_t. The below assertion is not required. It + * serves only remind us we're far short of the precision + * required by ISO. + */ + static_assert( sizeof($$) == sizeof(_Float128), + "quadmath?" ); + static_assert( sizeof($$) == 16, + "long doubles?" ); + $$ = numstr2i($1.string, $1.radix); + } + ; + + /* + * End Constant Compile-time Expressions + */ + +section_name: NAME section_kw '.' + { + statement_begin(@1, SECTION); + $$ = label_add(@1, LblSection, $1); + ast_enter_section($$); + apply_declaratives(); + } + | NAME section_kw // lexer swallows '.' before USE + <label>{ + statement_begin(@1, SECTION); + $$ = label_add(@1, LblSection, $1); + ast_enter_section($$); + apply_declaratives(); + } [label] + cdf_use dot + { + $$ = $label; + } + ; + +section_kw: SECTION + { + if( $1 ) { + if( *$1 == '-' ) { + error_msg(@1, "SECTION segment %s is negative", $1); + } else { + cbl_unimplementedw("SECTION segment %s was ignored", $1); + } + } + } + | SECTION error + { + error_msg(@1, "unknown section qualifier"); + } + ; + +stop: STOP RUN exit_with + { + statement_begin(@1, STOP); + parser_see_stop_run( *$exit_with, NULL ); + } + | STOP NUMSTR[status] // IBM syntax + { + statement_begin(@1, STOP); + if( ! dialect_ibm() ) { + dialect_error(@2, "STOP <number> is not ISO syntax,", "ibm"); + YYERROR; + } + cbl_refer_t status( new_literal($status.string, $status.radix) ); + parser_see_stop_run( status, NULL ); + } + | STOP LITERAL[name] // CCVS-85 && IBM syntax + { + statement_begin(@1, STOP); + const char *name = string_of($name); + if( ! name ) { + error_msg(@name, "'%s' has embedded NUL", $name.data); + YYERROR; + } + parser_see_stop_run( literally_zero, $name.data ); + } + ; +stop_status: status { $$ = NULL; } + | status scalar { $$ = $2; } + | status NUMSTR { + $$ = new_reference(new_literal($2.string, $2.radix)); + } + ; + +subscripts: LPAREN expr_list ')' { + $$ = $2; + const auto& exprs( $$->refers ); + bool ok = std::all_of( exprs.begin(), exprs.end(), + []( const auto& refer ) { + return is_numeric(refer.field); + } ); + if( ! ok ) { + int i=0; + for( auto refer : exprs ) { + if( ! is_numeric(refer.field) ) { + error_msg(@1, "subscript %d, %s, is not numeric (%s)", + ++i, name_of(refer.field), + cbl_field_type_str(refer.field->type) + 3); + } + } + YYERROR; + } + } + ; +expr_list: expr + { + if( ! require_numeric(@expr, *$expr) ) YYERROR; + $$ = new refer_list_t($expr); + } + | expr_list expr { + if( $1->size() == MAXIMUM_TABLE_DIMENSIONS ) { + error_msg(@1, "table dimensions limited to %d", + MAXIMUM_TABLE_DIMENSIONS); + YYERROR; + } + if( ! require_numeric(@expr, *$expr) ) YYERROR; + $1->push_back($2); $$ = $1; + } + | ALL { + auto ref = new_reference(constant_of(constant_index(ZERO))); + $$ = new refer_list_t(ref); + } + ; + +arg_list: any_arg { $$ = new refer_list_t($1); } + | arg_list any_arg { $1->push_back($2); $$ = $1; } + ; +any_arg: expr + | LITERAL {$$ = new_reference(new_literal($1, quoted_e)); } + ; + + /* + * Because num_literal includes ZERO, this grammar + * allows -ZERO and +ZERO. FWIW. + */ +signed_literal: num_literal + | '+' num_literal { $$ = $2; } + | '-' num_literal + { + $$ = new_tempnumeric(); + struct cbl_field_t *zero = constant_of(constant_index(ZERO)); + parser_subtract( $$, zero, $2, current_rounded_mode() ); + } + | LENGTH_OF name[val] { + location_set(@1); + $$ = new_tempnumeric(); + $$->clear_attr(signable_e); + if( dialect_gcc() ) { + dialect_error(@1, "LENGTH OF", "ibm"); + } + parser_set_numeric($$, $val->data.capacity); + } + | LENGTH_OF name[val] subscripts[subs] { + location_set(@1); + $$ = new_tempnumeric(); + $$->clear_attr(signable_e); + if( dialect_gcc() ) { + dialect_error(@1, "LENGTH OF", "ibm"); + } + if( 0 == dimensions($val) ) { + cbl_refer_t r1($val); + subscript_dimension_error( @subs, $subs->refers.size(), &r1 ); + } + parser_set_numeric($$, $val->data.capacity); + } + ; + +num_literal: NUMSTR { $$ = new_literal($1.string, $1.radix); } + | ZERO { $$ = constant_of(constant_index(ZERO)); } + ; + +open: OPEN { statement_begin(@1, OPEN); } open_files + ; +open_files: open_file + | open_files open_file + ; +open_file: open_io[mode] filenames { + size_t n = $2->files.size(); + parser_file_open( n, use_list($2->files, false), $mode ); + current.declaratives_evaluate($2->files); + $2->files.clear(); + } + ; +open_io: INPUT { $$ = 'r'; } + | OUTPUT { $$ = 'w'; } + | EXTEND { $$ = 'a'; } + | IO { $$ = '+'; } + ; + +close: CLOSE { statement_begin(@1, CLOSE); } close_files + ; +close_files: close_file + | close_files close_file + ; +close_file: NAME close_how + { + struct symbol_elem_t *e = symbol_file(PROGRAM, $1); + if( !e ) { + error_msg(@1, "invalid file name '%s'", $1); + YYERROR; + } + auto how = static_cast<file_close_how_t>($close_how); + bool reel_unit = (file_close_reel_unit_e & $close_how) > 0; + auto file = cbl_file_of(e); + switch( file->org ) { + case file_disorganized_e: + gcc_unreachable(); + break; + case file_sequential_e: + case file_line_sequential_e: + break; + case file_indexed_e:; + case file_relative_e: + if( $close_how & ~file_close_with_lock_e ) { + error_msg(@1, "INDEXED or RELATIVE file " + "closed with incompatible qualifier" ); + YYERROR; + } + break; + } + if(reel_unit) + { + how = file_close_reel_unit_e; + } + parser_file_close( file, how ); + current.declaratives_evaluate( file ); + } + ; +close_how: %empty { $$ = file_close_no_how_e; } + | reel_unit { $$ = file_close_reel_unit_e; } + | reel_unit for_kw REMOVAL { + $$ = file_close_reel_unit_e | file_close_removal_e; + } + | reel_unit WITH NO REWIND { + $$ = file_close_reel_unit_e | file_close_no_rewind_e; + } + | with NO REWIND { $$ = file_close_no_rewind_e; } + | with LOCK { $$ = file_close_with_lock_e; } + ; +reel_unit: REEL + | UNIT + ; +for_kw: %empty + | FOR + ; + +perform: perform_verb perform_proc { perform_free(); } + | perform_verb perform_stmts { + perform_ec_cleanup(); + perform_free(); + } + | perform_verb perform_except { + perform_ec_cleanup(); + perform_free(); + } + ; + +perform_stmts: perform_until perform_inline[in] + { + size_t n = $in->varys.size(); + struct cbl_perform_vary_t varys[n]; + std::copy( $in->varys.begin(), $in->varys.end(), varys ); + + parser_perform_until(&$in->tgt, $in->before, n, varys); + } + | perform_vary perform_inline[in] + { + struct perform_t *p = $in; + size_t n = p->varys.size(); + struct cbl_perform_vary_t varys[n]; + std::copy( p->varys.begin(), p->varys.end(), varys ); + + parser_perform_until(&$in->tgt, $in->before, n, varys); + } + | perform_times perform_inline[in] + { + parser_perform_inline_times(&$in->tgt, *$perform_times); + } + | perform_inline[in] + { + parser_perform_inline_times(&$in->tgt, literally_one); + } + ; + +perform_proc: perform_names %prec NAME + { + struct perform_t *p = perform_current(); + if( yydebug ) p->tgt.dump(); + parser_perform(&p->tgt, NULL); + } + | perform_names num_operand TIMES + { + struct perform_t *p = perform_current(); + if( yydebug ) p->tgt.dump(); + parser_perform(&p->tgt, *$2); + } + | perform_names perform_until + { + struct perform_t *p = perform_current(); + if( yydebug ) p->tgt.dump(); + assert(1 == p->varys.size()); + parser_perform_until( &p->tgt, p->before, 1, &p->varys.front() ); + } + | perform_names perform_vary + { + struct perform_t *p = perform_current(); + if( yydebug ) p->tgt.dump(); + + size_t n = p->varys.size(); + struct cbl_perform_vary_t varys[n]; + std::copy( p->varys.begin(), p->varys.end(), varys ); + + parser_perform_until( &p->tgt, p->before, n, varys ); + } + ; + +perform_names: label_1[para] + { + perform_tgt_set($para); + } + | label_1[para1] THRU label_1[para2] + { + perform_tgt_set($para1, $para2); + } + ; + +perform_times: num_operand TIMES + { + $$ = $1; + } + ; + +perform_vary: test_before varying vary_afters + { + perform_current()->before = $1 == BEFORE; + } + | varying vary_afters + | test_before varying + { + perform_current()->before = $1 == BEFORE; + } + | varying + ; + +perform_verb: PERFORM { + statement_begin(@1, PERFORM); + $$ = perform_alloc(); + } + ; + +perform_until: test_before perform_cond + { + struct perform_t *p = perform_current(); + struct cbl_perform_vary_t vary; + + p->before = $1 == BEFORE; + vary.until = $2; + p->varys.push_back(vary); + } + | perform_cond + { + struct perform_t *p = perform_current(); + struct cbl_perform_vary_t vary; + + vary.until = $1; + p->varys.push_back(vary); + } + ; +perform_cond: UNTIL { parser_perform_conditional( &perform_current()->tgt); } + bool_expr + { + parser_perform_conditional_end( &perform_current()->tgt); + if( !is_conditional($bool_expr) ) { + error_msg(@1, "%s is not a condition expression", + name_of($bool_expr->field)); + YYERROR; + } + $$ = $bool_expr->cond(); + } + ; + +perform_inline: perform_start statements END_PERFORM + { + location_set(@END_PERFORM); + $$ = perform_current(); + if( $perform_start == LOCATION ) { + error_msg(@1, "LOCATION not valid with PERFORM Format 2"); + } + } + | perform_start END_PERFORM + { + location_set(@END_PERFORM); + $$ = perform_current(); + if( $perform_start == LOCATION ) { + error_msg(@1, "LOCATION not valid with PERFORM Format 2"); + } + } + ; +perform_start: %empty %prec LOCATION { + perform_ec_setup(); + $$ = 0; + } + | with LOCATION { + perform_ec_setup(); + $$ = LOCATION; + } + ; + +perform_except: perform_start + statements + { + auto perf = perform_current(); + parser_perform_inline_times(&perf->tgt, literally_one); + } + perform_when // paragraphs + perform_ec_other // paragraph + perform_ec_common // paragraph + { + auto perf = perform_current(); + parser_label_goto(perf->ec_labels.finally); + } + perform_ec_finally + END_PERFORM + { + auto perf = perform_current(); + // produce blob, jumped over by FINALLY paragraph + size_t iblob = symbol_declaratives_add( PROGRAM, perf->dcls ); + auto lave = perf->ec_labels.new_label(LblParagraph, "lave"); + auto handlers = cbl_field_of(symbol_at(iblob)); + + // install blob + parser_label_label(perf->ec_labels.init); + declarative_runtime_match(handlers, lave); + + // uninstall blob + parser_label_label(perf->ec_labels.fini); + } + ; + +perform_when: perform_when1 + | perform_when perform_when1 + ; +perform_when1: WHEN perform_ec { + // accumulate handlers and their paragraphs + auto perf = perform_current(); + auto when = perf->ec_labels.new_label(LblParagraph, "when"); + for( auto& dcl : $perform_ec->elems ) { + // use section to hold paragraph + dcl->section = symbol_index(symbol_elem_of(when)); + } + std::transform( $perform_ec->elems.begin(), + $perform_ec->elems.end(), + std::back_inserter(perf->dcls), + []( cbl_declarative_t *p ) { + return *p; + } ); + ast_enter_paragraph(when); + } + statements { + parser_exit_paragraph(); + } + ; + +perform_ec: EXCEPTION filenames { + auto dcls = new declarative_list_t; + auto p = $filenames->files.begin(); + auto pend = p; + while( pend != $filenames->files.end() ) { + for( size_t i=0; i < COUNT_OF(cbl_declarative_t::files); i++ ) { + if( ++pend == $filenames->files.end() ) break; + } + std::list<size_t> files; + std::transform( p, pend, std::back_inserter(files), + []( const cbl_file_t* f ) { + return symbol_index(symbol_elem_of(f)); } ); + + auto dcl = new cbl_declarative_t(0, ec_io_e, files, file_mode_none_e); + dcls->elems.push_back(dcl); + } + $$ = dcls; + } + | EXCEPTION io_mode { + auto dcl = new cbl_declarative_t($io_mode); + $$ = new declarative_list_t(dcl); + } + | except_names { + auto dcls = new declarative_list_t; + const ec_list_t * ecs($except_names); + // one cbl_declarative_t per EC + std::transform( ecs->elems.begin(), ecs->elems.end(), + std::back_inserter(dcls->elems), + []( ec_type_t ec ) + { + return new cbl_declarative_t(ec); + } ); + $$ = dcls; + } + | except_files { + // one cbl_declarative_t per 16 files + auto dcls = new declarative_list_t; + for( auto p = $except_files->elems.begin(); + p != $except_files->elems.end(); ) { + auto dcl = new cbl_declarative_t; + for( auto file = dcl->files; + file < dcl->files + COUNT_OF(dcl->files); file++ ) { + if( p != $except_files->elems.end() ) break; + *file = *p++; + } + dcls->elems.push_back(dcl); + } + $$ = dcls; + } + ; + +except_names: except_name { $$ = new ec_list_t($1); } + | except_names except_name { + $$ = $1->push_back($2); + } + ; +except_name: EXCEPTION_NAME[ec] { + assert($ec != ec_none_e); + $$ = $1; + } + ; + +except_files: except_name[ec] FILE_KW filenames { + assert($ec != ec_none_e); + if( ec_io_e != (ec_io_e & $ec) ) { + error_msg(@1, "%s is not of type EC-I-O", + ec_type_str($ec)); + } + $$ = new isym_list_t; + std::list<size_t>& files( $$->elems ); + std::transform( $filenames->files.begin(), + $filenames->files.end(), + std::back_inserter(files), + []( const cbl_file_t* f ) { + return symbol_index(symbol_elem_of(f)); } ); + } + ; + +perform_ec_other: + %empty %prec WHEN { + auto& ec_labels( perform_current()->ec_labels ); + ast_enter_paragraph(ec_labels.other); + parser_exit_paragraph(); + } + | WHEN OTHER { + auto& ec_labels( perform_current()->ec_labels ); + ast_enter_paragraph(ec_labels.other); + } + exception statements %prec WHEN { + parser_exit_paragraph(); + } + ; +perform_ec_common: + %empty { + auto& ec_labels( perform_current()->ec_labels ); + ast_enter_paragraph(ec_labels.common); + parser_exit_paragraph(); + } + | WHEN COMMON { + auto& ec_labels( perform_current()->ec_labels ); + ast_enter_paragraph(ec_labels.common); + } + exception statements { + parser_exit_paragraph(); + } + ; +perform_ec_finally: + %empty { + auto& ec_labels( perform_current()->ec_labels ); + ast_enter_paragraph(ec_labels.finally); + parser_exit_paragraph(); + parser_label_goto(ec_labels.fini); + } + | FINALLY { + auto& ec_labels( perform_current()->ec_labels ); + ast_enter_paragraph(ec_labels.finally); + } + exception statements { + parser_exit_paragraph(); + auto& ec_labels( perform_current()->ec_labels ); + parser_label_goto(ec_labels.fini); + } + ; + +test_before: with TEST BEFORE { $$ = BEFORE; } + | with TEST AFTER { $$ = AFTER; } + ; + +varying: VARYING num_operand[tgt] FROM num_operand[from] vary_by[by] + perform_cond[until] + { + struct cbl_perform_vary_t vary(*$tgt, *$from, *$by, $until); + perform_current()->varys.push_back(vary); + } + ; + +vary_afters: vary_after + | vary_afters vary_after + ; +vary_after: AFTER num_operand[tgt] FROM num_operand[from] vary_by[by] + perform_cond[until] + { + struct cbl_perform_vary_t vary(*$tgt, *$from, *$by, $until); + perform_current()->varys.push_back(vary); + } + ; +vary_by: %empty { $$ = new cbl_refer_t(literally_one); } + | BY num_operand { $$ = $2; } + ; + +reserved_value: spaces_etc + | ZERO { $$ = ZERO; } + | NULLS { $$ = NULLS; } + ; +spaces_etc: SPACES { $$ = SPACES; } + | HIGH_VALUES { $$ = HIGH_VALUES; } + | LOW_VALUES { $$ = LOW_VALUES; } + | QUOTES { $$ = QUOTES; } + ; + +variable_type: NUMERIC { $$ = NUMERIC; } + | ALPHABETIC { $$ = ALPHABETIC; } + | ALPHABETIC_LOWER { $$ = ALPHABETIC_LOWER; } + | ALPHABETIC_UPPER { $$ = ALPHABETIC_UPPER; } + | DBCS { $$ = DBCS; } + | KANJI { $$ = KANJI; } + ; + +subtract: subtract_impl end_subtract { ast_subtract($1); } + | subtract_cond end_subtract { ast_subtract($1); } + ; +subtract_impl: SUBTRACT subtract_body[body] + { + statement_begin(@1, SUBTRACT); + $$ = $body; + } + ; +subtract_cond: SUBTRACT subtract_body[body] arith_errs[err] + { + statement_begin(@1, SUBTRACT); + $body->on_error = $err.on_error; + $body->not_error = $err.not_error; + $$ = $body; + } + ; +end_subtract: %empty %prec SUBTRACT + | END_SUBTRACT + ; + +subtract_body: sum FROM rnames + { + $$ = new arith_t(no_giving_e, $sum); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | sum FROM num_operand[input] GIVING rnames + { + $$ = new arith_t(giving_e, $sum); + $$->B.push_back(*$input); + std::copy( rhs.begin(), + rhs.end(), back_inserter($$->tgts) ); + rhs.clear(); + } + | CORRESPONDING sum FROM rnames + { + corresponding_fields_t pairs = + corresponding_arith_fields( $sum->refers.front().field, + rhs.front().refer.field ); + if( pairs.empty() ) { + yywarn( "%s and %s have no corresponding fields", + $sum->refers.front().field->name, + rhs.front().refer.field->name ); + } + // First src/tgt elements are templates. + // Their subscripts apply to the correspondents. + $$ = new arith_t(corresponding_e, $sum); + $$->tgts.push_front(rhs.front()); + // use arith_t functor to populate A and tgts + *$$ = std::for_each( pairs.begin(), pairs.end(), *$$ ); + $$->A.pop_front(); + $$->tgts.pop_front(); + rhs.clear(); + } + ; + +vargs: varg { $$ = new vargs_t($varg); } + | vargs[args] varg { $args->push_back($varg); $$ = $args; } + ; + +varg: varg1 + | ALL varg1 { $$ = $2; $$->all = true; } + ; + +varg1: scalar + | varg1a + ; +varg1a: ADDRESS OF scalar { + $$ = $scalar; + $$->addr_of = true; + } + | intrinsic_call + | literal + { + $$ = new_reference($1); + } + | reserved_value + { + $$ = new_reference(constant_of(constant_index($1))); + } + | LENGTH_OF name[val] { + location_set(@1); + $$ = new cbl_refer_t( new_tempnumeric() ); + $$->field->clear_attr(signable_e); + if( dialect_gcc() ) { + dialect_error(@1, "LENGTH OF", "ibm"); + } + parser_set_numeric($$->field, $val->size()); + } + | LENGTH_OF name[val] subscripts[subs] { + location_set(@1); + $$ = new cbl_refer_t( new_tempnumeric() ); + $$->field->clear_attr(signable_e); + if( dialect_gcc() ) { + dialect_error(@1, "LENGTH OF", "ibm"); + } + if( 0 == dimensions($val) ) { + cbl_refer_t r1($val); + subscript_dimension_error( @subs, $subs->refers.size(), &r1 ); + } + parser_set_numeric($$->field, $val->data.capacity); + } + ; + +literal: literalism + { + $$ = $1.isymbol()? + cbl_field_of(symbol_at($1.isymbol())) + : + new_literal($1, quoted_e); + } + | NUMSTR + { + $$ = new_literal($1.string, $1.radix); + } + | DATETIME_FMT + { + $$ = new_literal(strlen($1), $1, quoted_e); + } + | DATE_FMT + { + $$ = new_literal(strlen($1), $1, quoted_e); + } + | TIME_FMT + { + $$ = new_literal(strlen($1), $1, quoted_e); + } + ; + +raise: RAISE EXCEPTION NAME + { + auto ec = ec_type_of($NAME); + if( ec == ec_none_e ) { + error_msg(@NAME, "not an EXCEPTION CONDITION: %s", $NAME); + YYERROR; + } + statement_begin(@$, RAISE); + parser_exception_raise(ec); + } + | RAISE NAME + { + auto ec = ec_type_of($NAME); + if( ec != ec_none_e ) { + error_msg(@NAME, "RAISE EXCEPTION required for " + "EXCEPTION CONDITION: %s", $NAME); + YYERROR; + } + cbl_unimplemented("RAISE <EXCEPTION OBJECT>"); + YYERROR; + } + ; + +read: read_file + { + current.declaratives_evaluate($1.file, $1.handled); + } + ; + +read_file: READ read_body { + file_read_args.call_parser_file_read(); + $$.file = $2; $$.handled = FsSuccess; + } + | READ read_body END_READ { + file_read_args.call_parser_file_read(); + $$.file = $2; $$.handled = FsSuccess; + } + | READ read_body read_eofs[err] { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsEofSeq : FsSuccess; + if( $$.file->access == file_access_rnd_e ) { + // None of ADVANCING, AT END, NEXT, NOT AT END, or PREVIOUS + // shall be specified if ACCESS MODE RANDOM + error_msg(@err, "%s: AT END invalid for ACCESS MODE RANDOM", $$.file->name); + YYERROR; + } + parser_fi(); + } + | READ read_body read_eofs[err] END_READ { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsEofSeq : FsSuccess; + if( $$.file->access == file_access_rnd_e ) { + error_msg(@err, "%s: AT END invalid for ACCESS MODE RANDOM", $$.file->name); + YYERROR; + } + parser_fi(); + } + | READ read_body io_invalids[err] { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsNotFound : FsSuccess; + parser_fi(); + } + | READ read_body io_invalids[err] END_READ { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsNotFound : FsSuccess; + parser_fi(); + } + ; + +read_body: NAME read_next read_into read_key + { + statement_begin(@$, READ); + struct symbol_elem_t *e = symbol_file(PROGRAM, $NAME); + if( !e ) { + error_msg(@1, "invalid file name '%s'", $NAME); + YYERROR; + } + + $$ = cbl_file_of(e); + + struct cbl_field_t *record = symbol_file_record($$); + if( !record ) { + error_msg(@1, "syntax error? invalid file record name"); + YYERROR; + } + if( 0 && $$->access == file_access_dyn_e && $read_next >= 0 ) { + error_msg(@1, "sequential DYNAMIC access requires NEXT RECORD"); + YYERROR; + } + if( $read_key->field && is_sequential($$) ) { + error_msg(@1, "SEQUENTIAL file %s has no KEY", $$->name); + YYERROR; + } + if( $$->org == file_line_sequential_e && $read_next == -2 ) { + error_msg(@1, "LINE SEQUENTIAL file %s cannot READ PREVIOUS", + $$->name); + YYERROR; + } + if( $read_key->field && $read_next < 0 ) { + error_msg(@1, "cannot read NEXT with KEY", $$->name); + YYERROR; + } + + int ikey = $read_next; + if( $read_key->field ) { + ikey = $$->key_one($read_key->field); + } + + file_read_args.init( $$, record, $read_into, ikey ); + } + ; + +read_next: %empty { $$ = 0; } + | PREVIOUS RECORD { $$ = -2; } + | PREVIOUS { $$ = -2; } + | NEXT RECORD { $$ = -1; } + | NEXT { $$ = -1; } + | RECORD { $$ = 0; } + ; + +read_into: %empty { $$ = NULL; } + | INTO scalar { $$ = $scalar; } + ; + + /* + * read_eofs may have 1 or 2 clauses, plus a boolean that + * represents whether the last one is a NOT clause. That is, + * there's an AT END clause if there are 2 clauses, or if + * there's one clause that is an AT END clause (tf is false). + */ +read_eofs: read_eof { $$.nclause = 1; $$.tf = $1; } + | read_eofs read_eof + { + $$ = $1; + if( ++$$.nclause > 2 ) { + error_msg(@2, "too many AT END conditions"); + YYERROR; + } + if( $$.tf == $read_eof ) { + error_msg(@2, "duplicate AT END conditions"); + YYERROR; + } + parser_fi(); + } + ; + +read_eof: END + { + if( file_read_args.ready() ) { + file_read_args.default_march(true); + file_read_args.call_parser_file_read(); + } + + static const struct status_t { file_status_t L, U; } + at_end = { FsEofSeq, FsKeySeq }, + not_at_end = { FsSuccess, FsEofSeq }; + assert( $1 == END || $1 == NOT ); + status_t st = $1 == END? at_end : not_at_end; + // L <= ec < U + cbl_field_t *cond = ast_file_status_between(st.L, st.U); + + parser_if(cond); + parser_exception_clear(); + } statements { + parser_else(); + $$ = $1 == NOT; + } + ; + +write_eops: write_eop { $$.nclause = 1; $$.tf = $1; } + | write_eops write_eop + { + $$ = $1; + if( ++$$.nclause > 2 ) { + error_msg(@2, "too many AT EOP conditions"); + YYERROR; + } + if( $$.tf == $write_eop ) { + error_msg(@2, "duplicate AT EOP conditions"); + YYERROR; + } + } + ; + +write_eop: EOP + { + // cond represents the _FILE_STATUS of the last WRITE. + static cbl_field_t *cond = constant_of(constant_index(ZERO)); + + if( file_write_args.ready() ) { + file_write_args.call_parser_file_write(true); + cond = ast_file_status_between(FsEofSeq, FsKeySeq); + } + assert( $1 == EOP || $1 == NOT ); + if( $1 == NOT ) { + parser_logop(cond, NULL, not_op, cond); + } + parser_if(cond); + parser_exception_clear(); + } statements { + parser_else(); + parser_fi(); + $$ = $1 == NOT; + } + ; + +read_key: %empty { $$ = new cbl_refer_t(); } + | KEY is name { $$ = new cbl_refer_t($name); } + ; + +write: write_file + { + current.declaratives_evaluate( $1.file, $1.handled ); + } + ; + +write_file: WRITE write_body + { + $$.file = $2; $$.handled = FsSuccess; + bool sequentially = $$.file->access == file_access_seq_e; + file_write_args.call_parser_file_write(sequentially); + } + | WRITE write_body END_WRITE + { + $$.file = $2; $$.handled = FsSuccess; + bool sequentially = $$.file->access == file_access_seq_e; + file_write_args.call_parser_file_write(sequentially); + } + | WRITE write_body write_eops[err] { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsEofSeq : FsSuccess; + } + | WRITE write_body write_eops[err] END_WRITE { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsEofSeq : FsSuccess; + } + | WRITE write_body io_invalids[err] { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsEofSeq : FsSuccess; + parser_fi(); + } + | WRITE write_body io_invalids[err] END_WRITE { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2; $$.handled = handled? FsEofSeq : FsSuccess; + parser_fi(); + } + ; + +write_body: write_what[field] advance_when[when] advancing + { + statement_begin(@$, WRITE); + cbl_file_t *file = symbol_record_file($field); + if( !file ) { + error_msg(@1, "no FD record found for %s", $field->name); + YYERROR; + } + $$ = file_write_args.init( file, $field, $when==AFTER, $advancing ); + current.declaratives_evaluate( file ); + } + | write_what[field] + { + statement_begin(@$, WRITE); + cbl_file_t *file = symbol_record_file($field); + if( !file ) { + error_msg(@1, "no FD record found for %s", $field->name); + YYERROR; + } + cbl_refer_t lines; + switch(file->org) { + case file_sequential_e: + break; + case file_line_sequential_e: + lines.field = literally_one; + break; + case file_disorganized_e: + case file_indexed_e: + case file_relative_e: + break; + } + $$ = file_write_args.init( file, $field, false, &lines ); + } + ; +write_what: file_record FROM alpha_val[input] + { + $$ = $1; + parser_move($$, *$input); + } + | file_record + ; +file_record: NAME + { + name_queue.qualify(@1, $1); + auto namelocs( name_queue.pop() ); + auto names( name_queue.namelist_of(namelocs) ); + auto inner = namelocs.back(); + if( ($$ = field_find(names)) == NULL ) { + error_msg(inner.loc, "no record name '%s'", inner.name); + YYERROR; + } + } + | NAME inof filename + { + std::list<const char *> names = {$filename->name, $NAME}; + auto record = symbol_find(names); + if( !record ) { + error_msg(@$, "%s IN %s not found", + $NAME, $filename->name); + YYERROR; + } + $$ = cbl_field_of(record); + } + | FILE_KW filename + { + $$ = cbl_field_of(symbol_at($filename->default_record)); + } + ; +advance_when: BEFORE { $$ = BEFORE; } + | AFTER { $$ = AFTER; } + ; + +advancing: advance_by + | ADVANCING advance_by { $$ = $2; } + ; +advance_by: scalar lines { $$ = $1; } /* BUG: should accept reference */ + | signed_literal lines { $$ = new_reference($1); } + | PAGE + { + /* + * The standard says behavior is undefined when the + * number of lines is negative. So, we use the + * negative Number Of The Beast as a PAGE flag. + */ + $$ = new_reference( new_literal("-666") ); + } + | device_name { $$ = new_reference(literally_one); } + ; + +io_invalids: io_invalid { $$.nclause = 1; $$.tf = $io_invalid; } + | io_invalids io_invalid + { + $$ = $1; + if( ++$$.nclause > 2 ) { + error_msg(@2, "too many INVALID clauses"); + YYERROR; + } + if( $$.tf == $io_invalid ) { + error_msg(@2, "duplicate INVALID conditions"); + YYERROR; + } + parser_fi(); + } + ; + +io_invalid: INVALID key { + if( file_delete_args.ready() ) { + file_delete_args.call_parser_file_delete(false); + } + if( file_read_args.ready() ) { + file_read_args.default_march(false); + file_read_args.call_parser_file_read(); + } + if( file_rewrite_args.ready() ) { + file_rewrite_args.call_parser_file_rewrite(false); + } + if( file_start_args.ready() ) { + file_start_args.call_parser_file_start(); + } + if( file_write_args.ready() ) { + file_write_args.call_parser_file_write(false); + } + + static const struct status_t { file_status_t L, U; } + invalid = { FsKeySeq, FsOsError }, + not_invalid = { FsSuccess, FsEofSeq }; + assert( $1 == INVALID || $1 == NOT ); + status_t st = $1 == INVALID? invalid : not_invalid; + // L <= ec < U + cbl_field_t *cond = ast_file_status_between(st.L, st.U); + + parser_if(cond); + parser_exception_clear(); + } statements { + parser_else(); + $$ = $1 == NOT; + } + ; + +delete: delete_impl end_delete + | delete_cond end_delete + ; +delete_impl: DELETE delete_body[file] + { + file_delete_args.call_parser_file_delete(true); + current.declaratives_evaluate( $file ); + } + ; +delete_cond: DELETE delete_body[file] io_invalids + { + if( is_sequential($file) ) { + error_msg(@2, "INVALID KEY phrase invalid for sequential file '%s'", + $file->name); + YYERROR; + } + if( $file->access == file_access_seq_e ) { + error_msg(@2, "INVALID KEY phrase invalid for " + "sequential access mode on '%s'", + $file->name); + YYERROR; + } + parser_fi(); + // call happens in io_invalid + current.declaratives_evaluate( $file ); + } + ; + +delete_body: filename[file] record + { + statement_begin(@1, DELETE); + file_delete_args.init( $file ); + $$ = $file; + } + ; +end_delete: %empty %prec DELETE + | END_DELETE + ; + +rewrite: rewrite1 + { + current.declaratives_evaluate($1.file, $1.handled); + } + ; + +rewrite1: REWRITE rewrite_body end_rewrite { + $$.file = $2.file; $$.handled = FsSuccess; + file_rewrite_args.call_parser_file_rewrite( true ); + } + | REWRITE rewrite_body io_invalids[err] end_rewrite { + bool handled = $err.nclause == 2 || !$err.tf; + $$.file = $2.file; $$.handled = handled? FsNotFound : FsSuccess; + + if( is_sequential($$.file) ) { + error_msg(@2, "INVALID KEY for sequential file '%s'", + $$.file->name); + YYERROR; + } + if( $$.file->relative_sequential() ) { + error_msg(@2, "%s: INVALID KEY may not be specified for " + "RELATIVE file and SEQUENTIAL access", + $$.file->name); + YYERROR; + } + parser_fi(); + } + ; + +rewrite_body: write_what record + { + statement_begin(@$, REWRITE); + symbol_elem_t *e = symbol_file(PROGRAM, $1->name); + file_rewrite_args.init(cbl_file_of(e), $1); + $$.file = cbl_file_of(e); + $$.buffer = $1; + } + ; +end_rewrite: %empty %prec REWRITE + | END_REWRITE + ; + +start: start_impl end_start + | start_cond end_start + ; +start_impl: START start_body + ; +start_cond: START start_body io_invalids { + parser_fi(); + } + ; +end_start: %empty %prec START + | END_START + ; + +start_body: filename[file] + { + statement_begin(@$, START); + file_start_args.init(@file, $file); + parser_file_start( $file, lt_op, 0 ); + } + | filename[file] KEY relop name[key] + { // lexer swallows IS, although relop allows it. + statement_begin(@$, START); + int key = $file->key_one($key); + int size = key == 0 ? 0 : $file->keys[key - 1].size(); + auto ksize = new_tempnumeric(); + parser_set_numeric(ksize, size); + if( yydebug ) { + yywarn("START: key #%d '%s' has size %d", + key, $key->name, size); + } + file_start_args.init(@file, $file); + parser_file_start( $file, relop_of($relop), key, ksize ); + } + | filename[file] KEY relop name[key] with LENGTH expr + { // lexer swallows IS, although relop allows it. + statement_begin(@$, START); + int key = $file->key_one($key); + file_start_args.init(@file, $file); + parser_file_start( $file, relop_of($relop), key, *$expr ); + } + | filename[file] FIRST + { + statement_begin(@$, START); + file_start_args.init(@file, $file); + parser_file_start( $file, lt_op, -1 ); + } + | filename[file] LAST + { + statement_begin(@$, START); + file_start_args.init(@file, $file); + parser_file_start( $file, gt_op, -2 ); + } + ; + +merge: MERGE { statement_begin(@1, MERGE); } + filename[file] sort_keys sort_seq + USING filenames[inputs] sort_output + { + size_t nkey = $sort_keys->key_list.size(); + cbl_key_t keys[nkey], *pkey = keys; + + for( auto p = $sort_keys->key_list.begin(); + p != $sort_keys->key_list.end(); p++, pkey++ ) + { + cbl_key_t k(*p); + *pkey = k; + } + + size_t ninput = $inputs->files.size(); + size_t noutput = $sort_output->nfile(); + cbl_file_t **inputs = NULL, **outputs = NULL; + cbl_perform_tgt_t *out_proc = NULL; + + inputs = new cbl_file_t * [ ninput ]; + std::copy($inputs->files.begin(), + $inputs->files.end(), inputs); + + if( noutput > 0 ) { + outputs = new cbl_file_t * [ noutput ]; + std::copy($sort_output->file_list.files.begin(), + $sort_output->file_list.files.end(), outputs); + } else { + out_proc = &$sort_output->tgt; + } + + parser_file_merge( $file, $sort_seq, + nkey, keys, + ninput, inputs, + noutput, outputs, + out_proc ); + } + ; + +set_tgts: set_tgt { + $$ = new tgt_list_t; + list_add($$->targets, *$set_tgt, current_rounded_mode()); + } + | set_tgts set_tgt + { + list_add($1->targets, *$set_tgt, current_rounded_mode()); + } + ; +set_operand: set_tgt + | signed_literal { $$ = new_reference($1); } + | ADDRESS of FUNCTION ctx_name[name] + { + $$ = NULL; + auto e = symbol_function(0, $name); + if( e ) { + $$ = new cbl_refer_t(cbl_label_of(e)); + } else { + e = symbol_find(@name, $name); + if( !e ) { + error_msg(@name, "%s not found", $name); + YYERROR; + } + $$ = new cbl_refer_t(cbl_field_of(e)); + } + assert($$); + } + | ADDRESS of PROGRAM_kw ctx_name[name] + { + $$ = NULL; + auto label = symbol_program(0, $name); + if( label ) { + $$ = new cbl_refer_t(label); + } else { + auto e = symbol_find(@name, $name); + if( !e ) { + error_msg(@name, "%s not found", $name); + YYERROR; + } + $$ = new cbl_refer_t(cbl_field_of(e)); + } + assert($$); + } + | ADDRESS of PROGRAM_kw LITERAL[lit] + { + auto label = symbol_program(0, $lit.data); + $$ = new cbl_refer_t( label ); + } + ; +set_tgt: scalar + | ADDRESS of scalar { $$ = $scalar; $$->addr_of = true; } + ; + +set: SET set_tgts[tgts] TO set_operand[src] + { + statement_begin(@1, SET); + + switch( set_operand_type(*$src) ) { + case FldInvalid: + if( ! ($src->prog_func && $src->addr_of) ) { + error_msg(@src, "SET source operand '%s' is invalid", $src->name()); + YYERROR; + break; + } + __attribute__((fallthrough)); + case FldPointer: + if( !valid_set_targets(*$tgts, true) ) { + YYERROR; + } + ast_set_pointers($tgts->targets, *$src); + break; + + case FldIndex: + case FldPacked: + case FldNumericDisplay: + case FldNumericBinary: + case FldFloat: + case FldNumericBin5: + case FldLiteralN: + if( !valid_set_targets(*$tgts, $src->is_pointer()) ) { + YYERROR; + } + parser_index($tgts, *$src); + break; + default: + if( strcmp($src->field->name, "ZEROS") != 0 ) { + error_msg(@src, "%s must be numeric or POINTER type", + $src->field->name); + YYERROR; + } + } + } + | SET set_tgts[tgts] TO NULLS[src] + { + statement_begin(@1, SET); + if( !valid_set_targets(*$tgts, true) ) { + YYERROR; + } + ast_set_pointers($tgts->targets, constant_of(constant_index(NULLS))); + } + | SET set_tgts TO spaces_etc[error] + { + error_msg(@2, "invalid value for SET TO"); + } + | SET set_tgts[tgts] TO ENTRY scalar[src] + { + ast_set_pointers($tgts->targets, *$src); + } + | SET set_tgts[tgts] TO ENTRY LITERAL[src] + { + auto literal = $src.isymbol()? + cbl_field_of(symbol_at($src.isymbol())) + : + new_literal($src, quoted_e); + ast_set_pointers($tgts->targets, literal); + } + | SET set_tgts[tgts] UP BY num_operand[src] + { + statement_begin(@1, SET); + list<cbl_num_result_t>& tgts = $tgts->targets; + + for( auto p = tgts.begin(); p != tgts.end(); p++ ) { + parser_add2( *p, *$src ); + } + delete $tgts; + } + | SET set_tgts[tgts] DOWN BY num_operand[src] + { + statement_begin(@1, SET); + list<cbl_num_result_t>& tgts = $tgts->targets; + + for( auto p = tgts.begin(); p != tgts.end(); p++ ) { + parser_subtract2( *p, *$src ); + } + delete $tgts; + } + | SET ENVIRONMENT envar TO alpha_val[scalar] + { + statement_begin(@1, SET); + parser_set_envar(*$envar, *$scalar); + } + | SET LAST EXCEPTION TO OFF + { + statement_begin(@1, SET); + // send the signal to clear the stashed exception values + parser_exception_raise(ec_none_e); + } + | SET LENGTH_OF scalar TO scalar + { + statement_begin(@1, SET); + cbl_unimplemented("SET LENGTH OF"); + YYERROR; + } + | SET scalar88s[names] TO true_false[yn] + { + statement_begin(@1, SET); + class set_conditional { + bool tf; + public: + set_conditional( int token ) : tf(token == TRUE_kw) {} + void operator()(cbl_refer_t& refer) { + if( refer.field->data.false_value == NULL && !tf ) { + auto loc = symbol_field_location(field_index(refer.field)); + error_msg(loc, "%s has no WHEN SET TO FALSE", + refer.field->name); + return; + } + parser_set_conditional88(refer, tf); + } + }; + std::for_each($names->refers.begin(), $names->refers.end(), + set_conditional($yn)); + } + | SET { statement_begin(@1, SET); } many_switches + ; + +many_switches: set_switches + | many_switches set_switches + ; + +set_switches: switches TO on_off + { + struct switcheroo { + bitop_t op; + switcheroo( bool tf ) : op( tf? bit_set_op : bit_clear_op ) {} + switcheroo& operator()(cbl_field_t* sw) { + assert(sw->type == FldSwitch); + assert(sw->data.initial); // not a switch condition + parser_bitop(NULL, parent_of(sw), + op, sw->data.upsi_mask_of()); + return *this; + } + }; + std::for_each( $switches->fields.begin(), $switches->fields.end(), + switcheroo($on_off) ); + } + ; + +switches: one_switch { $$ = new field_list_t($1); } + | switches one_switch[sw] { $$->fields.push_back($sw); } + ; +one_switch: SWITCH { + $$ = cbl_field_of(symbol_find(@1, $1)); + } + ; + +on_off: ON { $$ = true; } + | OFF { $$ = false; } + ; + +search: search_linear end_search + | search_binary end_search + ; + +search_linear: SEARCH search_1_place search_1_cases + { + parser_lsearch_end(search_current()); + search_free(); + } + ; +end_search: %empty %prec SEARCH + | END_SEARCH + ; + +search_1_place: search_1_body + | search_1_body at END statements + ; + +search_1_body: name[table] search_varying[varying] + { + statement_begin(@$, SEARCH); + cbl_field_t *index = table_primary_index($table); + if( !index ) { + error_msg(@1, "%s has no defined index", $table->name); + YYERROR; + } + + cbl_name_t label_name; + auto len = snprintf(label_name, sizeof(label_name), + "linear_search_%d", yylineno); + if( ! (0 < len && len < int(sizeof(label_name))) ) { + gcc_unreachable(); + } + cbl_label_t *name = label_add( LblSearch, + label_name, yylineno ); + auto varying($varying); + if( index == varying ) varying = NULL; + parser_lsearch_start( name, $table, index, varying ); + search_alloc(name); + } + ; + +search_varying: %empty { $$ = NULL; } + | VARYING name { $$ = $2; } + ; + +search_1_cases: search_1_case + { + if( yydebug ) { + const char *lookahead = "?"; + switch( yychar ) { + case 0: lookahead = "YYEOF"; break; + case -2: lookahead = "YYEMPTY"; break; + default: + if( yychar > 0 ) { + lookahead = keyword_str(yychar); + } + } + yywarn("Just one case, lookahead is '%s'", lookahead); + } + } + | search_1_cases search_1_case + ; +search_1_case: search_1_when search_1_test search_stmts + ; +search_1_when: WHEN { parser_lsearch_conditional(search_current()); } + ; +search_1_test: bool_expr { + parser_lsearch_when( search_current(), $bool_expr->cond() ); + } + ; + +search_binary: SEARCH ALL search_2_body search_2_cases + { + parser_bsearch_end(search_current()); + search_free(); + } + | SEARCH ALL search_2_body at END statements search_2_cases + { + parser_bsearch_end(search_current()); + search_free(); + } + ; + +search_2_body: name[table] + { + statement_begin(@$, SEARCH); + char *label_name = xasprintf("binary_search_%d", yylineno); + cbl_label_t *name = label_add( LblSearch, + label_name, yylineno ); + parser_bsearch_start( name, $table ); + search_alloc(name); + } + ; + +search_2_cases: search_2_case + | search_2_cases search_2_case + ; +search_2_case: WHEN { parser_bsearch_conditional(search_current()); } + search_terms search_stmts + ; + +search_stmts: statements %prec ADD + | NEXT SENTENCE %prec ADD { + next_sentence = label_add(LblNone, "next_sentence", 0); + parser_label_goto(next_sentence); + } + ; + +search_terms: search_term + | search_terms AND search_term + ; +search_term: scalar[key] '=' search_expr[sarg] + { + if( $key->nsubscript == 0 ) { + error_msg(@1, "no index for key"); + YYERROR; + } + if( dimensions($key->field) < $key->nsubscript ) { + error_msg(@1, "too many subscripts: " + "%zu for table of %zu dimensions", + $key->nsubscript, dimensions($key->field) ); + YYERROR; + } + + parser_bsearch_when( search_current(), + *$key, + *$sarg, + is_ascending_key(*$key) ); + } + | scalar88[sarg] { + cbl_field_t *key = field_at($sarg->field->parent); + parser_bsearch_when( search_current(), key, *$sarg, + is_ascending_key(key) ); + } + ; +search_expr: expr + | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + ; + +sort: sort_table + | sort_file + ; + +sort_table: SORT tableref[table] sort_keys sort_dup sort_seq { + statement_begin(@1, SORT); + size_t nkey = $sort_keys->key_list.size(); + cbl_key_t keys[nkey], *pkey = keys; + if( ! is_table($table->field) ) { + error_msg(@1, "%s has no OCCURS clause", $table->field->name); + } + // 23) If data-name-1 is omitted, the data item referenced by + // data-name-2 is the key data item. + for( auto k : $sort_keys->key_list ) { + if( k.fields.empty() ) { + k.fields.push_back($table->field); + } + *pkey++ = cbl_key_t(k); + } + + parser_sort( *$table, $sort_dup, $sort_seq, nkey, keys ); + } + | SORT tableref[table] sort_dup sort_seq { + statement_begin(@1, SORT); + if( ! is_table($table->field) ) { + error_msg(@1, "%s has no OCCURS clause", $table->field->name); + } + cbl_key_t + key = cbl_key_t($table->field->occurs.keys[0]), + guess(1, &$table->field); + ; + if( key.nfield == 0 ) key = guess; + parser_sort( *$table, $sort_dup, $sort_seq, 1, &key ); + } + ; + +sort_file: SORT FILENAME[file] sort_keys sort_dup sort_seq + sort_input sort_output + { + statement_begin(@1, SORT); + struct symbol_elem_t *e = symbol_file(PROGRAM, $file); + if( !(e && e->type == SymFile) ) { + error_msg(@file, "invalid file name"); + YYERROR; + } + cbl_file_t *file = cbl_file_of(e); + size_t nkey = $sort_keys->key_list.size(); + cbl_key_t keys[nkey], *pkey = keys; + + for( auto p = $sort_keys->key_list.begin(); + p != $sort_keys->key_list.end(); p++, pkey++ ) + { + cbl_key_t k(*p); + *pkey = k; + } + + size_t ninput = $sort_input->nfile(); + size_t noutput = $sort_output->nfile(); + cbl_file_t **inputs = NULL, **outputs = NULL; + cbl_perform_tgt_t *in_proc = NULL, *out_proc = NULL; + + if( ninput > 0 ) { + inputs = new cbl_file_t * [ ninput ]; + std::copy($sort_input->file_list.files.begin(), + $sort_input->file_list.files.end(), inputs); + } else { + in_proc = &$sort_input->tgt; + } + if( noutput > 0 ) { + outputs = new cbl_file_t * [ noutput ]; + std::copy($sort_output->file_list.files.begin(), + $sort_output->file_list.files.end(), outputs); + } else { + out_proc = &$sort_output->tgt; + } + + parser_file_sort( file, + $sort_dup, + $sort_seq, + nkey, keys, + ninput, inputs, + noutput, outputs, + in_proc, out_proc ); + } + | SORT FILENAME[file] sort_keys sort_dup sort_seq error + { + error_msg(@file, "SORT missing INPUT or OUTPUT phrase"); + } + + +sort_keys: sort_key { + $$ = new sort_keys_t(); + $$->key_list.push_back(*$sort_key); + } + | sort_keys sort_key { $$->key_list.push_back(*$sort_key); } + ; + +sort_key: on forward_order key field_list %prec NAME + { + $$ = new sort_key_t( $forward_order, *$field_list ); + } + | on forward_order key %prec NAME + { + field_list_t flist; + $$ = new sort_key_t( $forward_order, flist ); + } + ; + +forward_order: ASCENDING { $$ = true; } + | DESCENDING { $$ = false; } + ; +field_list: name { $$ = new field_list_t($1); } + | field_list name { $1->fields.push_back($name); } + ; + +sort_dup: %empty { $$ = false; } + | with DUPLICATES in order { $$ = true; } + ; +sort_seq: %empty { $$ = NULL; } + | collating SEQUENCE is ctx_name[name] + { + symbol_elem_t *e = symbol_alphabet(PROGRAM, $name); + if( !e ) { + error_msg(@name, "not an alphabet: '%s'", $name); + $$ = NULL; + } + $$ = cbl_alphabet_of(e); + } + ; + +sort_input: USING filenames + { + $$ = new file_sort_io_t(*$2); + delete $2; + } + | INPUT PROCEDURE is sort_target + { + $$ = new file_sort_io_t(*$sort_target); + delete $sort_target; + } + ; +sort_output: GIVING filenames + { + $$ = new file_sort_io_t(*$2); + } + | OUTPUT PROCEDURE is sort_target + { + $$ = new file_sort_io_t(*$sort_target); + } + ; + +sort_target: label_name + { + $$ = new cbl_perform_tgt_t($1); + } + | label_name THRU label_name + { + $$ = new cbl_perform_tgt_t($1, $3); + } + ; + +release: RELEASE NAME[record] FROM scalar[name] + { + statement_begin(@1, RELEASE); + symbol_elem_t *record = symbol_find(@record, $record); + parser_move(cbl_field_of(record), *$name); + parser_release(cbl_field_of(record)); + } + | RELEASE NAME[record] + { + statement_begin(@1, RELEASE); + symbol_elem_t *record = symbol_find(@record, $record); + parser_release(cbl_field_of(record)); + } + ; + +return_stmt: return_impl return_end + | return_cond return_end + ; + +return_impl: RETURN return_body[body] + { + cbl_file_t *file = cbl_file_of(symbol_at(current_sort_file)); + parser_return_finish(file); + current_sort_file = $body; + } + ; + +return_cond: RETURN return_body[body] return_outputs + { + cbl_file_t *file = cbl_file_of(symbol_at(current_sort_file)); + parser_return_finish(file); + current_sort_file = $body; + } + ; +return_end: %empty %prec RETURN + | END_RETURN + ; + +return_body: return_file + { + file_return_args.call_parser_return_start(); + } + | return_file INTO scalar + { + file_return_args.call_parser_return_start(*$scalar); + } + ; + +return_file: filename + { + statement_begin(@$, RETURN); + $$ = current_sort_file; // preserve current sort file + current_sort_file = symbol_index(symbol_elem_of($filename)); + file_return_args.init($filename); + } + | filename RECORD + { + statement_begin(@$, RETURN); + $$ = current_sort_file; // preserve current sort file + current_sort_file = symbol_index(symbol_elem_of($filename)); + file_return_args.init($filename); + } + ; +return_outputs: return_output + | return_outputs return_output // TODO: only 2, AT END and/or NOT AT END + ; +return_output: output_atend statements %prec RETURN + ; + +output_atend: END { + assert($1 == END || $1 == NOT); + auto func = $1 == END? + parser_return_atend : parser_return_notatend ; + func(cbl_file_of(symbol_at(current_sort_file))); + } + ; +filenames: filename { $$ = new file_list_t($1); } + | filenames filename { $1->files.push_back($2); } + ; +filename: NAME + { + struct symbol_elem_t *e = symbol_file(PROGRAM, $1); + if( !(e && e->type == SymFile) ) { + error_msg(@NAME, "invalid file name"); + YYERROR; + } + $$ = cbl_file_of(e); + } + ; + +label_name: NAME + { + struct cbl_label_t *label = symbol_label(PROGRAM, + LblNone, 0, $1); + if( !label ) { // no line number for forward declaraion + label = label_add(@NAME, LblNone, $1); + } + $$ = label; + } + ; + +inspected: scalar + | intrinsic_call + ; +backward: %empty { $$ = false; } + | BACKWARD { $$ = true; } + ; +inspect: INSPECT backward inspected TALLYING tallies + { + statement_begin(@1, INSPECT); + ast_inspect( *$inspected, $backward, *$tallies ); + } + | INSPECT backward inspected TALLYING tallies REPLACING replacements + { + if( is_constant($inspected->field) ) { + auto name = nice_name_of($inspected->field); + if( !name[0] ) name = "its argument"; + error_msg(@inspected, "INSPECT cannot write to %s", name); + YYERROR; + } + statement_begin(@1, INSPECT); + // All tallying is done before any replacing + ast_inspect( *$inspected, $backward, *$tallies ); + ast_inspect( *$inspected, $backward, *$replacements ); + } + | INSPECT backward inspected REPLACING replacements + { + if( is_constant($inspected->field) ) { + auto name = nice_name_of($inspected->field); + if( !name[0] ) name = "its argument"; + error_msg(@inspected, "INSPECT cannot write to %s", name); + YYERROR; + } + statement_begin(@1, INSPECT); + ast_inspect( *$inspected, $backward, *$replacements ); + } + | INSPECT backward inspected CONVERTING alpha_val[match] + TO all alpha_val[replace_oper] + insp_mtquals[qual] + { + if( $all ) { + $replace_oper->all = true; + if( is_literal($replace_oper->field) ) { + if( $replace_oper->field->data.capacity != 1 ) { + error_msg(@all, "ALL %s must be a single character", + $replace_oper->field->data.initial); + YYERROR; + } + } else { + error_msg(@all, "ALL must be part of a figurative constant"); + YYERROR; + } + } + if( is_constant($inspected->field) ) { + auto name = nice_name_of($inspected->field); + if( !name[0] ) name = "its argument"; + error_msg(@inspected, "INSPECT cannot write to %s", name); + YYERROR; + } + statement_begin(@1, INSPECT); + // IBM Format 4 does not show the qualifiers as optional, but + // they don't appear in Listing-15-1. + parser_inspect_conv( *$inspected, $backward, + *$match, + *$replace_oper, + $qual->before, $qual->after ); + } + ; + +tallies: { need_nume_set(); } tally + { + $$ = new ast_inspect_list_t( *$tally ); + } + | tallies { need_nume_set(); } tally + { + $$ = $1; + cbl_inspect_t& next(*$tally); + + if( !next.tally.field ) { + // prior tally swallowed one too many + cbl_inspect_t& prior = $$->back(); + assert(prior.nbound > 0); + assert(prior.opers); + cbl_inspect_oper_t& prior_op = prior.opers[prior.nbound - 1]; + + assert(prior_op.n_identifier_3 > 0 ); + next.tally = prior_op.matches[--prior_op.n_identifier_3].matching; + } + if( !next.tally.field ) { + error_msg(@$, "missing summation field before FOR"); + YYERROR; + } + $$->push_back(next); + } + ; + + /* + * numref might be "empty" only because it was consumed by a + * prior insp_mtquals, which can end in a scalar. If that + * happens, the tallies target, above, takes back the borrowed + * scalar and assigns it to be the tally total, as the user + * intended. + */ +tally: numeref[total] FOR tally_fors[fors] + { // reduce ast_inspect_t to cbl_inspect_t + if( yydebug && !$total ) { + error_msg(@FOR, "caution: missing summation field before FOR"); + } + cbl_refer_t total( $total? *$total : cbl_refer_t() ); + $$ = new cbl_inspect_t( total, $fors->opers() ); + } + ; + +tally_fors: tally_forth + { // reduce ast_inspect_oper_t to cbl_inspect_oper_t + cbl_inspect_oper_t oper( $1->bound, $1->matches ); + $$ = new ast_inspect_t; + $$ ->push_back(oper); + } + | tally_fors tally_forth + { + cbl_inspect_oper_t oper( $2->bound, $2->matches ); + $1 ->push_back(oper); + } + ; + +tally_forth: CHARACTERS insp_mtquals[q] scalar[next_tally] + { + // Add ensuing scalar as if it were an argument to CHARACTERS. + // It will be moved to the succeeding FOR as its tally. + $q->matching = *$next_tally; + $$ = new ast_inspect_oper_t(*$q); + } + | CHARACTERS insp_mtquals[q] + { + $$ = new ast_inspect_oper_t(*$q); + } + | ALL tally_matches[q] + { $q->bound = bound_all_e; + $$ = $q; + } + | LEADING tally_matches[q] + { $q->bound = bound_leading_e; + $$ = $q; + } + | TRAILING tally_matches[q] + { $q->bound = bound_trailing_e; + $$ = $q; + if( ! dialect_mf() ) { + dialect_error(@1, "TRAILING", "mf"); + } + } + ; + +tally_matches: tally_match { $$ = new ast_inspect_oper_t(*$1); } + | tally_matches tally_match + { // add to the list of matches for an operand + $1->matches.push_back(*$2); + } + ; +tally_match: alpha_val[matching] insp_mtquals[q] + { // include the matching field with the qualifiers + $$ = $q; + $$->matching = *$matching; + } + ; + +numeref: %empty { $$ = NULL; need_nume_set(false); } + | nume[name] subscripts[subs] + { + size_t n = $subs->size(); + auto offsets = new cbl_refer_t[n]; + std::copy( $subs->begin(), $subs->end(), offsets ); + $$ = new cbl_refer_t($name, n, offsets); + } + | nume { $$ = new cbl_refer_t($nume); } + ; + +nume: qnume { + $$ = NULL; + struct symbol_elem_t *e = NULL; + size_t index = 0; + auto names( name_queue.pop() ); + + for( ; !names.empty(); names.pop_front() ) { + auto nameloc = names.front(); + if( (e = symbol_field(PROGRAM, + index, nameloc.name)) == NULL ) { + error_msg(nameloc.loc, "DATA-ITEM '%s' not found", nameloc.name ); + YYERROR; + } + $$ = cbl_field_of(e); + index = symbol_index(e); + } + } + ; + +qnume: NUME { name_queue.qualify(@1, $1); } + | qnume inof NUME { name_queue.qualify(@3, $3); } + ; + +replacements: replacement + { + cbl_inspect_t inspect( cbl_refer_t(), $1->opers() ); + $$ = new ast_inspect_list_t(inspect); + } + ; +replacement: replace_oper + { + $$ = new ast_inspect_t; + $$->push_back( cbl_inspect_oper_t($1->bound, $1->replaces) ); + } + | replacement replace_oper + { + $$->push_back( cbl_inspect_oper_t($2->bound, $2->replaces) ); + } + ; +replace_oper: CHARACTERS BY alpha_val[replace] insp_mtquals[q] + { + $$ = new ast_inspect_oper_t( cbl_inspect_replace_t(NULL, + *$replace, + $q->before, + $q->after) ); + } + | first_leading x_by_ys %prec NAME + { + $$ = $2; + $$->bound = static_cast<cbl_inspect_bound_t>($1); + } + ; + +x_by_ys: x_by_y + { + $$ = new ast_inspect_oper_t(*$1); + } + | x_by_ys x_by_y + { + $$->replaces.push_back(*$2); + } + ; +x_by_y: alpha_val[matching] BY alpha_val[replace] insp_mtquals[q] + { + $$ = new cbl_inspect_replace_t(*$matching, *$replace, + $q->before, $q->after); + } + ; + +insp_mtquals: %empty { $$ = new cbl_inspect_match_t; } + | insp_quals + ; +insp_quals: insp_qual { + $$ = new cbl_inspect_match_t; + if( $insp_qual.before ) { + $$->before = *$insp_qual.qual; + } else { + $$->after = *$insp_qual.qual; + } + } + | insp_quals insp_qual + { + if( ($$->before.active() && $insp_qual.before) || + ($$->after.active() && !$insp_qual.before) ) { + error_msg(@2, "duplicate BEFORE/AFTER phrase"); + YYERROR; + } + auto p = $insp_qual.before? &$$->before : &$$->after; + *p = *$insp_qual.qual; + } + ; +insp_qual: befter initial alpha_val + { + // NIST NC115A: INITIAL has no effect (GnuCOBOL & ISO say same). + bool initial = $initial == INITIAL_kw; + $$.before = $befter == BEFORE; + $$.qual = new cbl_inspect_qual_t(initial, *$3); + } + ; + +first_leading: FIRST { $$ = bound_first_e; } + | ALL { $$ = bound_all_e; } + | LEADING { $$ = bound_leading_e; } + | TRAILING { $$ = bound_trailing_e; + if( ! dialect_mf() ) { + dialect_error(@1, "TRAILING", "mf"); + } + } + ; + +alphaval: LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + | reserved_value + { + $$ = new_reference( constant_of(constant_index($1)) ); + } + | intrinsic_call + ; + +befter: BEFORE { $$ = BEFORE; } + | AFTER { $$ = AFTER; } + ; + +initialize: INITIALIZE move_tgts[tgts] + { + statement_begin(@1, INITIALIZE); + initialize_statement( $tgts->targets, false, data_category_none ); + } + | INITIALIZE move_tgts[tgts] with FILLER_kw + { + statement_begin(@1, INITIALIZE); + initialize_statement( $tgts->targets, true, data_category_none ); + } + | INITIALIZE move_tgts[tgts] init_clause[ini] + { + statement_begin(@1, INITIALIZE); + initialize_statement( $tgts->targets, false, $ini->category, + $ini->replacement); + } + | INITIALIZE move_tgts[tgts] init_clause[ini] with FILLER_kw + { + statement_begin(@1, INITIALIZE); + initialize_statement( $tgts->targets, true, $ini->category, + $ini->replacement); + } + | INITIALIZE move_tgts[tgts] with FILLER_kw init_clause[ini] + { + statement_begin(@1, INITIALIZE); + initialize_statement( $tgts->targets, true, $ini->category, + $ini->replacement ); + } + ; + +init_clause: init_value + | init_categora + { + $$ = new init_statement_t(false); + $$->category = $1; + } + | init_categora to VALUE + { + $$ = new init_statement_t(true); + $$->category = $1; + } + | init_categora to VALUE init_value + { + $$ = $init_value; + $$->category = $1; + } + ; + +init_value: init_replace then to DEFAULT + { + $$ = new init_statement_t( *$init_replace); + } + | init_replace + { + $$ = new init_statement_t( *$init_replace); + } + | then to DEFAULT + { + $$ = new init_statement_t( false ); + } + ; + +init_categora: init_category + | ALL { $$ = data_category_all; } + ; +init_category: ALPHABETIC { $$ = data_alphabetic_e; } + | ALPHANUMERIC { $$ = data_alphanumeric_e; } + | ALPHANUMERIC_EDITED { $$ = data_alphanumeric_edited_e; } + | DBCS { $$ = data_dbcs_e; } + | EGCS { $$ = data_egcs_e; } + | NATIONAL { $$ = data_national_e; } + | NATIONAL_EDITED { $$ = data_national_edited_e; } + | NUMERIC { $$ = data_numeric_e; } + | NUMERIC_EDITED { $$ = data_numeric_edited_e; } + ; + +init_replace: then REPLACING init_bys { $$ = $init_bys; } + ; +init_bys: init_by + { + $$ = new category_map_t; + category_map_t& replacements = *$$; + replacements[$init_by.category] = $init_by.replacement; + } + | init_bys init_by + { + $$ = $1; + category_map_t& replacements = *$$; + replacements[$init_by.category] = $init_by.replacement; + } + ; +init_by: init_category data BY init_data + { + $$.category = $init_category; + $$.replacement = $init_data; + } + ; +init_data: alpha_val + | NUMSTR { + $$ = new_reference(new_literal($1.string, $1.radix)); + } + ; + +call: call_impl end_call + | call_cond end_call + ; + +call_impl: CALL call_body[body] + { + ffi_args_t *params = $body.using_params; + if( yydebug && params ) params->dump(); + size_t narg = params? params->elems.size() : 0; + cbl_ffi_arg_t args[1 + narg], *pargs = NULL; + if( narg > 0 ) { + pargs = use_list(params, args); + } + ast_call( $body.loc, *$body.ffi_name, + *$body.ffi_returning, narg, pargs, NULL, NULL, false ); + current.declaratives_evaluate(); + } + ; +call_cond: CALL call_body[body] call_excepts[except] + { + ffi_args_t *params = $body.using_params; + if( yydebug && params ) params->dump(); + size_t narg = params? params->elems.size() : 0; + cbl_ffi_arg_t args[1 + narg], *pargs = NULL; + if( narg > 0 ) { + pargs = use_list(params, args); + } + ast_call( $body.loc, *$body.ffi_name, + *$body.ffi_returning, narg, pargs, + $except.on_error, $except.not_error, false ); + auto handled = ec_type_t( static_cast<size_t>(ec_program_e) | + static_cast<size_t>(ec_external_e)); + current.declaratives_evaluate(handled); + } + ; +end_call: %empty %prec CALL + | END_CALL + ; + +call_body: ffi_name + { statement_begin(@1, CALL); + $$.ffi_name = $ffi_name; + $$.using_params = NULL; + $$.ffi_returning = cbl_refer_t::empty(); + } + + | ffi_name USING parameters + { statement_begin(@1, CALL); + $$.ffi_name = $ffi_name; + $$.using_params = $parameters; + $$.ffi_returning = cbl_refer_t::empty(); + } + | ffi_name call_returning scalar[ret] + { statement_begin(@1, CALL); + $$.ffi_name = $ffi_name; + $$.using_params = NULL; + $$.ffi_returning = $ret; + } + | ffi_name USING parameters call_returning scalar[ret] + { statement_begin(@1, CALL); + $$.ffi_name = $ffi_name; + $$.using_params = $parameters; + $$.ffi_returning = $ret; + } + ; +call_returning: RETURNING + | GIVING { + if( !dialect_mf() ) { + dialect_error(@1, "CALL ... GIVING", "mf"); + } + } + ; + +entry: ENTRY LITERAL + { statement_begin(@1, ENTRY); + auto name = new_literal($2, quoted_e); + parser_entry( name ); + } + | ENTRY LITERAL USING parameters + { statement_begin(@1, ENTRY); + auto name = new_literal($2, quoted_e); + ffi_args_t *params = $parameters; + size_t narg = params? params->elems.size() : 0; + cbl_ffi_arg_t args[1 + narg], *pargs = NULL; + if( narg > 0 ) { + pargs = use_list(params, args); + } + parser_entry( name, narg, pargs ); + } + ; + +ffi_name: scalar + { + $$ = $1; + if( ! is_callable($1->field) ) { + error_msg(@1, "CALL requires %s to be " + "PROGRAM-POINTER or alphanumeric", $1->name()); + YYERROR; + } + if( $1->field->type == FldLiteralA ) { + // Replace repository literal with aliased program's name. + assert($1->field->parent > 0); + auto& L = *cbl_label_of(symbol_at($1->field->parent)); + $$->field = new_literal(strlen(L.name), L.name, quoted_e); + } + } + | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + ; + +parameters: parameter { $$ = new ffi_args_t($1); } + | parameters parameter + { + $1->push_back($2); + $$ = $1; + } + ; +parameter: ffi_by_ref { $$ = $1; $$->crv = by_default_e; } + | by REFERENCE ffi_by_ref { $$ = $3; } + | by CONTENT ffi_by_con { $$ = $3; } + | by VALUE ffi_by_val { $$ = $3; } + ; +ffi_by_ref: scalar_arg[refer] + { + $$ = new cbl_ffi_arg_t(by_reference_e, $refer); + } + | ADDRESS OF scalar_arg[refer] + { + $$ = new cbl_ffi_arg_t(by_reference_e, $refer, address_of_e); + } + | OMITTED + { + cbl_refer_t *r = new cbl_refer_t(); + $$ = new cbl_ffi_arg_t(by_reference_e, r); + } + ; + +ffi_by_con: expr + { + cbl_refer_t *r = new cbl_refer_t(*$1); + $$ = new cbl_ffi_arg_t(by_content_e, r); + } + | LITERAL + { + cbl_refer_t *r = new_reference(new_literal($1, quoted_e)); + $$ = new cbl_ffi_arg_t(by_content_e, r); + } + | OMITTED + { + cbl_refer_t *r = new cbl_refer_t(); + $$ = new cbl_ffi_arg_t(by_content_e, r); + } + ; + +ffi_by_val: by_value_arg + { + $$ = new cbl_ffi_arg_t(by_value_e, $1); + } + | cce_expr %prec NAME + { + auto r = new_reference(new_literal(string_of($1))); + $$ = new cbl_ffi_arg_t(by_value_e, r); + } + | ADDRESS OF scalar + { + $$ = new cbl_ffi_arg_t(by_value_e, $scalar, address_of_e); + } + | LENGTH_OF scalar + { + $$ = new cbl_ffi_arg_t(by_value_e, $scalar, length_of_e); + } + ; + +scalar_arg: scalar + | scalar AS FIXED LENGTH %prec NAME + ; + +call_excepts: call_excepts[a] call_except[b] statements %prec CALL + { + if( $a.on_error && $a.not_error ) { + error_msg(@b, "too many ON EXCEPTION clauses"); + YYERROR; + } + // "ON" and "NOT ON" could be reversed, but not duplicated. + if( $a.on_error && $b.on_error ) { + error_msg(@b, "duplicate ON EXCEPTION clauses"); + YYERROR; + } + if( $a.not_error && $b.not_error ) { + error_msg(@b, "duplicate NOT ON EXCEPTION clauses"); + YYERROR; + } + $$ = $a; + if( $b.on_error ) { + $$.on_error = $b.on_error; + assert($a.not_error); + } else { + $$.not_error = $b.not_error; + assert($a.on_error); + } + assert( $b.on_error || $b.not_error ); + assert( ! ($b.on_error && $b.not_error) ); + cbl_label_t *tgt = $a.on_error? $a.on_error : $a.not_error; + parser_call_exception_end(tgt); + } + | call_except[a] statements %prec CALL + { + $$ = $a; + assert( $a.on_error || $a.not_error ); + assert( ! ($a.on_error && $a.not_error) ); + cbl_label_t *tgt = $a.on_error? $a.on_error : $a.not_error; + parser_call_exception_end(tgt); + } + ; + +call_except: EXCEPTION + { + $$.not_error = NULL; + $$.on_error = label_add(LblArith, + uniq_label("call"), yylineno); + if( !$$.on_error ) YYERROR; + parser_call_exception( $$.on_error ); + + assert( $1 == EXCEPTION || $1 == NOT ); + if( $1 == NOT ) { + std::swap($$.on_error, $$.not_error); + } + } + | OVERFLOW + { + $$.not_error = NULL; + $$.on_error = label_add(LblArith, + uniq_label("call"), yylineno); + if( !$$.on_error ) YYERROR; + parser_call_exception( $$.on_error ); + + assert( $1 == OVERFLOW || $1 == NOT ); + if( $1 == NOT ) { + std::swap($$.on_error, $$.not_error); + } + } + ; + +cancel: CANCEL ffi_names + { + statement_begin(@1, CANCEL); + auto nprog = $ffi_names->refers.size(); + cbl_refer_t progs[nprog]; + parser_initialize_programs(nprog, $ffi_names->use_list(progs)); + } + ; +ffi_names: ffi_name { $$ = new refer_list_t($1); } + | ffi_names ffi_name { $$ = $1->push_back($2); } + ; + +alter: ALTER { statement_begin(@1, ALTER); } alter_tgts + ; + +alter_tgts: alter_tgt + | alter_tgts alter_tgt + ; +alter_tgt: label_1[old] alter_to label_1[new] + { + cbl_perform_tgt_t tgt( $old, $new ); + parser_alter(&tgt); + + auto prog = cbl_label_of( symbol_at(symbol_elem_of($old)->program)); + if( prog->initial ) { + cbl_unimplemented("ALTER %s", $old->name); + } + } + ; + +alter_to: TO + | TO PROCEED TO + ; + +go_to: GOTO labels[args] + { + statement_begin(@1, GOTO); + size_t narg = $args->elems.size(); + if( 1 != narg ) { + error_msg(@args, "more than one GO TO label requires DEPENDING"); + YYERROR; + } + + for( auto& label : $args->elems ) { + label->used = yylineno; + } + cbl_label_t *args[narg]; + parser_goto( cbl_refer_t(), 1, use_list($args, args) ); + } + | GOTO labels[args] DEPENDING on scalar[value] + { + statement_begin(@1, GOTO); + size_t narg = $args->elems.size(); + assert(narg > 0); + for( auto& label : $args->elems ) { + label->used = yylineno; + } + cbl_label_t *args[narg]; + parser_goto( *$value, narg, use_list($args, args) ); + } + | GOTO + { + cbl_unimplemented("altered GO TO syntax (format 3)"); + YYERROR; + } + ; + +resume: RESUME NEXT STATEMENT + { + statement_begin(@1, RESUME); + parser_clear_exception(); + } + | RESUME label_1[tgt] + { + statement_begin(@1, RESUME); + parser_clear_exception(); + $tgt->used = yylineno; + parser_goto( cbl_refer_t(), 1, &$tgt ); + } + ; + +labels: label_1 { $$ = new Label_list_t($1); } + | labels label_1 { $$ = $1->push_back($2); } + ; +label_1: qname + { // Add a forward label with no line number, or get an existing. + assert(!name_queue.empty()); + auto namelocs( name_queue.pop() ); + + auto nameloc = namelocs.back(); + if( namelocs.size() > 2 ) { + error_msg(nameloc.loc, + "too many qualifications for %s", nameloc.name); + YYERROR; + } + const char *para = nameloc.name; + size_t isect = 0; + + if( namelocs.size() == 2 ) { + auto nameloc = namelocs.front(); + cbl_label_t *sect = label_add(nameloc.loc, LblSection, nameloc.name); + isect = symbol_index(symbol_elem_of(sect)); + } + + $$ = paragraph_reference(para, isect); + assert($$); + if( yydebug ) dbgmsg( "using procedure %s of line %d", + $$->name, $$->line ); + } + | NUMSTR + { + // Add a forward label with no line number, or get an existing. + $$ = label_add(@1, LblNone, $1.string); + assert($$ != NULL); + } + ; + + /* string & unstring */ + + +string: string_impl end_string + | string_cond end_string + ; +string_impl: STRING_kw string_body[body] + { + stringify($body.inputs, *$body.into.first, *$body.into.second); + current.declaratives_evaluate(ec_none_e); + } + ; +string_cond: STRING_kw string_body[body] on_overflows[over] + { + stringify($body.inputs, *$body.into.first, *$body.into.second, + $over.on_error, $over.not_error); + current.declaratives_evaluate(ec_overflow_e); + } + ; +end_string: %empty %prec LITERAL + | END_STRING + ; + +string_body: str_delimiteds[inputs] str_into[into] + { + statement_begin(@$, STRING_kw); + $$.inputs = $inputs; + $$.into = $into; + } + ; + +str_delimiteds: str_delimited + { + refer_marked_list_t marked($1.delimiter, $1.input); + $$ = new refer_collection_t(marked); + } + | str_delimiteds str_delimited[input] + { + // matching delimiters (or none) adds to the list + refer_marked_list_t& marked = $1->lists.back(); + if( !marked.marker ) { + marked.push_on($input.delimiter, $input.input); + } else { // start a new list + $1->push_back( refer_marked_list_t($input.delimiter, + $input.input) ); + } + } + ; + +str_delimited: str_input DELIMITED by str_size + { + $$.input = $str_input; + $$.delimiter = $str_size; + } + | str_input + { + $$.input = $str_input; + $$.delimiter = NULL; + } + ; + +str_input: scalar + | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + | reserved_value + { + $$ = new_reference(constant_of(constant_index($1))); + } + | intrinsic_call + ; + +str_size: SIZE { $$ = new_reference(NULL); } + | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); } + | scalar + | reserved_value + { + $$ = new_reference(constant_of(constant_index($1))); + } + ; + +str_into: INTO scalar + { + $$.first = $2; + $$.second = new_reference(NULL); + } + | INTO scalar with POINTER scalar[from] + { + $$.first = $2; + $$.second = $from; + } + ; + +on_overflows: on_overflow[over] statements %prec ADD + { + assert( $over.on_error || $over.not_error ); + assert( ! ($over.on_error && $over.not_error) ); + cbl_label_t *tgt = $over.on_error? + $over.on_error : $over.not_error; + parser_string_overflow_end(tgt); + } + | on_overflows[a] on_overflow[b] statements %prec ADD + { + if( $a.on_error && $a.not_error ) { + error_msg(@b, "too many ON OVERFLOW clauses"); + YYERROR; + } + // "ON" and "NOT ON" could be reversed, but not duplicated. + if( $a.on_error && $b.on_error ) { + error_msg(@b, "duplicate ON OVERFLOW clauses"); + YYERROR; + } + if( $a.not_error && $b.not_error ) { + error_msg(@b, "duplicate NOT ON OVERFLOW clauses"); + YYERROR; + } + $$ = $a; + if( $b.on_error ) { + $$.on_error = $b.on_error; + assert($a.not_error); + } else { + $$.not_error = $b.not_error; + assert($a.on_error); + } + assert( $b.on_error || $b.not_error ); + assert( ! ($b.on_error && $b.not_error) ); + cbl_label_t *tgt = $b.on_error? + $b.on_error : $b.not_error; + parser_string_overflow_end(tgt); + } + ; + +on_overflow: OVERFLOW + { + $$.not_error = NULL; + $$.on_error = label_add(LblString, + uniq_label("string"), yylineno); + if( !$$.on_error ) YYERROR; + parser_string_overflow( $$.on_error ); + + assert( $1 == OVERFLOW || $1 == NOT ); + if( $1 == NOT ) { + std::swap($$.on_error, $$.not_error); + } + } + ; + +unstring: unstring_impl end_unstring + | unstring_cond end_unstring + ; +end_unstring: %empty %prec UNSTRING + | END_UNSTRING + ; + +unstring_impl: UNSTRING unstring_body[body] + { + unstringify( *$body.input, $body.delimited, $body.into ); + current.declaratives_evaluate(ec_none_e); + } + ; +unstring_cond: UNSTRING unstring_body[body] on_overflows[over] + { + unstringify( *$body.input, $body.delimited, $body.into, + $over.on_error, $over.not_error ); + current.declaratives_evaluate(ec_overflow_e); + } + ; + +unstring_body: unstring_src[src] uns_delimited INTO uns_into[into] + { + statement_begin(@$, UNSTRING); + $$.input = $src; + $$.delimited = $uns_delimited; + $$.into = $into; + } +unstring_src: scalar + | intrinsic_call + | LITERAL + { + $$ = new_reference(new_literal($1, quoted_e)); + } + ; + +uns_delimited: %empty { $$ = NULL; } + | DELIMITED by uns_delimiters { $$ = $3; } + ; + +uns_delimiters: uns_delimiter { $$ = new refer_list_t($1); } + | uns_delimiters OR uns_delimiter + { + $$ = $1; + $$->push_back($3); + } + ; +uns_delimiter: all str_input + { + $$ = $2; + $$->all = $all; + } + ; + +uns_into: uns_tgts %prec NAME + { + $$ = new unstring_into_t($1); + } + | uns_tgts with POINTER scalar[ptr] + { + $$ = new unstring_into_t($1, $ptr); + } + | uns_tgts TALLYING in scalar[tally] + { + $$ = new unstring_into_t($1, NULL, $tally); + } + | uns_tgts with POINTER scalar[ptr] TALLYING in scalar[tally] + { + $$ = new unstring_into_t($1, $ptr, $tally); + } + ; + +uns_tgts: uns_tgt { $$ = new unstring_tgt_list_t($1); } + | uns_tgts uns_tgt { $$ = $1; $$->push_back($2); } + ; +uns_tgt: scalar[tgt] + { + $$ = new unstring_tgt_t($tgt); + } + | scalar[tgt] DELIMITER in scalar[delim] + { + $$ = new unstring_tgt_t($tgt, $delim); + } + | scalar[tgt] COUNT in scalar[count] + { + if( ! $count->field->is_integer() ) { + error_msg(@count, "COUNT %s must be integer type", + $count->field->name); + } + if( $count->field->has_attr(scaled_e) ) { + error_msg(@count, "COUNT %s may not be P scaled", + $count->field->name); + } + $$ = new unstring_tgt_t($tgt, NULL, $count); + } + | scalar[tgt] DELIMITER in scalar[delim] COUNT in scalar[count] + { + if( ! $count->field->is_integer() ) { + error_msg(@count, "COUNT %s must be integer type", + $count->field->name); + } + if( $count->field->has_attr(scaled_e) ) { + error_msg(@count, "COUNT %s may not be P scaled", + $count->field->name); + } + $$ = new unstring_tgt_t($tgt, $delim, $count); + } + ; + + /* intrinsics */ +intrinsic_call: function intrinsic { // "intrinsic" includes UDFs. + $$ = new_reference($intrinsic); + $$->field->attr |= constant_e; + } + | function intrinsic refmod[ref] + { + if( $ref.from->is_reference() || $ref.len->is_reference() ) { + error_msg(@ref, "subscripts on start:len refmod " + "parameters are unsupported"); + YYERROR; + } + if( $intrinsic->type != FldAlphanumeric ) { + error_msg(@ref, "'%s' only AlphaNumeric fields accept refmods", + $intrinsic->name); + YYERROR; + } + cbl_span_t span( $ref.from, $ref.len ); + $$ = new cbl_refer_t($intrinsic, span); + $$->field->attr |= constant_e; + } + | function NAME { + error_msg(@NAME, "no such function: %s", $NAME); + YYERROR; + } + + ; +function: %empty %prec FUNCTION + { + statement_begin(@$, FUNCTION); + } + | FUNCTION + { + statement_begin(@1, FUNCTION); + } + ; + +function_udf: FUNCTION_UDF '(' arg_list[args] ')' { + std::vector<function_descr_arg_t> params; + auto L = cbl_label_of(symbol_at($1)); + if( ! current.udf_args_valid(L, $args->refers, params) ) { + YYERROR; + } + $$ = new_temporary_clone(cbl_field_of(symbol_at(L->returning))); + auto narg = $args->refers.size(); + cbl_ffi_arg_t args[narg]; + size_t i = 0; + // Pass parameters as defined by the function. + std::transform( $args->refers.begin(), $args->refers.end(), args, + [params, &i]( cbl_refer_t& arg ) { + function_descr_arg_t param = params.at(i++); + auto ar = new cbl_refer_t(arg); + cbl_ffi_arg_t actual(param.crv, ar); + return actual; + } ); + auto name = new_literal(strlen(L->name), L->name, quoted_e); + ast_call( @1, name, $$, narg, args, NULL, NULL, true ); + } + | FUNCTION_UDF_0 { + static const size_t narg = 0; + static cbl_ffi_arg_t *args = NULL; + + auto L = cbl_label_of(symbol_at($1)); + $$ = new_temporary_clone(cbl_field_of(symbol_at(L->returning))); + + auto name = new_literal(strlen(L->name), L->name, quoted_e); + ast_call( @1, name, $$, narg, args, NULL, NULL, true ); + } + ; + + /* + * The scanner returns a function-token (e.g. NUMVAL) if it was + * preceded by FUNCTION, or if the name is in the program's + * function repository. Else it returns NAME, because it looks + * like a user-defined name (possibly a data item). If the user + * attempts to use an intrinsic function without using + * REPOSITORY or FUNCTION, the NAME results in a syntax error. + * + * Function arguments may be variables or literals or + * functions, and string-valued functions accept a refmod. In + * addition to "scalar", we have this inconsistent set: + * var: [ALL] LITERAL, NUMSTR, instrinsic, or scalar + * num_operand: signed NUMSTR/ZERO, instrinsic, or scalar + * alpahaval: LITERAL, reserved_value, instrinsic, or scalar + * Probably any numeric argument could be an expression. + */ +intrinsic: function_udf + | intrinsic0 + | intrinsic_v '(' arg_list[args] ')' { + location_set(@1); + size_t n = $args->size(); + assert(n > 0); + cbl_refer_t args[n]; + std::copy( $args->begin(), $args->end(), args ); + cbl_refer_t *p = intrinsic_inconsistent_parameter(n, args); + if( p != NULL ) { + auto loc = symbol_field_location(field_index(p->field)); + error_msg(loc, "FUNCTION %s has " + "inconsistent parameter type %zu ('%s')", + keyword_str($1), p - args, name_of(p->field) ); + YYERROR; + } + $$ = is_numeric(args[0].field)? + new_tempnumeric_float() : + new_alphanumeric(args[0].field->data.capacity); + + parser_intrinsic_callv( $$, intrinsic_cname($1), n, args ); + } + + | PRESENT_VALUE '(' expr_list[args] ')' + { + static char s[] = "__gg__present_value"; + location_set(@1); + $$ = new_tempnumeric_float(); + size_t n = $args->size(); + assert(n > 0); + if( n < 2 ) { + error_msg(@args, "PRESENT VALUE requires 2 parameters"); + YYERROR; + } + cbl_refer_t args[n]; + parser_intrinsic_callv( $$, s, n, $args->use_list(args) ); + } + + | BASECONVERT '(' varg[r1] varg[r2] varg[r3] ')' { + location_set(@1); + $$ = new_tempnumeric(); + cbl_unimplemented("BASECONVERT"); + if( ! intrinsic_call_3($$, BASECONVERT, $r1, $r2, $r3 )) YYERROR; + } + | BIT_OF '(' expr[r1] ')' { + location_set(@1); + $$ = new_alphanumeric(8 * $r1->field->data.capacity); + if( ! intrinsic_call_1($$, BIT_OF, $r1, @r1)) YYERROR; + } + | CHAR '(' expr[r1] ')' { + location_set(@1); + $$ = new_alphanumeric(1); + if( ! intrinsic_call_1($$, CHAR, $r1, @r1)) YYERROR; + } + + | CONVERT '(' varg[r1] convert_src[src] convert_dst[dst] ')' { + location_set(@1); + $$ = new_alphanumeric(1); + cbl_unimplemented("CONVERT"); + /* if( ! intrinsic_call_3($$, CONVERT, $r1, $src, $dst) ) YYERROR; */ + } + + | DISPLAY_OF '(' varg[r1] ')' { + location_set(@1); + uint32_t len = $r1->field->data.capacity; + $$ = new_alphanumeric(4 * len); + if( ! intrinsic_call_2($$, DISPLAY_OF, $r1, NULL) ) YYERROR; + } + | DISPLAY_OF '(' varg[r1] varg[r2] ')' { + location_set(@1); + uint32_t len = $r1->field->data.capacity + + $r2->field->data.capacity; + $$ = new_alphanumeric(4 * len); + if( ! intrinsic_call_2($$, DISPLAY_OF, $r1, $r2) ) YYERROR; + } + + | EXCEPTION_FILE filename { + location_set(@1); + $$ = new_alphanumeric(256); + parser_exception_file( $$, $filename ); + } + + | FIND_STRING '(' varg[r1] last start_after anycase ')' { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + /* auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); */ + cbl_unimplemented("FIND_STRING"); + /* if( ! intrinsic_call_4($$, FIND_STRING, r1, $r2) ) YYERROR; */ + } + + | FORMATTED_DATE '(' DATE_FMT[r1] expr[r2] ')' { + location_set(@1); + $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATE); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, FORMATTED_DATE, r1, $r2) ) YYERROR; + } + + + | FORMATTED_DATETIME '(' DATETIME_FMT[r1] expr[r2] + expr[r3] ')' { + location_set(@1); + $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + static cbl_refer_t r3(literally_zero); + if( ! intrinsic_call_4($$, FORMATTED_DATETIME, + r1, $r2, $r3, &r3) ) YYERROR; + } + | FORMATTED_DATETIME '(' DATETIME_FMT[r1] expr[r2] + expr[r3] expr[r4] ')' { + location_set(@1); + $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_4($$, FORMATTED_DATETIME, + r1, $r2, $r3, $r4) ) YYERROR; + } + | FORMATTED_DATETIME '(' error ')' { + YYERROR; + } + | FORMATTED_TIME '(' TIME_FMT[r1] expr[r2] + expr[r3] ')' { + location_set(@1); + $$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_3($$, FORMATTED_TIME, + r1, $r2, $r3) ) YYERROR; + } + | FORMATTED_TIME '(' TIME_FMT[r1] expr[r2] ')' { + location_set(@1); + $$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME); + auto r3 = new_reference(new_literal("0")); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_3($$, FORMATTED_TIME, + r1, $r2, r3) ) YYERROR; + } + | FORMATTED_CURRENT_DATE '(' DATETIME_FMT[r1] ')' { + location_set(@1); + $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_1($$, FORMATTED_CURRENT_DATE, r1, @r1) ) + YYERROR; + } + | TEST_FORMATTED_DATETIME '(' DATE_FMT[r1] varg[r2] ')' { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME, + r1, $r2) ) YYERROR; + } + | TEST_FORMATTED_DATETIME '(' TIME_FMT[r1] varg[r2] ')' { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME, + r1, $r2) ) YYERROR; + } + | TEST_FORMATTED_DATETIME '(' DATETIME_FMT[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME, + r1, $r2) ) YYERROR; + } + | INTEGER_OF_FORMATTED_DATE '(' DATE_FMT[r1] varg[r2] ')' { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, INTEGER_OF_FORMATTED_DATE, + r1, $r2) ) YYERROR; + } + | INTEGER_OF_FORMATTED_DATE '(' DATETIME_FMT[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, INTEGER_OF_FORMATTED_DATE, + r1, $r2) ) YYERROR; + } + | SECONDS_FROM_FORMATTED_TIME '(' TIME_FMT[r1] varg[r2] ')' { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, SECONDS_FROM_FORMATTED_TIME, + r1, $r2) ) YYERROR; + } + | SECONDS_FROM_FORMATTED_TIME '(' DATETIME_FMT[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); + if( ! intrinsic_call_2($$, SECONDS_FROM_FORMATTED_TIME, + r1, $r2) ) YYERROR; + } + + | HEX_OF '(' varg[r1] ')' { + location_set(@1); + $$ = new_alphanumeric(2 * $r1->field->data.capacity); + if( ! intrinsic_call_1($$, HEX_OF, $r1, @r1)) YYERROR; + } + | LENGTH '(' tableish[val] ')' { + location_set(@1); + $$ = new_tempnumeric(); + $$->clear_attr(signable_e); + parser_set_numeric($$, $val->field->size()); + if( ! intrinsic_call_1($$, LENGTH, $val, @val)) YYERROR; + } + | LENGTH '(' varg1a[val] ')' { + location_set(@1); + $$ = new_tempnumeric(); + $$->clear_attr(signable_e); + parser_set_numeric($$, $val->field->data.capacity); + if( ! intrinsic_call_1($$, LENGTH, $val, @val)) YYERROR; + } + | lopper_case[func] '(' alpha_val[r1] ')' { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + if( ! intrinsic_call_1($$, $func, $r1, @r1)) YYERROR; + } + + | MODULE_NAME '(' module_type[type] ')' + { + $$ = new_alphanumeric(sizeof(cbl_name_t)); + parser_module_name( $$, $type ); + } + + | NUMVAL_C '(' varg[r1] numval_locale[r2] anycase ')' { + location_set(@1); + $$ = new_tempnumeric(); + parser_intrinsic_numval_c( $$, *$r1, $r2.is_locale, + *$r2.arg2, $anycase ); + } + | ORD '(' alpha_val[r1] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + if( ! intrinsic_call_1($$, ORD, $r1, @r1)) YYERROR; + } + | RANDOM + { + location_set(@1); + $$ = new_tempnumeric_float(); + parser_intrinsic_call_0( $$, intrinsic_cname(RANDOM) ); + } + | RANDOM_SEED expr[r1] ')' + { // left parenthesis consumed by lexer + location_set(@1); + $$ = new_tempnumeric_float(); + if( ! intrinsic_call_1($$, RANDOM, $r1, @r1)) YYERROR; + } + + | STANDARD_COMPARE '(' varg[r1] varg[r2] varg[r3] varg[r4] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + cbl_unimplemented("STANDARD-COMPARE"); + /* if( ! intrinsic_call_4($$, STANDARD_COMPARE, $r1) ) YYERROR; */ + } + | STANDARD_COMPARE '(' varg[r1] varg[r2] varg[r3] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + cbl_unimplemented("STANDARD-COMPARE"); + /* if( ! intrinsic_call_4($$, STANDARD_COMPARE, $r1) ) YYERROR; */ + } + | STANDARD_COMPARE '(' varg[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + cbl_unimplemented("STANDARD-COMPARE"); + /* if( ! intrinsic_call_4($$, STANDARD_COMPARE, $r1) ) YYERROR; */ + } + + | SUBSTITUTE '(' varg[r1] subst_inputs[inputs] ')' { + location_set(@1); + $$ = new_alphanumeric(64); + auto narg = $inputs->size(); + cbl_substitute_t args[narg]; + std::transform( $inputs->begin(), $inputs->end(), args, + []( const substitution_t& arg ) { + cbl_substitute_t output( arg.anycase, + char(arg.first_last), + arg.orig, + arg.replacement ); + return output; } ); + + parser_intrinsic_subst($$, *$r1, narg, args); + } + + + | TEST_NUMVAL_C '(' varg[r1] numval_locale[r2] anycase ')' { + location_set(@1); + $$ = new_tempnumeric(); + parser_intrinsic_numval_c( $$, *$r1, $r2.is_locale, + *$r2.arg2, $anycase, true ); + } + | TRIM '(' error ')' { + error_msg(@error, "invalid TRIM argument"); + YYERROR; + } + | TRIM '(' expr[r1] trim_trailing ')' + { + location_set(@1); + switch( $r1->field->type ) { + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + case FldAlphaEdited: + case FldNumericEdited: + break; // alphanumeric OK + default: + // BLANK WHEN ZERO implies numeric-edited, so OK + if( $r1->field->has_attr(blank_zero_e) ) { + break; + } + error_msg(@r1, "TRIM argument must be alphanumeric"); + YYERROR; + break; + } + $$ = new_alphanumeric($r1->field->data.capacity); + cbl_refer_t * how = new_reference($trim_trailing); + if( ! intrinsic_call_2($$, TRIM, $r1, how) ) YYERROR; + } + + | USUBSTR '(' alpha_val[r1] expr[r2] expr[r3] ')' { + location_set(@1); + $$ = new_alphanumeric(32); // how long? + if( ! intrinsic_call_3($$, FORMATTED_DATETIME, + $r1, $r2, $r3) ) YYERROR; + } + + | intrinsic_I '(' expr[r1] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + if( ! intrinsic_call_1($$, $1, $r1, @r1)) YYERROR; + } + + | intrinsic_N '(' expr[r1] ')' + { + location_set(@1); + $$ = new_tempnumeric_float(); + if( ! intrinsic_call_1($$, $1, $r1, @r1)) YYERROR; + } + + | intrinsic_X '(' varg[r1] ')' + { + location_set(@1); + auto type = intrinsic_return_type($1); + switch(type) { + case FldAlphanumeric: + $$ = new_alphanumeric($r1->field->data.capacity); + break; + default: + if( $1 == NUMVAL || $1 == NUMVAL_F ) + { + $$ = new_temporary(FldFloat); + } + else + { + $$ = new_temporary(type); + } + } + if( $1 == NUMVAL_F ) { + if( is_literal($r1->field) ) { + _Float128 output __attribute__ ((__unused__)); + auto input = $r1->field->data.initial; + auto local = xstrdup(input), pend = local; + std::replace(local, local + strlen(local), ',', '.'); + std::remove_if(local, local + strlen(local), isspace); + output = strtof128(local, &pend); + // bad if strtof128 could not convert input + if( *pend != '\0' ) { + error_msg(@r1, "'%s' is not a numeric string", input); + } + } + } + if( ! intrinsic_call_1($$, $1, $r1, @r1)) YYERROR; + } + + | intrinsic_I2 '(' expr[r1] expr[r2] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + if( ! intrinsic_call_2($$, $1, $r1, $r2) ) YYERROR; + } + + | DATE_TO_YYYYMMDD '(' expr[r1] ')' + { + location_set(@1); + static auto r2 = new_reference(FldNumericDisplay, "50"); + static auto one = new cbl_refer_t( new_literal("1") ); + static auto four = new cbl_refer_t( new_literal("4") ); + cbl_span_t year(one, four); + auto r3 = new_reference(new_alphanumeric(21)); + r3->refmod = year; + + parser_intrinsic_call_0( r3->field, "__gg__current_date" ); + + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, DATE_TO_YYYYMMDD, + $r1, r2, r3) ) YYERROR; + } + + | DATE_TO_YYYYMMDD '(' expr[r1] expr[r2] ')' + { + location_set(@1); + static auto one = new cbl_refer_t( new_literal("1") ); + static auto four = new cbl_refer_t( new_literal("4") ); + cbl_span_t year(one, four); + auto r3 = new_reference(new_alphanumeric(21)); + r3->refmod = year; + + parser_intrinsic_call_0( r3->field, "__gg__current_date" ); + + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, DATE_TO_YYYYMMDD, + $r1, $r2, r3) ) YYERROR; + } + + | DATE_TO_YYYYMMDD '(' expr[r1] + expr[r2] expr[r3] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, DATE_TO_YYYYMMDD, + $r1, $r2, $r3) ) YYERROR; + } + + | DAY_TO_YYYYDDD '(' expr[r1] ')' + { + location_set(@1); + static auto r2 = new_reference(FldNumericDisplay, "50"); + static auto one = new cbl_refer_t( new_literal("1") ); + static auto four = new cbl_refer_t( new_literal("4") ); + cbl_span_t year(one, four); + auto r3 = new_reference(new_alphanumeric(21)); + r3->refmod = year; + + parser_intrinsic_call_0( r3->field, "__gg__current_date" ); + + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, DAY_TO_YYYYDDD, + $r1, r2, r3) ) YYERROR; + } + + | DAY_TO_YYYYDDD '(' expr[r1] expr[r2] ')' + { + location_set(@1); + static auto one = new cbl_refer_t( new_literal("1") ); + static auto four = new cbl_refer_t( new_literal("4") ); + cbl_span_t year(one, four); + auto r3 = new_reference(new_alphanumeric(21)); + r3->refmod = year; + + parser_intrinsic_call_0( r3->field, "__gg__current_date" ); + + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, DAY_TO_YYYYDDD, + $r1, $r2, r3) ) YYERROR; + } + + | DAY_TO_YYYYDDD '(' expr[r1] + expr[r2] expr[r3] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, DAY_TO_YYYYDDD, + $r1, $r2, $r3) ) YYERROR; + } + + | YEAR_TO_YYYY '(' expr[r1] ')' + { + location_set(@1); + static auto r2 = new_reference(new_literal("50", decimal_e)); + static auto one = new cbl_refer_t( new_literal("1") ); + static auto four = new cbl_refer_t( new_literal("4") ); + cbl_span_t year(one, four); + auto r3 = new_reference(new_alphanumeric(21)); + r3->refmod = year; + + parser_intrinsic_call_0( r3->field, "__gg__current_date" ); + + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, YEAR_TO_YYYY, + $r1, r2, r3) ) YYERROR; + } + + | YEAR_TO_YYYY '(' expr[r1] expr[r2] ')' + { + location_set(@1); + static auto one = new cbl_refer_t( new_literal("1") ); + static auto four = new cbl_refer_t( new_literal("4") ); + cbl_span_t year(one, four); + auto r3 = new_reference(new_alphanumeric(21)); + r3->refmod = year; + + parser_intrinsic_call_0( r3->field, "__gg__current_date" ); + + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, YEAR_TO_YYYY, + $r1, $r2, r3) ) YYERROR; + } + + | YEAR_TO_YYYY '(' expr[r1] + expr[r2] expr[r3] ')' + { + location_set(@1); + $$ = new_tempnumeric(); + if( ! intrinsic_call_3($$, YEAR_TO_YYYY, + $r1, $r2, $r3) ) YYERROR; + } + + | intrinsic_N2 '(' expr[r1] expr[r2] ')' + { + location_set(@1); + switch($1) + { + case ANNUITY: + $$ = new_tempnumeric_float(); + break; + case COMBINED_DATETIME: + $$ = new_tempnumeric(); + break; + case REM: + $$ = new_tempnumeric_float(); + break; + } + if( ! intrinsic_call_2($$, $1, $r1, $r2) ) YYERROR; + } + + | intrinsic_X2 '(' varg[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + if( ! intrinsic_call_2($$, $1, $r1, $r2) ) YYERROR; + } + | intrinsic_locale + ; + +module_type: ACTIVATING { $$ = module_activating_e; } + | CURRENT { $$ = module_current_e; } + | NESTED { $$ = module_nested_e; } + | STACK { $$ = module_stack_e; } + | TOP_LEVEL { $$ = module_toplevel_e; } + ; + +convert_src: ANY + | HEX + | convert_fmt + ; +convert_dst: convert_fmt HEX + | BYTE + ; +convert_fmt: ALPHANUMERIC + | ANUM + | NAT + | NATIONAL + ; + +numval_locale: %empty { + $$.is_locale = false; + $$.arg2 = cbl_refer_t::empty(); + } + | LOCALE NAME { $$.is_locale = true; $$.arg2 = NULL; + cbl_unimplemented("NUMVAL_C LOCALE"); YYERROR; + } + | varg { $$.is_locale = false; $$.arg2 = $1; } + ; + +subst_inputs: subst_input { $$ = new substitutions_t; $$->push_back($1); } + | subst_inputs subst_input { $$ = $1; $$->push_back($2); } + ; +subst_input: anycase first_last varg[v1] varg[v2] { + $$.init( $anycase, $first_last, $v1, $v2 ); + } + ; + +intrinsic_locale: + LOCALE_COMPARE '(' varg[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + cbl_refer_t dummy = {}; + if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &dummy) ) YYERROR; + } + | LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, $r3) ) YYERROR; + } + + | LOCALE_DATE '(' varg[r1] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + cbl_refer_t dummy = {}; + if( ! intrinsic_call_2($$, LOCALE_DATE, $r1, &dummy) ) YYERROR; + } + | LOCALE_DATE '(' varg[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + if( ! intrinsic_call_2($$, LOCALE_DATE, $r1, $r2) ) YYERROR; + } + | LOCALE_TIME '(' varg[r1] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + cbl_refer_t dummy = {}; + if( ! intrinsic_call_2($$, LOCALE_TIME, $r1, &dummy) ) YYERROR; + } + | LOCALE_TIME '(' varg[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + if( ! intrinsic_call_2($$, LOCALE_TIME, $r1, $r2) ) YYERROR; + } + | LOCALE_TIME_FROM_SECONDS '(' varg[r1] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + cbl_refer_t dummy = {}; + if( ! intrinsic_call_2($$, LOCALE_TIME_FROM_SECONDS, $r1, &dummy) ) YYERROR; + } + | LOCALE_TIME_FROM_SECONDS '(' varg[r1] varg[r2] ')' + { + location_set(@1); + $$ = new_alphanumeric($r1->field->data.capacity); + if( ! intrinsic_call_2($$, LOCALE_TIME_FROM_SECONDS, $r1, $r2) ) YYERROR; + } + ; + +lopper_case: LOWER_CASE { $$ = LOWER_CASE; } + | UPPER_CASE { $$ = UPPER_CASE; } + ; + +trim_trailing: %empty { $$ = new_literal("0"); } // Remove both + | LEADING { $$ = new_literal("1"); } // Remove leading spaces + | TRAILING { $$ = new_literal("2"); } // Remove trailing spaces + ; + +intrinsic0: CURRENT_DATE { + location_set(@1); + $$ = new_alphanumeric(21); + parser_intrinsic_call_0( $$, "__gg__current_date" ); + } + | E { + location_set(@1); + $$ = new_tempnumeric(); + parser_intrinsic_call_0( $$, "__gg__e" ); + } + + | EXCEPTION_FILE_N { + location_set(@1); + $$ = new_alphanumeric(256); + intrinsic_call_0( $$, EXCEPTION_FILE_N ); + } + + | EXCEPTION_FILE { + location_set(@1); + $$ = new_alphanumeric(256); + parser_exception_file( $$ ); + } + | EXCEPTION_LOCATION_N { + location_set(@1); + $$ = new_alphanumeric(256); + intrinsic_call_0( $$, EXCEPTION_LOCATION_N ); + } + | EXCEPTION_LOCATION { + location_set(@1); + $$ = new_alphanumeric(256); + intrinsic_call_0( $$, EXCEPTION_LOCATION ); + } + | EXCEPTION_STATEMENT { + location_set(@1); + $$ = new_alphanumeric(63); + intrinsic_call_0( $$, EXCEPTION_STATEMENT ); + } + | EXCEPTION_STATUS { + location_set(@1); + $$ = new_alphanumeric(31); + intrinsic_call_0( $$, EXCEPTION_STATUS ); + } + + | PI { + location_set(@1); + $$ = new_tempnumeric_float(); + parser_intrinsic_call_0( $$, "__gg__pi" ); + } + | SECONDS_PAST_MIDNIGHT { + location_set(@1); + $$ = new_tempnumeric(); + intrinsic_call_0( $$, SECONDS_PAST_MIDNIGHT ); + } + | UUID4 { + location_set(@1); + $$ = new_alphanumeric(32); // don't know correct size + parser_intrinsic_call_0( $$, "__gg__uuid4" ); + } + | WHEN_COMPILED { + location_set(@1); + $$ = new_alphanumeric(21); // Returns YYYYMMDDhhmmssss-0500 + parser_intrinsic_call_0( $$, "__gg__when_compiled" ); + } + ; + +intrinsic_I: BOOLEAN_OF_INTEGER { $$ = BOOLEAN_OF_INTEGER; + cbl_unimplemented("BOOLEAN-OF-INTEGER"); + } + | CHAR_NATIONAL { $$ = CHAR_NATIONAL; + cbl_unimplemented("CHAR-NATIONAL"); + } + | DATE_OF_INTEGER { $$ = DATE_OF_INTEGER; } + | DAY_OF_INTEGER { $$ = DAY_OF_INTEGER; } + | FACTORIAL { $$ = FACTORIAL; } + | FRACTION_PART { $$ = FRACTION_PART; } + | HIGHEST_ALGEBRAIC { $$ = HIGHEST_ALGEBRAIC; } + | INTEGER { $$ = INTEGER; } + | INTEGER_OF_BOOLEAN { $$ = INTEGER_OF_BOOLEAN; + cbl_unimplemented("INTEGER-OF-BOOLEAN"); + } + | INTEGER_OF_DATE { $$ = INTEGER_OF_DATE; } + | INTEGER_OF_DAY { $$ = INTEGER_OF_DAY; } + | INTEGER_PART { $$ = INTEGER_PART; } + | LOWEST_ALGEBRAIC { $$ = LOWEST_ALGEBRAIC; } + | SIGN { $$ = SIGN; } + | TEST_DATE_YYYYMMDD { $$ = TEST_DATE_YYYYMMDD; } + | TEST_DAY_YYYYDDD { $$ = TEST_DAY_YYYYDDD; } + | ULENGTH { $$ = ULENGTH; } + | UPOS { $$ = UPOS; } + | USUPPLEMENTARY { $$ = USUPPLEMENTARY; } + | UVALID { $$ = UVALID; } + | UWIDTH { $$ = UWIDTH; } + ; + +intrinsic_I2: MOD { $$ = MOD; } + ; + +intrinsic_N: ABS { $$ = ABS; } + | ACOS { $$ = ACOS; } + | ASIN { $$ = ASIN; } + | ATAN { $$ = ATAN; } + | COS { $$ = COS; } + | EXP { $$ = EXP; } + | EXP10 { $$ = EXP10; } + | LOG { $$ = LOG; } + | LOG10 { $$ = LOG10; } + | SIN { $$ = SIN; } + | SMALLEST_ALGEBRAIC { $$ = SMALLEST_ALGEBRAIC; + cbl_unimplemented("SMALLEST-ALGEBRAIC"); + } + | SQRT { $$ = SQRT; } + | TAN { $$ = TAN; } + ; + +intrinsic_N2: ANNUITY { $$ = ANNUITY; } + | COMBINED_DATETIME { $$ = COMBINED_DATETIME; } + | REM { $$ = REM; } + ; + +intrinsic_X: BIT_TO_CHAR { $$ = BIT_TO_CHAR; } + | BYTE_LENGTH { $$ = BYTE_LENGTH; } + | HEX_TO_CHAR { $$ = HEX_TO_CHAR; } + | NUMVAL { $$ = NUMVAL; } + | NUMVAL_F { $$ = NUMVAL_F; } + | REVERSE { $$ = REVERSE; } + | TEST_NUMVAL { $$ = TEST_NUMVAL; } + | TEST_NUMVAL_F { $$ = TEST_NUMVAL_F; } + ; + +intrinsic_X2: NATIONAL_OF { $$ = NATIONAL_OF; } + ; + +intrinsic_v: CONCAT { $$ = CONCAT; } + | MAXX { $$ = MAXX; } + | MEAN { $$ = MEAN; } + | MEDIAN { $$ = MEDIAN; } + | MIDRANGE { $$ = MIDRANGE; } + | MINN { $$ = MINN; } + | ORD_MAX { $$ = ORD_MAX; } + | ORD_MIN { $$ = ORD_MIN; } + | RANGE { $$ = RANGE; } + | STANDARD_DEVIATION { $$ = STANDARD_DEVIATION; } + | SUM { $$ = SUM; } + | VARIANCE { $$ = VARIANCE; } + ; + +all: %empty { $$ = false; } + | ALL { $$ = true; } + ; + +anycase: %empty { $$ = false; } + | ANYCASE { $$ = true; } + ; + +as: %empty + | AS + ; + +at: %empty + | AT + ; + +by: %empty + | BY + ; + +characters: %empty + | CHARACTERS + ; + +collating: %empty + | COLLATING + ; + +contains: %empty + | CONTAINS + ; + +in: %empty + | IN + ; + +data: %empty + | DATA + ; + +exception: %empty + | EXCEPTION + ; + +file: %empty + | FILE_KW + ; + +first_last: %empty { $$ = 0; } + | FIRST { $$ = 'F'; } + | LAST { $$ = 'L'; } + ; + +is_global: %empty %prec GLOBAL { $$ = false; } + | is GLOBAL { $$ = true; } + ; + +global: %empty %prec GLOBAL { $$ = false; } + | GLOBAL { $$ = true; } + ; + +initial: %empty { $$ = 0; } + | INITIAL_kw { $$ = INITIAL_kw; } + ; + +is: %empty + | IS + ; + +key: %empty + | KEY + ; + +last: %empty %prec LAST + | LAST + ; + +lines: %empty + | LINE + | LINES + ; + +mode: %empty + | MODE + ; + +native: %empty + | NATIVE + ; + +of: %empty + | OF + ; + +on: %empty + | ON + ; + +optional: %empty { $$ = false; } + | OPTIONAL { $$ = true; } + ; + +program_kw: %empty + | PROGRAM_kw + ; + +order: %empty + | ORDER + ; + +record: %empty + | RECORD + ; + +sign: %empty + | SIGN + ; + +start_after: %empty %prec AFTER + | START AFTER varg + ; + +status: %empty + | STATUS + ; +strong: %empty { $$ = true; } + | STRONG { $$ = false; } + ; + +times: %empty + | TIMES + ; +then: %empty + | THEN + ; + +to: %empty + | TO + ; + +usage: %empty + | USAGE + | USAGE IS + ; + +with: %empty + | WITH + ; + + /* + * CDF: Compiler-directing Facility + */ +cdf: cdf_none + | cdf_library + | cdf_listing + | cdf_option + ; + +cdf_library: cdf_basis + /* | DELETE */ + | INSERTT + ; +cdf_basis: BASIS NAME /* BASIS is never passed to the parser. */ + | BASIS LITERAL + ; + +cdf_use: USE DEBUGGING on labels + { + if( ! current.declarative_section_name() ) { + error_msg(@1, "USE valid only in DECLARATIVES"); + YYERROR; + } + std::for_each($labels->elems.begin(), $labels->elems.end(), + add_debugging_declarative); + + } + | USE DEBUGGING on ALL PROCEDURES + { + if( ! current.declarative_section_name() ) { + error_msg(@1, "USE valid only in DECLARATIVES"); + YYERROR; + } + static const cbl_label_t all = { + .type = LblNone, + .name = { ':', 'a', 'l', 'l', ':', } // workaround for gcc < 11.3 + }; + add_debugging_declarative(&all); + } + + | USE globally mistake procedure on filenames + { + if( ! current.declarative_section_name() ) { + error_msg(@1, "USE valid only in DECLARATIVES"); + YYERROR; + } + bool global = $globally == GLOBAL; + std::list<size_t> files; + auto& culprits = $filenames->files; + std::transform( culprits.begin(), culprits.end(), + std::back_inserter(files), + file_list_t::symbol_index ); + cbl_declarative_t declarative(current.declarative_section(), + ec_all_e, files, + file_mode_none_e, global); + current.declaratives.add(declarative); + } + + | USE globally mistake procedure on io_mode + { // Format 1 + if( ! current.declarative_section_name() ) { + error_msg(@1, "USE valid only in DECLARATIVES"); + YYERROR; + } + bool global = $globally == GLOBAL; + std::list<size_t> files; + cbl_declarative_t declarative(current.declarative_section(), + ec_all_e, files, + $io_mode, global); + current.declaratives.add(declarative); + } + | USE cdf_use_excepts // Format 3: AFTER swallowed by lexer + { + if( ! current.declarative_section_name() ) { + error_msg(@1, "USE valid only in DECLARATIVES"); + YYERROR; + } + } + ; + +cdf_use_excepts: + cdf_use_except + | cdf_use_excepts cdf_use_except + ; +cdf_use_except: EC NAME cdf_use_files[files] + { + auto ec = ec_type_of($NAME); + if( ec == ec_none_e ) { + error_msg(@NAME, "not an EXCEPTION CONDITION: %s", $NAME); + YYERROR; + } + std::list<size_t> files; + if( $files ) { + if( ec_io_e != (ec_io_e & ec) ) { + error_msg(@NAME, "not an I-O EXCEPTION CONDITION: %s", $NAME); + YYERROR; + } + auto& culprits = $files->files; + std::transform( culprits.begin(), culprits.end(), + std::back_inserter(files), + file_list_t::symbol_index ); + } + + cbl_declarative_t declarative(current.declarative_section(), + ec, files, file_mode_none_e); + // Check for duplicates, but keep going. + current.declaratives.add(declarative); + } + ; +cdf_use_files: %empty { $$ = NULL; } + | FILE_KW filenames { $$ = $2; } + ; + +io_mode: INPUT { $$ = file_mode_input_e; } + | OUTPUT { $$ = file_mode_output_e; } + | IO { $$ = file_mode_io_e; } + | EXTEND { $$ = file_mode_extend_e; } + ; + +globally: global { $$ = $1? GLOBAL : 0; } + | global STANDARD { $$ = $1? GLOBAL : STANDARD; } + | global AFTER { $$ = $1? GLOBAL : 0; } + | global AFTER STANDARD { $$ = $1? GLOBAL : STANDARD; } + ; +mistake: EXCEPTION { $$ = EXCEPTION; } + | ERROR { $$ = ERROR; } + ; +procedure: %empty + | PROCEDURE + ; + +cdf_listing: STAR_CBL star_cbl_opts + ; +star_cbl_opts: star_cbl_opt + | star_cbl_opts star_cbl_opt + ; +star_cbl_opt: LIST { $$ = $LIST[0] == 'N'? NOLIST : LIST; } + | MAP { $$ = $MAP[0] == 'N'? NOMAP : MAP; } + /* | SOURCE { $$ = $SOURCE[0] == 'N'? NOSOURCE : SOURCE; } */ + ; + +cdf_option: CBL cbl_options + ; +cbl_options: cbl_option + | cbl_options cbl_option + ; +cbl_option: LITERAL + ; /* Ignore all options. */ + + /* The following compiler directing statements have no effect */ +cdf_none: ENTER + | READY + | RESET + | TRACE + | SERVICE_RELOAD + ; + + +%% + +static YYLTYPE +first_line_of( YYLTYPE loc ) { + if( loc.first_line < loc.last_line ) loc.last_line = loc.first_line; + if( loc.last_column < loc.first_column ) loc.last_column = loc.first_column; + return loc; +} + +void ast_call( const YYLTYPE& loc, cbl_refer_t name, cbl_refer_t returning, + size_t narg, cbl_ffi_arg_t args[], + cbl_label_t *except, + cbl_label_t *not_except, + bool is_function) +{ + if( is_literal(name.field) ) { + cbl_field_t called = { 0, FldLiteralA, FldInvalid, quoted_e | constant_e, + 0, 0, 77, nonarray, 0, "", + 0, cbl_field_t::linkage_t(), + {0,0,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }; + snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial); + called.data = name.field->data; + name.field = cbl_field_of(symbol_field_add(PROGRAM, &called)); + symbol_field_location(field_index(name.field), loc); + parser_symbol_add(name.field); + } + + if( getenv("ast_call") ) { + dbgmsg("%s: calling %s returning %s with %zu args:", __func__, + name_of(name.field), + (returning.field)? returning.field->name : "[none]", + narg); + for( size_t i=0; i < narg; i++ ) { + const char *crv = "?"; + switch(args[i].crv) { + case by_default_e: crv = "def"; break; + case by_reference_e: crv = "ref"; break; + case by_content_e: crv = "con"; break; + case by_value_e: crv = "val"; break; + } + dbgmsg("%s: %4zu: %s @%p %s", __func__, + i, crv, args[i].refer.field, args[i].refer.field->name); + } + } + parser_call( name, returning, narg, args, except, not_except, is_function ); +} + +static size_t +statement_begin( const YYLTYPE& loc, int token ) { + // The following statement generates a message at run-time + // parser_print_string("statement_begin()\n"); + location_set(loc); + prior_statement = token; + + parser_statement_begin(); + + if( token != CONTINUE ) { + if( enabled_exceptions.size() ) { + current.declaratives_evaluate(ec_none_e); + cbl_enabled_exceptions_array_t enabled(enabled_exceptions); + parser_exception_prepare( keyword_str(token), &enabled ); + } + } + return 0; +} + +#include "parse_util.h" +#include <sys/types.h> + +struct string_match { + const char *name; + string_match( const char name[] ) : name(name) {} + bool operator()( const char input[] ) const { + return strlen(name) == strlen(input) && 0 == strcasecmp(name, input); + } +}; + +const char * +keyword_str( int token ) { + if( token == YYEOF ) return "YYEOF"; + if( token == YYEMPTY ) return "YYEMPTY"; + + if( token < 256 ) { + static char ascii[2]; + ascii[0] = token; + return ascii; + } + + return tokens.name_of(token); +} + +/* + * Return the token for the Cobol name, unless it is a function name. The + * lexer uses keyword_tok to determine if what appears to be a NAME is in fact + * a token defined by the parser. For functions, the situation is unambiguous: + * a function name appears only after FUNCTION or in the REPOSITORY paragraph. + * All function names are rejected here; the lexer uses typed_name to check + * REPOSITORY names. + */ + +// tokens.h is generated as needed from parse.h with tokens.h.gen +tokenset_t::tokenset_t() { +#include "token_names.h" +} + +// Look up the lowercase form of a keyword, excluding some CDF names. +int +tokenset_t::find( const cbl_name_t name, bool include_intrinsics ) { + static const cbl_name_t non_names[] = { // including CDF NAMES, and "SWITCH" + "CHECKING", "LIST", "LOCATION", "MAP", "SWITCH", + }, * const eonames = non_names + COUNT_OF(non_names); + + if( std::any_of(non_names, eonames, + [candidate=name](const cbl_name_t non_name) { + return 0 == strcasecmp(non_name, candidate) + && strlen(non_name) == strlen(candidate); + } ) ) { + return 0; // CDF names are never ordinary tokens + } + + if( dialect_ibm() ) { + static const cbl_name_t ibm_non_names[] = { + "RESUME", + }, * const eonames = ibm_non_names + COUNT_OF(ibm_non_names); + + if( std::any_of(ibm_non_names, eonames, + [candidate=name](const cbl_name_t non_name) { + return 0 == strcasecmp(non_name, candidate) + && strlen(non_name) == strlen(candidate); + } ) ) { + return 0; // Names not reserved by IBM are never ordinary IBM tokens + } + } + + cbl_name_t lname; + std::transform(name, name + strlen(name) + 1, lname, tolower); + auto p = tokens.find(lname); + if( p == tokens.end() ) return 0; + int token = p->second; + + if( token == SECTION ) yylval.number = 0; + + if( include_intrinsics ) return token; + + return intrinsic_cname(token)? 0 : token; +} + +int +keyword_tok( const char * text, bool include_intrinsics ) { + return tokens.find(text, include_intrinsics); +} + +static inline size_t +verify_figconst( enum cbl_figconst_t figconst , size_t pos ) { + cbl_field_t *f = cbl_field_of(symbol_at(pos)); + assert((f->attr & FIGCONST_MASK) == figconst); + return pos; +} + +static size_t +constant_index( int token ) { + switch(token) { + case SPACES : return 0; + case LOW_VALUES : return verify_figconst(low_value_e, 2); + case ZERO : return verify_figconst(zero_value_e, 3); + case HIGH_VALUES : return verify_figconst(high_value_e, 4); + case QUOTES : return 5; + case NULLS : return 6; + } + cbl_errx( "%s:%d: no such constant %d", __func__, __LINE__, token); + return (size_t)-1; +} + + +static enum relop_t +relop_of(int token) { + switch(token) { + case '<': return lt_op; + case LE: return le_op; + case '=': return eq_op; + case NE: return ne_op; + case GE: return ge_op; + case '>': return gt_op; + } + cbl_internal_error( "%s:%d: invalid relop token %d", + __func__, __LINE__, token); + + return lt_op; // not reached +} + +static relop_t +relop_invert(relop_t op) { + switch(op) { + case lt_op: return ge_op; + case le_op: return gt_op; + case eq_op: return ne_op; + case ne_op: return eq_op; + case ge_op: return lt_op; + case gt_op: return le_op; + } + cbl_errx( "%s:%d: invalid relop_t %d", __func__, __LINE__, op); + + return relop_t(0); // not reached +} + +#if needed +static const char * +relop_debug_str(int token) { + switch(token) { + case 0: return "zilch"; + case '<': return "<"; + case LE: return "LE"; + case '=': return "="; + case NE: return "NE"; + case GE: return "GE"; + case '>': return ">"; + } + dbgmsg("%s:%d: invalid relop token %d", __func__, __LINE__, token); + return "???"; +} + +static int +token_of(enum relop_t op) { + switch(op) { + case lt_op: return '<'; + case le_op: return LE; + case eq_op: return '='; + case ne_op: return NE; + case ge_op: return GE; + case gt_op: return '>'; + } + cbl_errx( "%s:%d: invalid relop_t %d", __func__, __LINE__, op); + + return 0; // not reached +} +#endif + +static enum classify_t +classify_of( int token ) { + switch(token) { + case NUMERIC: return ClassNumericType; + case ALPHABETIC: return ClassAlphabeticType; + case ALPHABETIC_LOWER: return ClassLowerType; + case ALPHABETIC_UPPER: return ClassUpperType; + case DBCS: return ClassDbcsType; + case KANJI: return ClassKanjiType; + } + return (enum classify_t)-1; +} + +static cbl_round_t +rounded_of( int token ) { + cbl_round_t mode = current_rounded_mode(); + + switch(token) { + case 0 ... int(truncation_e): + mode = cbl_round_t(token); + break; + case ROUNDED: + mode = current.rounded_mode(); + break; + case AWAY_FROM_ZERO: + mode = away_from_zero_e; + break; + case NEAREST_TOWARD_ZERO: + mode = nearest_toward_zero_e; + break; + case TOWARD_GREATER: + mode = toward_greater_e; + break; + case TOWARD_LESSER: + mode = toward_lesser_e; + break; + case NEAREST_AWAY_FROM_ZERO: + mode = nearest_away_from_zero_e; + break; + case NEAREST_EVEN: + mode = nearest_even_e; + break; + case PROHIBITED: + mode = prohibited_e; + break; + case TRUNCATION: + mode = truncation_e; + break; + default: + dbgmsg("%s: logic error: unrecognized rounding value %d", __func__, token); + } + return mode; +} + +static cbl_round_t +current_rounded_mode( int token ) { + cbl_round_t mode = rounded_of(token); + return current.rounded_mode(mode); +} + +template <cbl_label_type_t T> +class label_named { + size_t program; + const char *name; + public: + label_named( size_t program, const char name[] ) + : program(program), name(name) {} + bool operator()( const symbol_elem_t& sym ) const { + if( sym.program == program && sym.type == SymLabel ) { + auto p = cbl_label_of(&sym); + return p->type == T && 0 == strcasecmp(p->name, name); + } + return false; + } +}; + +typedef label_named<LblSection> section_named; +typedef label_named<LblParagraph> paragraph_named; + +static struct cbl_label_t * +label_add( const YYLTYPE& loc, + enum cbl_label_type_t type, const char name[] ) { + size_t parent = 0; + + // Verify the new paragraph doesn't conflict with a section + if( type == LblParagraph ) { + parent = current.program_section(); + auto p = std::find_if(symbols_begin(PROGRAM), symbols_end(), + section_named(PROGRAM, name)); + if( p != symbols_end() ) { + error_msg(loc, "paragraph %s conflicts with section %s on line %d", + name, cbl_label_of(p)->name, cbl_label_of(p)->line); + } + } + + // Verify the new section doesn't conflict with a paragraph + if( type == LblSection ) { + // line is zero if the forward reference is to PARA OF SECT + auto p = std::find_if(symbols_begin(PROGRAM), symbols_end(), + paragraph_named(PROGRAM, name)); + if( p != symbols_end() ) { + error_msg(loc, "section %s conflicts with paragraph %s on line %d", + name, cbl_label_of(p)->name, cbl_label_of(p)->line); + } + } + struct cbl_label_t label = { type, parent, loc.last_line }; + + if( !namcpy(loc, label.name, name) ) return NULL; + auto p = symbol_label_add(PROGRAM, &label); + + if( type == LblParagraph || type == LblSection ) { + procedure_definition_add(PROGRAM, p); + } + + assert( !(p->type == LblSection && p->parent > 0) ); + + if( getenv(__func__) ) { + yywarn("%s: added label %3zu %10s for '%s' of %zu", __func__, + symbol_elem_of(p) - symbols_begin(), p->type_str()+3, p->name, p->parent); + } + + return p; +} + +/* + * Many label names are defined statically and so are guaranteed to be in + * bounds. Often they are created far away from the yacc metavariables, so + * there's no location to access. + */ +static struct cbl_label_t * +label_add( enum cbl_label_type_t type, const char name[], int line ) { + YYLTYPE loc { line, 1, line, 1 }; + return label_add(loc, type, name); +} + +cbl_label_t * +perform_t::ec_labels_t::new_label( cbl_label_type_t type, + const cbl_name_t role ) +{ + size_t n = 1 + symbols_end() - symbols_begin(); + cbl_name_t name; + sprintf(name, "_perf_%s_%zu", role, n); + return label_add( type, name, yylineno ); +} + +/* + * An unqualified procedure reference occurs within a section may refer to a: + * 1. section + * 2. paragraph, perhaps in a section, perhaps the current section. + * + * The named procedure need only be unique, either within the current section + * or globally. A paragraph within one section may be referenced without + * qualification in another section if its name is unique. + * + * An otherwise globally unique name is shadowed by the same name in the + * current section, and the section-local name may be referenced before being + * defined. That is, given: + * + * S1 SECTION. + * PROC. + * ... + * S2 SECTION. + * PERFORM PROC. + * PROC. ... + * + * the procedure performed is PROC OF S2. + * + * That creates a challenge for the compiler, because PROC appears to have been + * defined when PERFORM is encountered. When PROC OF S2 is defined, the parser + * detects and corrects its misstep. + */ +static struct cbl_label_t * +paragraph_reference( const char name[], size_t section ) +{ + // A reference has line == 0. It is LblParagraph if the section is + // explicitly named, else LblNone (because we don't know). + struct cbl_label_t *p, label = { section? LblParagraph : LblNone, section }; + assert(strlen(name) < sizeof(label.name)); // caller ensures + strcpy(label.name, name); + if( label.type == LblNone ) assert(label.parent == 0); + + const symbol_elem_t *last = symbols_end(); + + p = symbol_label_add(PROGRAM, &label); + assert(p); + + const char *sect_name = section? cbl_label_of(symbol_at(section))->name : NULL; + procedure_reference_add(sect_name, p->name, yylineno, current.program_section()); + + if( getenv(__func__) ) { + yywarn("%s: %s label %3zu %10s for '%s' of %zu", __func__, + symbols_end() == last? "added" : "found", + symbol_index(symbol_elem_of(p)), p->type_str()+3, p->name, p->parent); + } + + return p; +} + +static struct cbl_refer_t * +use_vargs( struct vargs_t *v, struct cbl_refer_t *tgt) { + assert(v); + assert(tgt); + std::copy(v->args.begin(), v->args.end(), tgt); + v->args.clear(); + delete v; + + return tgt; +} + +void +current_t::repository_add_all() { + assert( !programs.empty() ); + auto& repository = programs.top().function_repository; + std::copy( function_descrs, function_descrs_end, + std::inserter(repository, repository.begin()) ); +} + +/* + * A function is added to the symbol table when first named, in Identification + * Division. It's also added to the current list of UDFs in current_t::udfs. + * Its return type and parameters, if any, are defined later, in Procedure + * Division. When they are parsed, we call udf_update to finalize the + * functions's descriptor, giving us enough information to validate the + * arguments at point of invocation. + */ +void +current_t::udf_update( const ffi_args_t *ffi_args ) { + auto L = cbl_label_of(symbol_at(program_index())); + assert(L); + assert(L->type == LblFunction); + assert(L->returning); + if( ! ffi_args ) return; + assert(ffi_args->elems.size() < sizeof(function_descr_t::types)); + + auto returning = cbl_field_of(symbol_at(L->returning)); + auto key = function_descr_t::init(L->name); + auto func = udfs.find(key); + assert(func != udfs.end()); + + function_descr_t udf = *func; + + udf.ret_type = returning->type; + udf.token = ffi_args->elems.empty()? FUNCTION_UDF_0 : FUNCTION_UDF; + auto types = ffi_args->parameter_types(); + strcpy(udf.types, types); + + std::transform( ffi_args->elems.begin(), ffi_args->elems.end(), + std::back_inserter(udf.linkage_fields), + []( const cbl_ffi_arg_t& arg ) { + return function_descr_arg_t( field_index( arg.refer.field ), + arg.crv, arg.optional ); + } ); + + udfs.erase(func); + auto result = udfs.insert(udf); + assert(result.second); +} + +bool +current_t::udf_args_valid( const cbl_label_t *L, + const std::list<cbl_refer_t>& args, + std::vector<function_descr_arg_t>& params /*out*/ ) +{ + auto key = function_descr_t::init(L->name); + auto func = udfs.find(key); + assert(func != udfs.end()); + function_descr_t udf = *func; + params = udf.linkage_fields; + + if( udf.linkage_fields.size() < args.size() ) { + auto loc = symbol_field_location(field_index(args.back().field)); + error_msg(loc, "too many parameters for UDF %s", L->name); + return false; + } + + size_t i = 0; + for( cbl_refer_t arg : args ) { + if( arg.field ) { // else omitted + auto tgt = cbl_field_of(symbol_at(udf.linkage_fields.at(i).isym)); + if( ! valid_move(tgt, arg.field) ) { + auto loc = symbol_field_location(field_index(arg.field)); + error_msg(loc, "FUNCTION %s arg %zu, '%s' cannot be passed to %s, type %s", + L->name, i, arg.field->pretty_name(), + tgt->pretty_name(), 3 + cbl_field_type_str(tgt->type) ); + return false; + } + } + i++; + } + return true; +} + +bool +current_t::repository_add( const char name[]) { + assert( !programs.empty() ); + function_descr_t arg = function_descr_t::init(name); + auto parg = std::find( function_descrs, function_descrs_end, arg ); + if( parg == function_descrs_end ) return false; + auto p = programs.top().function_repository.insert(*parg); + if( yydebug ) { + for( auto descr : programs.top().function_repository ) { + dbgmsg("%s:%d: %-20s %-20s %-20s", __func__, __LINE__, + keyword_str(descr.token), descr.name, descr.cname); + } + } + return p.second; +} + +int +current_t::repository_in( const char name[]) { + assert( !programs.empty() ); + auto isym = programs.top().program_index; + // possible to call self + auto self = cbl_label_of(symbol_at(isym)); + if( self->type == LblFunction ) { + if( 0 == strcasecmp(self->name, name) ) { + return FUNCTION_UDF; + } + } + function_descr_t arg = function_descr_t::init(name); + auto repository = programs.top().function_repository; + auto p = repository.find(arg); + return p != repository.end()? p->token : 0; +} + +int repository_function_tok( const char name[] ) { + return current.repository_in(name); +} + +function_descr_t +function_descr_t::init( int isym ) { + function_descr_t descr = { .token = FUNCTION_UDF_0, .ret_type = FldInvalid }; + auto L = cbl_label_of(symbol_at(isym)); + bool ok = namcpy(YYLTYPE(), descr.name, L->name); + gcc_assert(ok); + return descr; +} + +arith_t::arith_t( cbl_arith_format_t format, refer_list_t * refers ) + : format(format), on_error(NULL), not_error(NULL) +{ + std::copy( refers->refers.begin(), refers->refers.end(), back_inserter(A) ); + refers->refers.clear(); + delete refers; +} + + +cbl_key_t::cbl_key_t( const sort_key_t& that ) + : ascending(that.ascending) + , nfield(that.fields.size()) + , fields(NULL) +{ + if( nfield > 0 ) { + fields = new cbl_field_t* [nfield]; + std::copy(that.fields.begin(), that.fields.end(), fields); + } +} + +static cbl_refer_t * +ast_op( cbl_refer_t *lhs, char op, cbl_refer_t *rhs ) { + assert(lhs); + assert(rhs); + if( ! (is_numeric(lhs->field) && is_numeric(rhs->field)) ) { + // If one of the fields isn't numeric, allow for index addition. + switch(op) { + case '+': + case '-': + // Simple addition OK for table indexes. + if( lhs->field->type == FldIndex || rhs->field->type == FldIndex ) { + goto ok; + } + } + + auto f = !is_numeric(lhs->field)? lhs->field : rhs->field; + auto loc = symbol_field_location(field_index(f)); + error_msg(loc, "'%s' is not numeric", f->name); + return NULL; + } + ok: + cbl_field_t skel = determine_intermediate_type( *lhs, op, *rhs ); + cbl_refer_t *tgt = new_reference_like(skel); + if( !mode_syntax_only() ) { + parser_op( *tgt, *lhs, op, *rhs, current.compute_label() ); + } + return tgt; +} + +static void +ast_add( arith_t *arith ) { + size_t nC = arith->tgts.size(), nA = arith->A.size(); + cbl_num_result_t *pC, C[nC]; + cbl_refer_t *pA, A[nA]; + + pC = use_any(arith->tgts, C); + pA = use_any(arith->A, A); + + if( getenv(__func__) ) { + dbgmsg("%s:%d: %-12s C{%zu %p} A{%zu %p}", __func__, __LINE__, + arith->format_str(), nC, pC, nA, pA ); + } + parser_add( nC, pC, nA, pA, arith->format, arith->on_error, arith->not_error ); + + ec_type_t handled = arith->on_error || arith->not_error ? ec_size_e : ec_none_e; + current.declaratives_evaluate(handled); +} + +static bool +ast_subtract( arith_t *arith ) { + size_t nC = arith->tgts.size(), nA = arith->A.size(), nB = arith->B.size(); + cbl_num_result_t *pC, C[nC]; + cbl_refer_t *pA, A[nA], *pB, B[nB]; + + pC = use_any(arith->tgts, C); + pA = use_any(arith->A, A); + pB = use_any(arith->B, B); + + parser_subtract( nC, pC, nA, pA, nB, pB, arith->format, arith->on_error, arith->not_error ); + + ec_type_t handled = arith->on_error || arith->not_error ? ec_size_e : ec_none_e; + current.declaratives_evaluate(handled); + return true; +} + +static bool +ast_multiply( arith_t *arith ) { + size_t nC = arith->tgts.size(), nA = arith->A.size(), nB = arith->B.size(); + cbl_num_result_t *pC, C[nC]; + cbl_refer_t *pA, A[nA], *pB, B[nB]; + + pC = use_any(arith->tgts, C); + pA = use_any(arith->A, A); + pB = use_any(arith->B, B); + + parser_multiply( nC, pC, nA, pA, nB, pB, arith->on_error, arith->not_error ); + + ec_type_t handled = arith->on_error || arith->not_error ? ec_size_e : ec_none_e; + current.declaratives_evaluate(handled); + return true; +} + +static bool +ast_divide( arith_t *arith ) { + size_t nC = arith->tgts.size(), nA = arith->A.size(), nB = arith->B.size(); + cbl_num_result_t *pC, C[nC]; + cbl_refer_t *pA, A[nA], *pB, B[nB]; + + pC = use_any(arith->tgts, C); + pA = use_any(arith->A, A); + pB = use_any(arith->B, B); + + parser_divide( nC, pC, nA, pA, nB, pB, + arith->remainder, arith->on_error, arith->not_error ); + + ec_type_t handled = arith->on_error || arith->not_error ? ec_size_e : ec_none_e; + current.declaratives_evaluate(handled); + return true; +} + +/* + * Populate a parser API struct from lists built up by the parser. + * The API doesn't use STL containers or classes that exist only for + * the convenience of the parser. +*/ +struct stringify_src_t : public cbl_string_src_t { + stringify_src_t( const refer_marked_list_t& marked = refer_marked_list_t() ) + : cbl_string_src_t( marked.marker? *marked.marker : null_reference, + marked.refers.size(), + new cbl_refer_t[marked.refers.size()] ) + { + std::copy( marked.refers.begin(), marked.refers.end(), inputs ); + } + + static void dump( const cbl_string_src_t& src ) { + dbgmsg( "%s:%d:, %zu inputs delimited by %s:", __func__, __LINE__, + src.ninput, + src.delimited_by.field? field_str(src.delimited_by.field) : "SIZE" ); + std::for_each(src.inputs, src.inputs + src.ninput, dump_input); + } + + protected: + static void dump_input( const cbl_refer_t& refer ) { + yywarn( "%s:\t%s", __func__, field_str(refer.field) ); + } +}; + +void +stringify( refer_collection_t *inputs, + cbl_refer_t into, cbl_refer_t pointer, + cbl_label_t *on_error, + cbl_label_t *not_error ) +{ + size_t n = inputs->lists.size(); + stringify_src_t sources[n]; + + if( inputs->lists.back().marker == NULL ) { + inputs->lists.back().marker = cbl_refer_t::empty(); + } + assert( inputs->lists.back().marker ); + std::copy( inputs->lists.begin(), inputs->lists.end(), sources ); + if( getenv(__func__) ) { + std::for_each(sources, sources+n, stringify_src_t::dump); + } + parser_string( into, pointer, n, sources, on_error, not_error ); +} + +void +unstringify( cbl_refer_t& src, + refer_list_t *delimited, + unstring_into_t * into, + cbl_label_t *on_error, + cbl_label_t *not_error ) +{ + size_t ndelimited = delimited? delimited->size() : 0; + cbl_refer_t delimiteds[1 + ndelimited], *pdelimited = NULL; + if( ndelimited > 0 ) { + pdelimited = delimited->use_list( delimiteds ); + } + + size_t noutput = into->size(); + cbl_refer_t outputs[noutput]; + into->use_list( outputs, unstring_tgt_t::tgt_of ); + + cbl_refer_t delimiters[noutput]; + into->use_list( delimiters, unstring_tgt_t::delimiter_of ); + + cbl_refer_t counts[noutput]; + into->use_list( counts, unstring_tgt_t::count_of ); + + parser_unstring( src, + ndelimited, pdelimited, + // into + noutput, + outputs, delimiters, counts, + into->pointer, into->tally, + on_error, not_error ); + delete into; +} + +static const char * +data_section_str( data_section_t section ) { + switch(section) { + case not_data_datasect_e: + return "NONE"; + case local_storage_datasect_e: + return "LOCAL"; + case file_datasect_e: + return "FILE"; + case working_storage_datasect_e: + return "WORKING"; + case linkage_datasect_e: + return "LINKAGE"; + } + gcc_unreachable(); + return NULL; +} + +static bool +current_data_section_set(const YYLTYPE& loc, data_section_t data_section ) { + // order is mandatory + if( data_section < current_data_section ) { + error_msg(loc, "%s SECTION must precede %s SECTION", + data_section_str(data_section), + data_section_str(current_data_section)); + return false; + } + + cbl_section_type_t type = file_sect_e; + + switch(data_section) { + case not_data_datasect_e: + gcc_unreachable(); + break; + case file_datasect_e: + type = file_sect_e; + break; + case working_storage_datasect_e: + type = working_sect_e; + break; + case local_storage_datasect_e: + type = local_sect_e; + break; + case linkage_datasect_e: + type = linkage_sect_e; + break; + } + + cbl_section_t section = { type, yylineno, NULL }; + + if( ! symbol_section_add(PROGRAM, §ion) ) { + error_msg(loc, "could not add section %s to program %s, exists line %d", + section.name(), current.program()->name, + symbol_section(PROGRAM, §ion)->line ); + return false; + } + + current_data_section = data_section ; + return true; +} + +void apply_declaratives() { + // look for declaratives for this procedure, and all procedures + bool tf[2] = { false, true }; + for( bool *yn = tf; yn < tf + COUNT_OF(tf); yn++ ) { + auto declaratives = current.debugging_declaratives(*yn); + for( auto p = declaratives.begin() ; + p != declaratives.end(); p++ ) { + // TODO: delarative for PARA OF SECTION + cbl_label_t *label = symbol_label(PROGRAM, LblNone, 0, p->c_str()); + assert(label); + parser_perform(label); + } + } +} +#define FIG_CONST(X) constant_of(constant_index((X))) + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +int warn_abi_version = -1; +int cp_unevaluated_operand; +void +lang_check_failed (const char* file, int line, const char* function) {} + +#pragma GCC diagnostic pop + +void ast_inspect( cbl_refer_t& input, bool backward, ast_inspect_list_t& inspects ) { + if( yydebug ) { + dbgmsg("%s:%d: INSPECT %zu operations on %s, line %d", __func__, __LINE__, + inspects.size(), input.field->name, yylineno); + } + std::for_each(inspects.begin(), inspects.end(), dump_inspect); + auto array = inspects.as_array(); + parser_inspect( input, backward, inspects.size(), array ); + delete[] array; +} + +static const char * +cbl_refer_str( char output[], const cbl_refer_t& R ) { + sprintf( output, "refer = %s %s %s", + R.field? field_str(R.field) : "(none)", + R.is_table_reference()? "(table)" : "", + R.is_refmod_reference()? "(refmod)" : "" ); + return output; +} + +static void +dump_inspect_match( const cbl_inspect_match_t& M ) { + static char fields[3][4 * 64]; + cbl_refer_str(fields[0], M.matching); + cbl_refer_str(fields[1], M.before.identifier_4); + cbl_refer_str(fields[2], M.after.identifier_4); + + yywarn( "matching %s \n\t\tbefore %s%s \n\t\tafter %s%s", + fields[0], + M.before.initial? "initial " : "", fields[1], + M.after.initial? "initial " : "", fields[2] ); +} + +static void +dump_inspect_replace( const cbl_inspect_replace_t& R ) { + static char fields[4][4 * 64]; + cbl_refer_str(fields[0], R.matching); + cbl_refer_str(fields[1], R.before.identifier_4); + cbl_refer_str(fields[2], R.after.identifier_4); + cbl_refer_str(fields[3], R.replacement); + + yywarn( "matching %s \n\treplacement %s\n\t\tbefore %s%s \n\t\tafter %s%s", + fields[0], fields[3], + R.before.initial? "initial " : "", fields[1], + R.after.initial? "initial " : "", fields[2] ); +} + +static const char * +bound_str( cbl_inspect_bound_t bound ) { + switch(bound) { + case bound_characters_e: return "characters"; + case bound_all_e: return "all"; + case bound_first_e: return "first"; + case bound_leading_e: return "leading"; + case bound_trailing_e: return "trailing"; + } + return "bound?"; +} + +/* + * INITIALIZE + */ +static data_category_t +data_category_of( const cbl_refer_t& refer ) { + assert(refer.field); + switch( refer.field->type ) { + case FldInvalid: + assert(refer.field->type != FldInvalid); + return data_category_none; + + case FldGroup: + return data_category_none; + + case FldLiteralA: + case FldAlphanumeric: + return refer.field->has_attr(all_alpha_e)? + data_alphabetic_e : data_alphanumeric_e; + + case FldNumericBinary: + case FldFloat: + case FldNumericBin5: + case FldPacked: + case FldNumericDisplay: + case FldLiteralN: + return data_numeric_e; + + case FldNumericEdited: + return data_numeric_edited_e; + case FldAlphaEdited: + return data_alphanumeric_edited_e; + + case FldPointer: + return data_data_pointer_e; + + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + case FldDisplay: + case FldBlob: + return data_category_none; + } + gcc_unreachable(); + return data_category_none; +} + +static bool +valid_target( const cbl_refer_t& refer ) { + assert(refer.field); + switch( refer.field->type ) { + case FldInvalid: + assert(refer.field->type != FldInvalid); + return false; + case FldGroup: + case FldAlphanumeric: + case FldNumericBinary: + case FldFloat: + case FldNumericBin5: + case FldPacked: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldPointer: + return true; + case FldLiteralA: + case FldLiteralN: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + case FldDisplay: + case FldBlob: + return false; + } + gcc_unreachable(); + return false; +} + +static _Float128 +numstr2i( const char input[], radix_t radix ) { + _Float128 output = 0.0; + size_t bit, integer = 0; + int erc=0, n=0; + + switch( radix ) { + case decimal_e: { // Use decimal point for comma, just in case. + auto local = xstrdup(input), pend = local; + if( !local ) { erc = -1; break; } + std::replace(local, local + strlen(local), ',', '.'); + output = strtof128(local, &pend); + n = pend - local; + } + break; + case hexadecimal_e: + erc = sscanf(input, "%zx%n", &integer, &n); + output = integer; + break; + case boolean_e: + for( const char *p = input; *p != '\0'; p++ ) { + if( ssize_t(8 * sizeof(integer) - 1) < p - input ) { + yywarn("'%s' was accepted as %d", input, integer); + return integer; + } + switch(*p) { + case '0': bit = 0; break; + case '1': bit = 1; break; + break; + default: + yywarn("'%s' was accepted as %d", input, integer); + return integer; + } + integer = (integer << (p - input)); + integer |= bit; + } + return integer; + break; + } + if( erc == -1 || n < int(strlen(input)) ) { + yywarn("'%s' was accepted as %lld", input, output); + } + return output; +} + +static inline cbl_field_t * +new_literal( const char initial[], enum radix_t radix ) { + auto attr = constant_e; + + switch( radix ) { + case decimal_e: + break; + case hexadecimal_e: + attr = hex_encoded_e; + break; + case boolean_e: + attr = bool_encoded_e; + break; + } + return new_literal(strlen(initial), initial, + cbl_field_attr_t(constant_e | attr)); +} + +class is_elementary_type { // for INITIALIZE purposes + bool with_filler; +public: + is_elementary_type( bool with_filler ) : with_filler(with_filler) {} + + bool operator()( const symbol_elem_t& elem ) const { + if( elem.type != SymField ) return false; + const cbl_field_t *f = cbl_field_of(&elem); + if( symbol_redefines(f) ) return false; + return ( f->has_attr(filler_e) && with_filler ) + || ::is_elementary(f->type); + } +}; + +size_t end_of_group( size_t igroup ); + +static std::list<cbl_refer_t> +symbol_group_data_members( cbl_refer_t refer, bool with_filler ) { + std::list<cbl_refer_t> refers; + refers.push_front( refer ); + + if( refer.field->type != FldGroup ) return refers; + + class refer_of : public cbl_refer_t { + public: + refer_of( const cbl_refer_t& refer ) : cbl_refer_t(refer) {} + cbl_refer_t operator()( symbol_elem_t& elem ) { + this->field = cbl_field_of(&elem); // preserve subscript/refmod + return *this; + } + }; + + size_t igroup = field_index(refer.field), eogroup = end_of_group(igroup); + std::list<symbol_elem_t> elems; + is_elementary_type is_elem(with_filler); + + std::copy_if( symbols_begin(igroup), symbols_begin(eogroup), + std::back_inserter(elems), [is_elem]( const symbol_elem_t& elem ) { + return is_elem(elem) || cbl_field_of(&elem)->occurs.ntimes() > 0; } ); + std::transform( elems.begin(), elems.end(), + std::back_inserter(refers), refer_of(refer) ); + return refers; +} + +struct expand_group : public std::list<cbl_refer_t> { + static cbl_refer_t referize( cbl_field_t *field ) { + return cbl_refer_t(field); + } + bool with_filler; + expand_group( bool with_filler ) : with_filler(with_filler) {} + + void operator()( const cbl_refer_t& refer ) { + assert(refer.field); + if( refer.field->type != FldGroup ) { + push_back(refer); + return; + } + std::list<cbl_refer_t> members = symbol_group_data_members( refer, + with_filler ); + std::copy( members.begin(), members.end(), back_inserter(*this) ); + } +}; + + +static const char * initial_default_value; + const char * wsclear() { return initial_default_value; } + +void +wsclear( char ch ) { + static char byte = ch; + initial_default_value = &byte; + current.program_needs_initial(); +} + +static void +initialize_allocated( cbl_refer_t input ) { + cbl_num_result_t result = { truncation_e, input }; + std::list<cbl_num_result_t> results; + results.push_back(result); + initialize_statement(results, true, + data_category_all, category_map_t()); +} + +static int +initialize_with( cbl_refer_t tgt ) { + if( tgt.field->type == FldPointer ) return ZERO; + if( tgt.is_refmod_reference() ) return SPACES; + return is_numeric(tgt.field)? ZERO : SPACES; +} + +static bool +initialize_one( cbl_num_result_t target, bool with_filler, + data_category_t value_category, + const category_map_t& replacements, + bool explicitly ) +{ + cbl_refer_t& tgt( target.refer ); + if( ! valid_target(tgt) ) return false; + + // Rule 1 c: is valid for VALUE, REPLACING, or DEFAULT + // If no VALUE (category none), set to blank/zero. + if( value_category == data_category_none && replacements.empty() ) { + auto token = initialize_with(tgt); + auto src = constant_of(constant_index(token)); + cbl_refer_t source(src); + auto s = wsclear(); + if( s ) { + char ach[5]; + int v = *s; + sprintf(ach, "%d", v); + source.field = new_literal(ach); + source.addr_of = true; + } + + if( tgt.field->type == FldPointer ) { + parser_set_pointers(1, &tgt, source); + } else { + parser_move(tgt, src, current_rounded_mode()); + } + if( getenv(__func__) ) { + yywarn("%s:%-5s: %s", __func__, keyword_str(token), field_str(tgt.field)); + } + return true; + } + + /* + * Either VALUE or REPLACING specified. + */ + + if( value_category == data_category_all || + value_category == data_category_of(tgt) ) { + // apply any applicable VALUE + if( explicitly || tgt.field->data.initial ) { + assert( with_filler || !tgt.field->has_attr(filler_e) ); + if( tgt.field->data.initial ) { + parser_initialize(tgt); + } + } + + if( getenv(__func__) ) { + yywarn("%s: value: %s", __func__, field_str(tgt.field)); + } + } + + // apply REPLACING, possibly overwriting VALUE + // N.B., may be wrong: + /* + * "If the data item does not qualify as a receiving-operand because of the + * VALUE phrase, but does qualify because of the REPLACING phrase ..." + */ + auto r = replacements.find(data_category_of(tgt)); + if( r != replacements.end() ) { + parser_move( tgt, *r->second ); + + if( getenv(__func__) ) { + cbl_field_t *from = r->second->field; + char from_str[128]; // copy static buffer from field_str + strcpy( from_str, field_str(from) ); + yywarn("%s: move: %-18s %s \n\t from %-18s %s", __func__, + cbl_field_type_str(tgt.field->type) + 3, field_str(tgt.field), + cbl_field_type_str(from->type) + 3, from_str); + } + return true; + } + + return true; + +} + +typedef std::pair<cbl_field_t*,cbl_field_t*> field_span_t; +typedef std::pair<size_t, size_t> cbl_bytespan_t; + +static void +dump_spans( size_t isym, + const cbl_field_t *table, + const std::list<field_span_t>& spans, + size_t nrange, + const cbl_bytespan_t ranges[], + size_t depth, + const std::list<cbl_subtable_t>& subtables ) +{ + int i=0; + assert( nrange == 0 || nrange == spans.size() ); + + if( isym != field_index(table) ) { + dbgmsg("%s:%d: isym %zu is not #%zu %02u %s", __func__, __LINE__, + isym, field_index(table), table->level, table->name); + } + dbgmsg( "%s: [%zu] #%zu %s has %zu spans and %zu subtables", + __func__, depth, isym, table->name, nrange, subtables.size() ); + for( auto span : spans ) { + unsigned int last_level = 0; + const char *last_name = "<none>"; + if( span.second ) { + last_level = span.second->level; + last_name = span.second->name; + } + + char at_subtable[64] = {}; + size_t offset = nrange? ranges[i].first : 0; + auto p = std::find_if(subtables.begin(), subtables.end(), + [offset]( const cbl_subtable_t& tbl ) { + return tbl.offset == offset; + }); + if( p != subtables.end() ) { + sprintf(at_subtable, "(subtable #%zu)", p->isym); + } + dbgmsg("\t %02u %-20s to %02u %-20s: %3zu-%zu %s", + span.first->level, span.first->name, + last_level, last_name, + nrange? ranges[i].first : 1, + nrange? ranges[i].second : 0, + at_subtable); + i++; + } + if( ! subtables.empty() ) { + dbgmsg("\ttable #%zu has %zu subtables", isym, subtables.size()); + for( auto tbl : subtables ) { + dbgmsg("\t #%zu @ %4zu", tbl.isym, tbl.offset); + } + } +} + +/* + * After the 1st record is initialized, copy it to the others. + */ +static bool +initialize_table( cbl_num_result_t target, + size_t nspan, const cbl_bytespan_t spans[], + const std::list<cbl_subtable_t>& subtables ) +{ + if( getenv("initialize_statement") ) { + dbgmsg("%s:%d: %s ", __func__, __LINE__, target.refer.str()); + } + assert( target.refer.nsubscript == dimensions(target.refer.field) ); + const cbl_refer_t& src( target.refer ); + size_t n( src.field->occurs.ntimes()); + assert( 0 < n ); + + size_t isym( field_index(src.field) ); + size_t ntbl = subtables.size(); + cbl_subtable_t tbls[ntbl], *ptbls = 0 < ntbl? tbls : NULL; + std::copy( subtables.begin(), subtables.end(), tbls ); + parser_initialize_table( n, src, nspan, spans, isym, ntbl, ptbls ); + return true; +} + +static cbl_refer_t +synthesize_table_refer( cbl_refer_t tgt ) { + // For a table, use supplied subscripts or start with 1. + auto ndim( dimensions(tgt.field) ); + if( tgt.nsubscript < ndim ) { // it's an incomplete table + cbl_refer_t subscripts[ndim]; + for( size_t i=0; i < ndim; i++ ) { + if( i < tgt.nsubscript ) { + subscripts[i] = tgt.subscripts[i]; + continue; + } + subscripts[i].field = new_tempnumeric(); + parser_set_numeric(subscripts[i].field, 1); + } + return cbl_refer_t( tgt.field, ndim, subscripts ); + } + return tgt; +} + +static size_t +group_offset( const cbl_field_t *field ) { + if( field->parent ) { + auto e = symbol_at(field->parent); + if( e->type == SymField ) { + auto parent = cbl_field_of(e); + return field->offset - parent->offset; + } + } + return field->offset; +} + +static bool +initialize_statement( const cbl_num_result_t& target, bool with_filler, + data_category_t value_category, + const category_map_t& replacements, + size_t depth = 0 ) +{ + if( getenv(__func__) ) { + dbgmsg("%s:%d: %2zu: %s (%s%zuR)", + __func__, __LINE__, depth, target.refer.str(), + with_filler? "F" : "", + replacements.size()); + } + const cbl_refer_t& tgt( target.refer ); + assert(dimensions(tgt.field) == tgt.nsubscript || 0 < depth); + assert(!is_literal(tgt.field)); + + if( tgt.field->type == FldGroup ) { + if( tgt.field->data.initial ) goto initialize_this; + if( tgt.is_refmod_reference() ) goto initialize_this; + // iterate over group memebers + auto imember = field_index(tgt.field); + auto eogroup = end_of_group(imember); + bool fOK = true; + std::list<cbl_field_t*> members; + std::list<cbl_subtable_t> subtables; + + while( ++imember < eogroup ) { + auto e = symbol_at(imember); + if( e->type != SymField ) continue; + auto f = cbl_field_of(e); + if( ! (f->type == FldGroup || is_elementary(f->type)) ) continue; + if( ! symbol_redefines(f) ) { + members.push_back(f); + if( is_table(f) ) { + size_t offset = group_offset(f); + subtables.push_back( cbl_subtable_t { offset, imember } ); + } + cbl_num_result_t next_target(target); + next_target.refer.field = f; + // recurse on each member, which might be a table or group + fOK = fOK && initialize_statement( next_target, with_filler, value_category, + replacements, 1 + depth ); + } + if( f->type == FldGroup ) { + imember = end_of_group(imember) - 1; + } + } + + if( fOK && is_table(tgt.field) ) { + cbl_num_result_t output = { target.rounded, synthesize_table_refer(tgt) }; + if( tgt.nsubscript < output.refer.nsubscript ) { // tgt is whole table + std::list<field_span_t> field_spans; + static const field_span_t empty_span = { NULL, NULL }; + field_span_t span = empty_span; + bool honor_filler = false; + // construct non-filler field spans + for( auto member : members ) { + if( !with_filler && member->has_attr(filler_e) ) { + if( span.first ) { // conclude the span and begin to skip filler + field_spans.push_back(span); + span = empty_span; + honor_filler = true; + } + continue; + } + if( span.first ) { + span.second = member; // extend the span + } else { + span.first = member; // start a new span + } + } + if( span.first ) { + field_spans.push_back(span); + } + // convert field spans to byte ranges + cbl_bytespan_t ranges[ field_spans.size() ]; + size_t nrange = 0; + if( honor_filler ) { + nrange = COUNT_OF(ranges); + std::transform( field_spans.begin(), field_spans.end(), ranges, + []( const auto& span ) { + size_t first, second; + first = second = group_offset(span.first); + if( ! span.second ) { + second += std::max(span.first->data.capacity, + span.first->data.memsize); + } else { + second = group_offset(span.second) + - group_offset(span.first); + second += std::max(span.second->data.capacity, + span.second->data.memsize); + } + return std::make_pair(first, second); + } ); + } + if( getenv("initialize_statement") ) { + dump_spans( field_index(output.refer.field), output.refer.field, + field_spans, nrange, ranges, depth, subtables ); + } + return initialize_table( output, nrange, ranges, subtables ); + } + } + return fOK; + } + + if( !is_elementary(tgt.field->type) ) return false; + + assert(is_elementary(tgt.field->type)); + assert(data_category_of(tgt) != data_category_none); + + /* + * Initialize elementary field. + */ + + initialize_this: + // Cannot initialize constants + if( is_constant(tgt.field) ) { + auto loc = symbol_field_location(field_index(tgt.field)); + error_msg(loc, "%s is constant", name_of(tgt.field)); + return false; + } + // Ignore filler unless instructed otherwise. + if( !with_filler && tgt.field->has_attr(filler_e) ) return true; + + cbl_num_result_t output = { target.rounded, synthesize_table_refer(tgt) }; + + bool fOK = initialize_one( output, with_filler, value_category, + replacements, depth == 0 ); + + if( fOK && is_table(tgt.field) ) { + return initialize_table( output, + 0, NULL, std::list<cbl_subtable_t>() ); + } + + return fOK; +} + +const char * +data_category_str( data_category_t category ) { + switch(category) { + case data_category_none: return "category_none"; + case data_category_all: return "category_all"; + case data_alphabetic_e: return "alphabetic"; + case data_alphanumeric_e: return "alphanumeric"; + case data_alphanumeric_edited_e: return "alphanumeric_edited"; + case data_boolean_e: return "data_boolean"; + case data_data_pointer_e: return "data_data_pointer"; + case data_function_pointer_e: return "data_function_pointer"; + case data_msg_tag_e: return "data_msg_tag"; + case data_dbcs_e: return "dbcs"; + case data_egcs_e: return "egcs"; + case data_national_e: return "national"; + case data_national_edited_e: return "national_edited"; + case data_numeric_e: return "numeric"; + case data_numeric_edited_e: return "numeric_edited"; + case data_object_referenc_e: return "data_object_referenc"; + case data_program_pointer_e: return "data_program_pointer"; + } + return "???"; +} + +static void +initialize_statement( std::list<cbl_num_result_t>& tgts, bool with_filler, + data_category_t value_category, + const category_map_t& replacements) { + if( yydebug && getenv(__func__) ) { + yywarn( "%s: %zu targets, %s filler", + __func__, tgts.size(), with_filler? "with" : "no"); + for( auto tgt : tgts ) { + fprintf( stderr, "%28s: %s\n", __func__, name_of(tgt.refer.field) ); + } + for( const auto& elem : replacements ) { + fprintf( stderr, "%28s: %s <-%s\n", __func__, + data_category_str(elem.first), + name_of(elem.second->field) ); + } + } + + bool is_refmod = std::any_of( tgts.begin(), tgts.end(), + []( const auto& tgt ) { + return tgt.refer.is_refmod_reference(); + } ); + if( false && is_refmod ) { // refmod seems valid per ISO + dbgmsg("INITIALIZE cannot initialize a refmod"); + return; + } + + for( auto tgt : tgts ) { + initialize_statement( tgt, with_filler, value_category, + replacements ); + } + tgts.clear(); +} + +static void +dump_inspect_oper( const cbl_inspect_oper_t& op ) { + dbgmsg("\t%s: %zu \"matches\", %zu \"replaces\"", + bound_str(op.bound), + op.matches? op.n_identifier_3 : 0, op.replaces? op.n_identifier_3 : 0); + if( op.matches ) + std::for_each(op.matches, op.matches + op.n_identifier_3, dump_inspect_match); + if( op.replaces ) + std::for_each(op.replaces, op.replaces + op.n_identifier_3, dump_inspect_replace); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" + +static void +dump_inspect( const cbl_inspect_t& I ) { + if( !yydebug ) return; + if( I.tally.field ) { + fprintf( stderr, "\tTALLYING to %s %s %s:\n", + field_str(I.tally.field), + I.tally.is_table_reference()? "(table)" : "", + I.tally.is_refmod_reference()? "(refmod)" : "" ); + } else { + fprintf( stderr, "\tREPLACING:\n" ); + } + std::for_each( I.opers, I.opers + I.nbound, dump_inspect_oper ); +} +#pragma GCC diagnostic pop + +#include <iterator> + +struct declarative_file_list_t : protected cbl_declarative_t { + declarative_file_list_t( const cbl_declarative_t& d ) + : cbl_declarative_t(d) + { + if( nfile > 0 ) + assert(d.files[0] == this->files[0]); + } + static std::ostream& + splat( std::ostream& os, const declarative_file_list_t& dcl ) { + static int i=0; + + os << "static size_t dcl_file_list_" << i++ + << "[" << dcl.nfile << "] = { "; + std::ostream_iterator<size_t> out(os, ", "); + std::copy( dcl.files, dcl.files + dcl.nfile, out ); + return os << "};"; + } +}; + +std::ostream& +operator<<( std::ostream& os, const declarative_file_list_t& dcl ) { + return dcl.splat( os, dcl ); +} + +static declarative_file_list_t +file_list_of( const cbl_declarative_t& dcl ) { + return dcl; +} + +std::ostream& +operator<<( std::ostream& os, const cbl_declarative_t& dcl ) { + static int i=0; + + return os << + "\t{ " << dcl.section << ", " + << std::boolalpha << dcl.global << ", " + << ec_type_str(dcl.type) << ", " + << dcl.nfile << ", " + << "dcl_file_list_" << i++ << ", " + << cbl_file_mode_str(dcl.mode) << " }" + << std::flush; +} + +void parser_add_declaratives( size_t n, cbl_declarative_t *declaratives) { + const char *prog = cbl_label_of(symbol_at(PROGRAM))->name; + char *filename = xasprintf("declaratives.%s.h", prog); + std::ofstream os(filename); + { + std::ostream_iterator<declarative_file_list_t> out(os, "\n"); + std::transform( declaratives, declaratives + n, out, file_list_of ); + } + os << "\nstatic cbl_declarative_base_t declaratives[] = {\n"; + std::ostream_iterator<cbl_declarative_t> out(os, ", \n"); + std::copy( declaratives, declaratives + n, out ); + os << "};\n" << std::endl; +} + +cbl_field_t * +new_literal( const literal_t& lit, enum cbl_field_attr_t attr ) { + bool zstring = lit.prefix[0] == 'Z'; + if( !zstring && lit.data[lit.len] != '\0' ) { + dbgmsg("%s:%d: line %d, no NUL terminator '%-*.*s'{%zu/%zu}", + __func__, __LINE__, yylineno, + int(lit.len), int(lit.len), + lit.data, strlen(lit.data), lit.len); + } + assert(zstring || lit.data[lit.len] == '\0'); + + size_t attrs(attr); + attrs |= constant_e; + attrs |= literal_attr(lit.prefix); + + return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs)); +} + +bool +cbl_file_t::validate_forward( size_t isym ) const { + if( isym > 0 && FldForward == symbol_field_forward(isym)->type ) { + auto loc = symbol_field_location(isym); + error_msg(loc, "line %d: %s of %s is not defined", + this->line, cbl_field_of(symbol_at(isym))->name, + this->name ); + return false; + } + return true; +} + +bool +cbl_file_t::validate_key( const cbl_file_key_t& key ) const { + for( auto f = key.fields; f < key.fields + key.nfield; f++ ) { + if( ! validate_forward(*f) ) return false; + } + return true; +} + +bool +cbl_file_t::validate() const { + size_t members[] = { user_status, vsam_status, record_length }; + bool tf = true; + + for( auto isym : members ) { + if( ! validate_forward(isym) ) tf = false; + } + + for( auto p = keys; p < keys + nkey; p++ ) { + if( ! validate_key(*p) ) tf = false; + } + + return tf; +} + +bool +cbl_file_lock_t::mode_set( int token ) { + switch( token ) { + case MANUAL: mode = manual_e; break; + case RECORD: mode = record_e; break; + case AUTOMATIC: mode = automatic_e; break; + default: + return false; + } + return true; +} + +enum cbl_figconst_t +cbl_figconst_of( const char *value ) { + struct values_t { + const char *value; cbl_figconst_t type; + } static const values[] = { + { constant_of(constant_index(ZERO))->data.initial, zero_value_e }, + { constant_of(constant_index(SPACES))->data.initial, space_value_e }, + { constant_of(constant_index(HIGH_VALUES))->data.initial, high_value_e }, + { constant_of(constant_index(LOW_VALUES))->data.initial, low_value_e }, + { constant_of(constant_index(QUOTES))->data.initial, quote_value_e }, + { constant_of(constant_index(NULLS))->data.initial, null_value_e }, + }, *eovalues = values + COUNT_OF(values); + + auto p = std::find_if( values, eovalues, + [value]( const values_t& elem ) { + return elem.value == value; + } ); + + return p == eovalues? normal_value_e : p->type; +} + +cbl_field_attr_t +literal_attr( const char prefix[] ) { + switch(strlen(prefix)) { + case 0: return none_e; + + case 1: + switch(prefix[0]) { + case 'B': return bool_encoded_e; + case 'N': cbl_unimplemented("National"); return none_e; + case 'X': return hex_encoded_e; + case 'Z': return quoted_e; + } + break; + + case 2: + switch(prefix[1]) { + case 'X': + switch(prefix[0]) { + case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e); + case 'N': cbl_unimplemented("National"); return none_e; + } + break; + } + } + + // must be [BN]X + cbl_internal_error("'%s': invalid literal prefix", prefix); + gcc_unreachable(); + return none_e; +} + +bool +cbl_field_t::has_subordinate( const cbl_field_t *that ) const { + while( (that = parent_of(that)) != NULL ) { + if( field_index(this) == field_index(that) ) return true; + } + return false; +} + +bool +cbl_field_t::value_set( _Float128 value ) { + data.value = value; + char *initial = string_of(data.value); + if( !initial ) return false; + + // Trim trailing zeros. + char *p = initial + strlen(initial); + for( --p; initial <= p; --p ) { + if( *p != '0' ) break; + *p = '\0'; + } + + data.digits = (p - initial) + 1; + p = strchr(initial, '.'); + data.rdigits = p? initial + data.digits - p : 0; + + data.initial = initial; + data.capacity = type_capacity(type, data.digits); + return true; +} + +const char * +cbl_field_t::value_str() const { + return string_of(data.value); +} + +static const cbl_division_t not_syntax_only = cbl_division_t(-1); + cbl_division_t cbl_syntax_only = not_syntax_only; + +void +mode_syntax_only( cbl_division_t division ) { + cbl_syntax_only = division; +} + +// Parser moves to syntax-only mode if data-division errors preclude compilation. +bool +mode_syntax_only() { + return cbl_syntax_only != not_syntax_only + && cbl_syntax_only <= current_division; +} + +void +cobol_dialect_set( cbl_dialect_t dialect ) { + cbl_dialect = dialect; + if( dialect & dialect_ibm_e ) cobol_gcobol_feature_set(feature_embiggen_e); +} +cbl_dialect_t cobol_dialect() { return cbl_dialect; } + +static bool internal_ebcdic_locked = false; + +void internal_ebcdic_lock() { + internal_ebcdic_locked = true; +} +void internal_ebcdic_unlock() { + internal_ebcdic_locked = false; +} + +bool +cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature, bool on ) { + if( gcobol_feature == feature_internal_ebcdic_e ) { + if( internal_ebcdic_locked ) return false; + } + if( on ) { + cbl_gcobol_features |= gcobol_feature; + } else { + cbl_gcobol_features &= ~gcobol_feature; + } + return true; +} + +static bool +literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) { + if( r.field->has_attr(any_length_e) ) return true; + + const cbl_span_t& refmod(r.refmod); + + if( ! is_literal(refmod.from->field) ) { + if( ! refmod.len ) return true; + if( ! is_literal(refmod.len->field) ) return true; + auto edge = refmod.len->field->data.value; + if( 0 < edge ) { + if( --edge < r.field->data.capacity ) return true; + } + // len < 0 or not: 0 < from + len <= capacity + error_msg(loc, "%s(%s:%zu) out of bounds, " + "size is %u", + r.field->name, + refmod.from->name(), + size_t(refmod.len->field->data.value), + static_cast<unsigned int>(r.field->data.capacity) ); + return false; + } + + if( refmod.from->field->data.value > 0 ) { + auto edge = refmod.from->field->data.value; + if( --edge < r.field->data.capacity ) { + if( ! refmod.len ) return true; + if( ! is_literal(refmod.len->field) ) return true; + if( refmod.len->field->data.value > 0 ) { + edge += refmod.len->field->data.value; + if( --edge < r.field->data.capacity ) return true; + } + // len < 0 or not: 0 < from + len <= capacity + auto loc = symbol_field_location(field_index(r.field)); + error_msg(loc, "%s(%zu:%zu) out of bounds, " + "size is %u", + r.field->name, + size_t(refmod.from->field->data.value), + size_t(refmod.len->field->data.value), + static_cast<unsigned int>(r.field->data.capacity) ); + return false; + } + } + // not: 0 < from <= capacity + error_msg(loc,"%s(%zu) out of bounds, size is %u", + r.field->name, + size_t(refmod.from->field->data.value), + static_cast<unsigned int>(r.field->data.capacity) ); + return false; +} + +const cbl_field_t * +literal_subscript_oob( const cbl_refer_t& r, size_t& isub ); + +static bool +literal_subscripts_valid( YYLTYPE loc, const cbl_refer_t& name ) { + static char subs[ 7 * 32 ], *esub = subs + sizeof(subs); + char *p = subs; + size_t isub; + + // Find subscript in the supplied refer + const cbl_field_t *oob = literal_subscript_oob(name, isub); + if( oob ) { + const char *sep = ""; + for( auto r = name.subscripts; r < name.subscripts + name.nsubscript; r++ ) { + snprintf( p, esub - p, "%s%s", sep, nice_name_of(r->field) ); + sep = " "; + } + + const char *upper_phrase = ""; + if( ! oob->occurs.bounds.fixed_size() ) { + static char ub[32] = "boo"; + sprintf(ub, " to %lu", oob->occurs.bounds.upper); + upper_phrase = ub; + } + + // X(0): subscript 1 of for out of range for 02 X OCCURS 4 to 6 + error_msg(loc, "%s(%s): subscript %zu out of range " + "for %s %s OCCURS %lu%s", + oob->name, subs, 1 + isub, + oob->level_str(), oob->name, + oob->occurs.bounds.lower, upper_phrase ); + return false; + } + return true; +} + +static void +subscript_dimension_error( YYLTYPE loc, size_t nsub, const cbl_refer_t *scalar ) { + if( 0 == dimensions(scalar->field) ) { + error_msg(loc, "%zu subscripts provided for %s, " + "which has no dimensions", + nsub, scalar->name() ); + } else { + error_msg(loc, "%zu subscripts provided for %s, " + "which requires %zu dimensions", + nsub, scalar->name(), dimensions(scalar->field) ); + } +} + +static void +reject_refmod( YYLTYPE loc, cbl_refer_t scalar ) { + if( scalar.is_refmod_reference() ) { + error_msg(loc, "%s cannot be reference-modified here", scalar.name()); + } +} + +static bool +require_pointer( YYLTYPE loc, cbl_refer_t scalar ) { + if( scalar.field->type != FldPointer ) { + error_msg(loc, "%s must have USAGE POINTER", scalar.name()); + return false; + } + return true; +} + +static bool +require_numeric( YYLTYPE loc, cbl_refer_t scalar ) { + if( ! is_numeric(scalar.field) ) { + error_msg(loc, "%s must have numeric USAGE", scalar.name()); + return false; + } + return true; +} + +/* eval methods */ + +eval_subject_t::eval_subject_t() + : result( new_temporary(FldConditional) ) +{ + labels.when = label("when"); + labels.yeah = label("yeah"); + labels.done = label("done"); + pcol = columns.begin(); +} + +cbl_label_t * +eval_subject_t::label( const char skel[] ) { + static const cbl_label_t protolabel = { .type = LblEvaluate }; + cbl_label_t label = protolabel; + label.line = yylineno; + size_t n = 1 + symbols_end() - symbols_begin(); + snprintf(label.name, sizeof(label.name), "_eval_%s_%zu", skel, n); + auto output = symbol_label_add( PROGRAM, &label ); + return output; +} + +bool +eval_subject_t::compatible( const cbl_field_t *object ) const { + assert(pcol != columns.end()); + assert(pcol->field); + auto subject(pcol->field); + if( subject->type != object->type ) { + if( is_conditional(subject) ) { + return is_conditional(object); + } + return ! is_conditional(object); + } + return true; +} + + +cbl_field_t * +eval_subject_t::compare( int token ) { + size_t tf( very_false_register() ); + + switch( token ) { + case ANY: + parser_logop(result, + field_at(very_true_register()), and_op, + field_at(very_true_register())); + break; + case TRUE_kw: + tf = very_true_register(); + __attribute__((fallthrough)); + case FALSE_kw: + assert( is_conditional(pcol->field) ); + parser_logop(this->result, pcol->field, xnor_op, field_at(tf)); + break; + default: + assert(token == -1 && false ); + break; + } + return result; +} + +cbl_field_t * +eval_subject_t::compare( relop_t op, const cbl_refer_t& object, bool deciding ) { + auto subject(*pcol); + if( compatible(object.field) ) { + if( ! is_conditional(subject.field) ) { + auto result = deciding? this->result : new_temporary(FldConditional); + parser_relop(result, subject, op, object); + return result; + } + } + if( yydebug ) { + dbgmsg("%s:%d: failed for %s %s %s", + __func__, __LINE__, + name_of(subject.field), relop_str(op), name_of(object.field)); + } + return nullptr; +} + +cbl_field_t * +eval_subject_t::compare( const cbl_refer_t& object, + const cbl_refer_t& object2 ) { + auto subject(*pcol); + + if( ! compatible( object.field ) ) { + if( yydebug ) { + dbgmsg("%s:%d: failed for %s %s", + __func__, __LINE__, + name_of(subject.field), name_of(object.field)); + } + return nullptr; + } + if( object2.field ) { + if( ! compatible( object2.field ) ) { + if( yydebug ) { + dbgmsg("%s:%d: failed for %s %s", + __func__, __LINE__, + name_of(subject.field), name_of(object2.field)); + } + return nullptr; + } + } + + if( is_conditional(subject.field) ) { + assert( object2.field == nullptr ); + parser_logop(result, subject.field, xnor_op, object.field); + return result; + } + + if( object2.field ) { + assert( ! is_conditional(object.field) ); + assert( ! is_conditional(object2.field) ); + + cbl_field_t * gte = new_temporary(FldConditional); + cbl_field_t * lte = new_temporary(FldConditional); + + parser_relop( gte, object, le_op, subject ); + parser_relop( lte, subject, le_op, object2 ); + + parser_logop(result, gte, and_op, lte); + return result; + } + + parser_relop(result, subject, eq_op, object); + return result; +} diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h new file mode 100644 index 0000000..573355c --- /dev/null +++ b/gcc/cobol/parse_ante.h @@ -0,0 +1,3552 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <assert.h> +#include <string.h> +#include <stdio.h> + +#include <algorithm> +#include <list> +#include <map> +#include <numeric> +#include <stack> +#include <string> + +#define MAXLENGTH_FORMATTED_DATE 10 +#define MAXLENGTH_FORMATTED_TIME 19 +#define MAXLENGTH_FORMATTED_DATETIME 30 + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +extern void declarative_runtime_match(cbl_field_t *declaratives, + cbl_label_t *lave ); + +extern YYLTYPE yylloc; + +extern int yylineno, yyleng, yychar; +extern char *yytext; + +bool need_nume_set( bool tf = true ); + +bool max_errors_exceeded( int nerr ); + +extern cbl_label_t *next_sentence; +void next_sentence_label(cbl_label_t* label) { + parser_label_label(label); + next_sentence = NULL; + // release codegen label structure, so it can be reused. + assert(label->structs.goto_trees || mode_syntax_only()); + free(label->structs.goto_trees); + label->structs.goto_trees = NULL; +} + +void apply_declaratives(); +const char * keyword_str( int token ); +void labels_dump(); + +cbl_dialect_t cbl_dialect; +size_t cbl_gcobol_features; + +static size_t nparse_error = 0; + +size_t parse_error_inc() { return ++nparse_error; } +size_t parse_error_count() { return nparse_error; } +void input_file_status_notify(); + +#define YYLLOC_DEFAULT(Current, Rhs, N) \ + do { \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + location_dump("parse.c", N, \ + "rhs N ", YYRHSLOC (Rhs, N)); \ + } \ + else \ + { \ + (Current).first_line = \ + (Current).last_line = YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = \ + (Current).last_column = YYRHSLOC (Rhs, 0).last_column; \ + } \ + location_dump("parse.c", __LINE__, "current", (Current)); \ + gcc_location_set( location_set(Current) ); \ + input_file_status_notify(); \ + } while (0) + +int yylex(void); +extern int yydebug; + +#include <stdarg.h> + +const char * +consistent_encoding_check( const YYLTYPE& loc, const char input[] ) { + cbl_field_t faux = { + .type = FldAlphanumeric, + .data = { .capacity = capacity_cast(strlen(input)), .initial = input } + }; + + auto s = faux.internalize(); + if( !s ) { + error_msg(loc, "inconsistent string literal encoding for '%s'", input); + } else { + if( s != input ) return s; + } + return NULL; +} + +const char * original_picture(); + char * original_number( char input[] = NULL ); + +static const relop_t invalid_relop = static_cast<relop_t>(-1); + +static enum cbl_division_t current_division; + +static cbl_refer_t null_reference; +static cbl_field_t *literally_one, *literally_zero; + +cbl_field_t * +literal_of( size_t value ) { + switch(value) { + case 0: return literally_zero; + case 1: return literally_one; + } + cbl_err("logic error: %s: %zu not supported", __func__, value); + return NULL; +} + +enum data_section_t { // values reflect mandatory order + not_data_datasect_e, + file_datasect_e, + working_storage_datasect_e, + local_storage_datasect_e, + linkage_datasect_e, +} current_data_section; + +static bool current_data_section_set( const YYLTYPE& loc, enum data_section_t ); + +enum data_clause_t { + picture_clause_e = 0x0001, + usage_clause_e = 0x0002, + value_clause_e = 0x0004, + occurs_clause_e = 0x0008, + global_clause_e = 0x0010, + external_clause_e = 0x0020, + justified_clause_e = 0x0040, + redefines_clause_e = 0x0080, + blank_zero_clause_e = 0x0100, + synched_clause_e = 0x0200, + sign_clause_e = 0x0400, + based_clause_e = 0x0800, + same_clause_e = 0x1000, + volatile_clause_e = 0x2000, + type_clause_e = 0x4000, + typedef_clause_e = 0x8000, +}; + +static inline bool +has_clause( int data_clauses, data_clause_t clause ) { + return clause == (data_clauses & clause); +} + +static bool +is_cobol_word( const char name[] ) { + auto eoname = name + strlen(name); + auto p = std::find_if( name, eoname, + []( char ch ) { + switch(ch) { + case '-': + case '_': + return false; + case '$': // maybe one day (IBM allows) + break; + } + return !ISALNUM(ch); + } ); + return p == eoname; +} + +bool +in_procedure_division(void) { + return current_division == procedure_div_e; +} + +static inline bool +in_file_section(void) { return current_data_section == file_datasect_e; } + +static cbl_refer_t * +intrinsic_inconsistent_parameter( size_t n, cbl_refer_t *args ); + +static inline bool +namcpy(const YYLTYPE& loc, cbl_name_t tgt, const char *src ) { + // snprintf(3): writes at most size bytes (including the terminating NUL byte) + auto len = snprintf(tgt, sizeof(cbl_name_t), "%s", src); + if( ! (0 < len && len < int(sizeof(cbl_name_t))) ) { + error_msg(loc, "name truncated to '%s' (max %zu characters)", + tgt, sizeof(cbl_name_t)-1); + return false; + } + return true; +} + +cbl_field_t * +new_alphanumeric( size_t capacity ); + +static inline cbl_refer_t * +new_reference( enum cbl_field_type_t type, const char *initial ) { + return new cbl_refer_t( new_temporary(type, initial) ); +} +static inline cbl_refer_t * +new_reference( cbl_field_t *field ) { + return new cbl_refer_t(field); +} +static inline cbl_refer_t * +new_reference_like( const cbl_field_t& skel ) { + return new cbl_refer_t( new_temporary_like(skel) ); +} + +static void reject_refmod( YYLTYPE loc, cbl_refer_t ); +static bool require_pointer( YYLTYPE loc, cbl_refer_t ); +static bool require_numeric( YYLTYPE loc, cbl_refer_t ); + +struct cbl_field_t * constant_of( size_t isym ); + +static const struct cbl_occurs_t nonarray = cbl_occurs_t(); + +using std::list; + +static inline bool isquote( char ch ) { + return ch == '\'' || ch == '"'; +} + +static inline char * dequote( char input[] ) { + char *pend = input + strlen(input) - 1; + assert(isquote(*input)); + assert(isquote(*pend)); + assert(*input == *pend); + *input = *pend = '\0'; + return ++input; +} + +static const char * +name_of( cbl_field_t *field ) { + assert(field); + return field->name[0] == '_' && field->data.initial? + field->data.initial : field->name; +} + +static const char * +nice_name_of( cbl_field_t *field ) { + auto name = name_of(field); + return name[0] == '_'? "" : name; +} + +struct evaluate_elem_t { + size_t nother; + struct cbl_label_t label; + struct cbl_field_t *result; + struct case_t { + private: + relop_t oper; + public: + cbl_field_t *subject, *object, *cond; + case_t( cbl_field_t * subject ) + : oper(eq_op) + , subject(subject) + , object(NULL) + , cond( keep_temporary(FldConditional) ) + {} + + cbl_field_t * object_set( cbl_field_t *obj, relop_t op ) { + oper = op; + return object = obj; + } + + inline relop_t op() const { return oper; } + + void dump() const { + dbgmsg( " cond is '%s'\n\t" + "subject is '%s'\n\t" + " oper is %s \n\t" + " object is '%s'", + cond? xstrdup(field_str(cond)) : "none", + subject? xstrdup(field_str(subject)) : "none", + relop_str(oper), + object? xstrdup(field_str(object)) : "none"); + } + static void Dump( const case_t& c ) { c.dump(); } + }; + list<case_t> cases; + typedef list<case_t>::iterator case_iter; + case_iter pcase; + + void dump() const { + dbgmsg( "nother=%zu label '%s', %zu cases", nother, label.name, cases.size() ); + std::for_each( cases.begin(), cases.end(), case_t::Dump ); + } + + explicit evaluate_elem_t( const char skel[] ) + : nother(0) + , result( keep_temporary(FldConditional) ) + , pcase( cases.end() ) + { + static const cbl_label_t protolabel = { .type = LblEvaluate }; + label = protolabel; + label.line = yylineno; + if( -1 == snprintf(label.name, sizeof(label.name), + "%.*s_%d", (int)sizeof(label.name)-6, skel, yylineno) ) { + yyerror("could not create unique label '%s_%d' because it is too long", + skel, yylineno); + } + } + + size_t ncolumn() const { return cases.size(); } + size_t nready() const { + size_t n=0; + for( const auto& c : cases ) { + if( c.object == NULL ) break; + n++; + } + return n; + } +}; + +/* + * The file_X_args variables hold the arguments to parser_file_X. The + * X_body nonterminal collects the arguments, but we defer calling + * parser_file_X until either: + * 1. end of statement, implying sequentiality, or + * 2. ON ERROR, implying random access + * In the 2nd case, the call to parser_file_X is made at the top of + * the io_error nonterminal, before any statements are parsed. The + * effect is to delay the call only until we've parsed ON ERROR. + * Because there are no intervening statements, there's no need for a + * stack of arguments. One global does the trick. +*/ +static class file_delete_args_t { + cbl_file_t *file; +public: + void init( cbl_file_t *file ) { + this->file = file; + } + bool ready() const { return file != NULL; } + void call_parser_file_delete( bool sequentially ) { + parser_file_delete(file, sequentially); + file = NULL; + } +} file_delete_args; + +cbl_round_t current_rounded_mode(); + +static struct file_read_args_t { + cbl_file_t *file; + cbl_refer_t record, *read_into; + int where; + enum { where_unknown = 0 }; + + file_read_args_t() : file(NULL), read_into(NULL), where(where_unknown) {} + + void + init( struct cbl_file_t *file, + cbl_refer_t record, + cbl_refer_t *read_into, + int where ) { + this->file = file; + this->record = record; + this->read_into = read_into; + this->where = where; + } + + bool ready() const { return file != NULL; } + void default_march( bool sequential ) { + if( where == where_unknown ) { + where = sequential? -1 : 1; + } + } + + void + call_parser_file_read( int w = where_unknown) { + if( w != where_unknown ) where = w; + if( where == where_unknown) { + switch( file->access ) { + case file_inaccessible_e: + case file_access_seq_e: + where = -1; + break; + case file_access_rnd_e: + where = 1; + break; + case file_access_dyn_e: + where = 1; + break; + } + } + parser_file_read(file, record, where); + if( read_into ) { + parser_move( *read_into, record, current_rounded_mode() ); + } + *this = file_read_args_t(); + } +} file_read_args; + +static class file_return_args_t { + cbl_file_t *file; +public: + file_return_args_t() : file(NULL) {} + void init( cbl_file_t *file ) { + this->file = file; + } + bool ready() const { return file != NULL; } + void call_parser_return_start(cbl_refer_t into = cbl_refer_t() ) { + parser_return_start(file, into); + file = NULL; + } +} file_return_args; + +static class file_rewrite_args_t { + cbl_file_t *file; + cbl_field_t *record; +public: + void init( cbl_file_t *file, cbl_field_t *record ) { + this->file = file; + this->record = record; + } + bool ready() const { return file != NULL; } + void call_parser_file_rewrite( bool sequentially ) { + sequentially = sequentially || file->access == file_access_seq_e; + if( file->access == file_access_rnd_e ) sequentially = false; + parser_file_rewrite(file, record, sequentially); + file = NULL; + record = NULL; + } +} file_rewrite_args; + +static class file_start_args_t { + cbl_file_t *file; +public: + file_start_args_t() : file(NULL) {} + void init( YYLTYPE loc, cbl_file_t *file ) { + this->file = file; + if( is_sequential(file) ) { + error_msg(loc, "START invalid with sequential file %s", file->name); + } + } + bool ready() const { return file != NULL; } + void call_parser_file_start() { + // not needed: parser_file_start(file, sequentially); + file = NULL; + } +} file_start_args; + +static class file_write_args_t { + cbl_file_t *file; + cbl_field_t *data_source; + bool after; + cbl_refer_t *advance; +public: + file_write_args_t() + : file(NULL) + , after(false) + , advance(NULL) + {} + cbl_file_t * init( cbl_file_t *file, + cbl_field_t *data_source, + bool after, + cbl_refer_t *advance ) { + this->file = file; + this->data_source = data_source; + this->after = after; + this->advance = new cbl_refer_t(*advance); + return this->file; + } + bool ready() const { return file != NULL; } + void call_parser_file_write( bool sequentially ) { + sequentially = sequentially || file->access == file_access_seq_e; + parser_file_write(file, data_source, after, *advance, sequentially); + *this = file_write_args_t(); + } +} file_write_args; + +/* + * Fields + */ +struct group_attr_t { + cbl_field_type_t default_usage; // for COMP-5 etc. + int encoding; // for ASCII, National, etc. + cbl_field_t *field; + + group_attr_t( cbl_field_t *field, + cbl_field_type_t default_usage, + int encoding ) + : default_usage(default_usage) + , encoding(encoding) + , field(field) + {} +}; + +struct refer_list_t; + +struct arith_t { + cbl_arith_format_t format; + list<cbl_num_result_t> tgts; + list<cbl_refer_t> A, B; + cbl_refer_t remainder; + cbl_label_t *on_error, *not_error; + + arith_t( cbl_arith_format_t format ) + : format(format), on_error(NULL), not_error(NULL) + {} + arith_t( cbl_arith_format_t format, refer_list_t * refers ); + + bool corresponding() const { return format == corresponding_e; } + + void another_pair( size_t src, size_t tgt ) { + assert(src > 0 && tgt > 0); + + cbl_refer_t a(A.front()); + a.field = cbl_field_of(symbol_at(src)); + A.push_back( a ); + + cbl_num_result_t res = tgts.front(); + res.refer.field = cbl_field_of(symbol_at(tgt)); + tgts.push_back( res ); + + dbgmsg("%s:%d: SRC: %3zu %s", __func__, __LINE__, src, a.str()); + dbgmsg("%s:%d: to %3zu %s", __func__, __LINE__, tgt, res.refer.str()); + } + void operator()( const corresponding_fields_t::const_reference elem ) { + another_pair( elem.first, elem.second ); + } + + const char * format_str() const { + switch(format) { + case not_expected_e: return "not_expected_e"; + case no_giving_e: return "no_giving_e"; + case giving_e: return "giving_e"; + case corresponding_e: return "corresponding_e"; + } + return "???"; + } +}; + +static cbl_refer_t * ast_op( cbl_refer_t *lhs, char op, cbl_refer_t *rhs ); + +static void ast_add( arith_t *arith ); +static bool ast_subtract( arith_t *arith ); +static bool ast_multiply( arith_t *arith ); +static bool ast_divide( arith_t *arith ); + +static cbl_field_type_t intrinsic_return_type( int token ); + +template <typename T> +static T* use_any( list<T>& src, T *tgt) { + if( src.empty() ) return NULL; + + std::copy(src.begin(), src.end(), tgt); + src.clear(); + + return tgt; +} + +class evaluate_t; +/* + * Evaluate + */ +class eval_subject_t { + friend evaluate_t; + struct { cbl_label_t *done, *yeah, *when; } labels; + cbl_field_t *result; + relop_t abbr_relop; + typedef std::list<cbl_refer_t> column_list_t; + column_list_t columns; + column_list_t::iterator pcol; + + static cbl_label_t * label( const char skel[] ); + + void new_object_labels(); + public: + eval_subject_t(); + void append( cbl_refer_t field ) { + columns.push_back(field); + pcol = columns.begin(); + } + cbl_label_t *yeah() { return labels.yeah; } + cbl_label_t *when() { return labels.when; } + cbl_label_t *done() { return labels.done; } + + cbl_field_t *subject() const { + if( pcol == columns.end() ) return nullptr; + return pcol->field; + } + size_t subject_count() const { return columns.size(); } + size_t object_count() { return std::distance(columns.begin(), pcol); } + + void object_relop( relop_t op ) { abbr_relop = op; } + relop_t object_relop() const { return abbr_relop; } + + void rewind() { pcol = columns.begin(); } + + bool compatible( const cbl_field_t *object ) const; + + // compare sets result + cbl_field_t * compare( int token ); + cbl_field_t * compare( relop_t op, + const cbl_refer_t& object, bool deciding = false); + cbl_field_t * compare( const cbl_refer_t& object, + const cbl_refer_t& object2 = nullptr); + + void write_when_label() { + parser_label_label(labels.when); + labels.when = label("when"); + } + void write_yeah_label() { + parser_label_label(labels.yeah); + labels.yeah = label("yeah"); + } + + // decide() calls codegen with the result and increments the subject column. + // On FALSE, skip past <statements> and fall into next WHEN. + bool decided( cbl_field_t *result ) { + this->result = result; + parser_if( result ); + parser_else(); + parser_label_goto( labels.when ); + parser_fi(); + pcol++; + return true; + } + bool decide( int token ) { + if( pcol == columns.end() ) return false; + if( compare( token ) ) { + parser_if( result ); + parser_else(); + parser_label_goto( labels.when ); + parser_fi(); + } + pcol++; + return true; + } + bool decide( const cbl_refer_t& object, bool invert ) { + if( pcol == columns.end() ) return false; + if( compare( object ) ) { + if( invert ) { + parser_logop( result, NULL, not_op, result ); + } + parser_if( result ); + parser_else(); + parser_label_goto( labels.when ); + parser_fi(); + } + pcol++; + return true; + } + bool decide( relop_t op, const cbl_refer_t& object, bool invert ) { + if( pcol == columns.end() ) return false; + dbgmsg("%s() if not %s goto %s", __func__, result->name, when()->name); + + if( compare(op, object, true) ) { + if( invert ) { + parser_logop( result, NULL, not_op, result ); + } + parser_if( result ); + parser_else(); + parser_label_goto( labels.when ); + parser_fi(); + } + pcol++; + return true; + } + bool decide( const cbl_refer_t& object, const cbl_refer_t& object2, bool invert ) { + if( pcol == columns.end() ) return false; + if( compare(object, object2) ) { + if( invert ) { + parser_logop( result, NULL, not_op, result ); + } + parser_if( result ); + parser_else(); + parser_label_goto( labels.when ); + parser_fi(); + } + pcol++; + return true; + } +}; + +class evaluate_t : private std::stack<eval_subject_t> { +public: + size_t depth() const { return size(); } + + void alloc() { + push(eval_subject_t()); + } + void free() { assert(!empty()); pop(); } + + eval_subject_t& current() { + assert(!empty()); + if( yydebug ) { + auto& ev( top() ); + dbgmsg("eval_subject: res: %s, When %s, Yeah %s, Done %s", + ev.result->name, + ev.when()->name, ev.yeah()->name, ev.done()->name); + } + return top(); + } + +} eval_stack; + + + +static void dump_inspect( const cbl_inspect_t& i ); + +struct perform_t { + struct cbl_perform_tgt_t tgt; + bool before; + list<cbl_perform_vary_t> varys; + list<cbl_declarative_t> dcls; + + struct ec_labels_t { + cbl_label_t + *init, // Format 3, code that installs handlers + *fini, // Format 3, code that reverts handlers + *top, // Format 3, above imperative-statement-1 + *from, // Format 3, imperative-statement-1 + *finally, + *other, *common; + ec_labels_t() + : init(NULL), fini(NULL), + top(NULL), from(NULL), finally(NULL), + other(NULL), common(NULL) + {} + void generate() { + init = new_label( LblLoop, "init" ); + fini = new_label( LblLoop, "fini" ); + top = new_label( LblLoop, "top" ); + from = new_label( LblLoop, "from" ); + other = new_label( LblLoop, "other" ); + common = new_label( LblLoop, "common" ); + finally = new_label( LblLoop, "finally" ); + } + static cbl_label_t * + new_label( cbl_label_type_t type, const cbl_name_t role ); + } ec_labels; + + struct { + cbl_label_t *start, *end; + cbl_field_t *unsatisfied, *size; + cbl_refer_t table; + } search; + + perform_t( cbl_label_t *from, cbl_label_t *to = NULL ) + : tgt( from, to ), before(true) + { + search = {}; + } + ~perform_t() { varys.clear(); } + cbl_field_t * until() { + assert(!varys.empty()); + cbl_field_t *f = varys.front().until; + assert(f->type == FldConditional); + return f; + } +}; + +static list<perform_t> performs; + +static inline perform_t * +perform_alloc() { + performs.push_back(perform_t(NULL)); + return &performs.back(); +} + +static inline void +perform_free(void) { + assert(performs.size() > 0); + performs.pop_back(); +} + +static inline perform_t * +perform_current(void) { + assert(performs.size() > 0); + return &performs.back(); +} + +static inline perform_t * + perform_tgt_set( cbl_label_t *from, cbl_label_t *to = NULL ) { + struct perform_t *perf = perform_current(); + perf->tgt = cbl_perform_tgt_t(from, to); + return perf; +} + +#define PERFORM_EXCEPT 1 +static void +perform_ec_setup() { + struct perform_t *perf = perform_current(); + perf->ec_labels.generate(); + perf->tgt.from( perf->ec_labels.from ); + +#if PERFORM_EXCEPT + parser_label_goto(perf->ec_labels.init); + parser_label_label(perf->ec_labels.top); +#endif + parser_perform_start(&perf->tgt); +} + +static void +perform_ec_cleanup() { + struct perform_t *perf = perform_current(); +#if PERFORM_EXCEPT + parser_label_goto(perf->ec_labels.fini); + parser_label_label(perf->ec_labels.init); + /* ... empty init block ... */ + parser_label_goto(perf->ec_labels.top); + parser_label_label(perf->ec_labels.fini); +#endif +} + +static list<cbl_label_t*> searches; + +static inline cbl_label_t * +search_alloc( cbl_label_t *name ) { + searches.push_back(name); + return searches.back(); +} + +static inline void +search_free(void) { + assert(searches.size() > 0); + searches.pop_back(); +} + +static inline cbl_label_t * +search_current(void) { + assert(searches.size() > 0); + return searches.back(); +} + +static list<cbl_num_result_t> rhs; +typedef list<cbl_num_result_t>::iterator rhs_iter; + +struct tgt_list_t { + list<cbl_num_result_t> targets; +}; + +static struct cbl_label_t * +label_add( const YYLTYPE& loc, enum cbl_label_type_t type, const char name[] ); +static struct cbl_label_t * +label_add( enum cbl_label_type_t type, const char name[], int line ); + +static struct cbl_label_t * +paragraph_reference( const char name[], size_t section ); + +static inline void +list_add( list<cbl_num_result_t>& list, cbl_refer_t refer, int round ) { + struct cbl_num_result_t arg = { static_cast<cbl_round_t>(round), refer }; + list.push_back(arg); +} + +static list<cbl_domain_t> domains; +typedef list<cbl_domain_t>::iterator domain_iter; + +/* + * The name queue is a queue of lists of data-item names recognized by the + * lexer, but not returned to the parser. These lists are "teed up" by the + * lexer until no more qualifiers are found. At that point, the last name is + * returned as a NAME or NAME88 token. NAME88 is returned only if a correctly, + * uniquely specified Level 88 data item is found in the symbol table (because + * else we can't know). + * + * When the parser gets a NAME or NAME88 token, it retrieves the pending list + * of qualifiers, if any, from the name queue. It adds the returned name to + * the list and calls symbol_find() to search the name map. For correctly + * specified names, the lexer has already done that work, which is now + * unfortunately repeated. For incorrect names, the parser emits a most useful + * diagnostic. + */ +static name_queue_t name_queue; + +void +tee_up_empty() { + name_queue.allocate(); +} +void +tee_up_name( const YYLTYPE& loc, const char name[] ) { + name_queue.push(loc, name); +} +cbl_namelist_t +teed_up_names() { + return name_queue_t::namelist_of( name_queue.peek() ); +} + +class tokenset_t { + std::vector<const char *>token_names; + std::map <std::string, int> tokens; + std::set<std::string> cobol_words; + + static std::string + lowercase( const cbl_name_t name ) { + cbl_name_t lname; + std::transform(name, name + strlen(name) + 1, lname, ftolower); + return lname; + } + + public: + tokenset_t(); + int find( const cbl_name_t name, bool include_intrinsics ); + + bool equate( const YYLTYPE& loc, int token, const cbl_name_t name ) { + auto lname( lowercase(name) ); + auto cw = cobol_words.insert(lname); + if( ! cw.second ) { + error_msg(loc, "COBOL-WORDS EQUATE: %s may appear but once", name); + return false; + } + auto p = tokens.find(lowercase(name)); + bool fOK = p == tokens.end(); + if( fOK ) { // name not already in use + tokens[lname] = token; + } else { + error_msg(loc, "EQUATE: %s already defined as a token", name); + } + return fOK; + } + bool undefine( const YYLTYPE& loc, const cbl_name_t name ) { + auto lname( lowercase(name) ); + auto cw = cobol_words.insert(lname); + if( ! cw.second ) { + error_msg(loc, "COBOL-WORDS UNDEFINE: %s may appear but once", name); + return false; + } + auto p = tokens.find(lname); + bool fOK = p != tokens.end(); + if( fOK ) { // name in use + tokens.erase(p); + } else { + error_msg(loc, "UNDEFINE: %s not defined as a token", name); + } + return fOK; + } + bool substitute( const YYLTYPE& loc, const cbl_name_t extant, int token, const cbl_name_t name ) { + return equate( loc, token, name ) && undefine( loc, extant ); + } + bool reserve( const YYLTYPE& loc, const cbl_name_t name ) { + auto lname( lowercase(name) ); + auto cw = cobol_words.insert(lname); + if( ! cw.second ) { + error_msg(loc, "COBOL-WORDS RESERVE: %s may appear but once", name); + return false; + } + tokens[lname] = -42; + return true; + } + int redefined_as( const cbl_name_t name ) { + auto lname( lowercase(name) ); + if( cobol_words.find(lname) != cobol_words.end() ) { + auto p = tokens.find(lname); + if( p != tokens.end() ) { + return p->second; + } + } + return 0; + } + const char * name_of( int tok ) const { + tok -= (255 + 3); + gcc_assert(0 <= tok && size_t(tok) < token_names.size()); + return token_names[tok]; + } +}; + +class current_tokens_t { + tokenset_t tokens; + public: + current_tokens_t() {} + int find( const cbl_name_t name, bool include_intrinsics ) { + return tokens.find(name, include_intrinsics); + } + bool equate( const YYLTYPE& loc, cbl_name_t keyword, const cbl_name_t name ) { + int token = keyword_tok(keyword); + if( 0 == token ) { + error_msg(loc, "EQUATE %s: not a valid token", keyword); + return false; + } + return tokens.equate(loc, token, name); + } + bool undefine( const YYLTYPE& loc, cbl_name_t keyword ) { + return tokens.undefine(loc, keyword); + } + bool substitute( const YYLTYPE& loc, cbl_name_t keyword, const cbl_name_t name ) { + int token = keyword_tok(keyword); + if( 0 == token ) { + error_msg(loc, "SUBSTITUTE %s: not a valid token", keyword); + return false; + } + return tokens.substitute(loc, keyword, token, name); + } + bool reserve( const YYLTYPE& loc, const cbl_name_t name ) { + return tokens.reserve(loc, name); + } + int redefined_as( const cbl_name_t name ) { + return tokens.redefined_as(name); + } + const char * name_of( int tok ) const { + return tokens.name_of(tok); + } +} tokens; + +int +redefined_token( const cbl_name_t name ) { + return tokens.redefined_as(name); +} + +struct file_list_t { + list<cbl_file_t*> files; + file_list_t() {} + file_list_t( cbl_file_t* file ) { + files.push_back(file); + } + file_list_t( file_list_t& that ) : files(that.files.size()) { + std::copy( that.files.begin(), that.files.end(), files.begin() ); + } + + static size_t symbol_index( cbl_file_t* file ) { + return ::symbol_index( symbol_elem_of(file) ); + } +}; + +struct field_list_t { + list<cbl_field_t*> fields; + field_list_t( cbl_field_t *field ) { + fields.push_back(field); + } + explicit field_list_t() {} +}; + +cbl_field_t ** +use_list( field_list_t *src, cbl_field_t *tgt[] ) { + assert(src); + std::copy(src->fields.begin(), src->fields.end(), tgt); + src->fields.clear(); + delete src; + + return tgt; +} + +cbl_file_t ** + use_list( list<cbl_file_t*>& src, bool clear = true ) { + if( src.empty() ) return NULL; + auto tgt = new cbl_file_t*[ src.size() ]; + std::copy(src.begin(), src.end(), tgt); + + if( clear ) + src.clear(); + + return tgt; +} + +struct refer_list_t { + list<cbl_refer_t> refers; + refer_list_t( cbl_refer_t *refer ) { + if( refer ) { + refers.push_back(*refer); + delete refer; + } + } + refer_list_t * push_back( cbl_refer_t *refer ) { + refers.push_back(*refer); + delete refer; + return this; + } + inline list<cbl_refer_t>& items() { return refers; } + inline list<cbl_refer_t>::iterator begin() { return refers.begin(); } + inline list<cbl_refer_t>::iterator end() { return refers.end(); } + inline size_t size() const { return refers.size(); } + + cbl_refer_t * + use_list( cbl_refer_t tgt[] ) { + std::copy(refers.begin(), refers.end(), tgt); + refers.clear(); + return tgt; + } +}; + +struct refer_marked_list_t : public refer_list_t { + cbl_refer_t *marker; + + refer_marked_list_t() : refer_list_t(NULL), marker(NULL) {} + refer_marked_list_t( cbl_refer_t *marker, refer_list_t *refers ) + : refer_list_t(*refers), marker(marker) {} + refer_marked_list_t( cbl_refer_t *marker, cbl_refer_t *input ) + : refer_list_t(input) + , marker(marker) {} + + refer_marked_list_t * push_back( refer_list_t *refers ) { + push_back(refers); + return this; + } + refer_marked_list_t * push_on( cbl_refer_t *marker, cbl_refer_t *input ) { + refers.push_back(*input); + this->marker = marker; + return this; + } +}; + +struct refer_collection_t { + list<refer_marked_list_t> lists; + + refer_collection_t( const refer_marked_list_t& marked_list ) + { + lists.push_back( marked_list ); + } + refer_collection_t * push_back( const refer_marked_list_t& marked_list ) + { + lists.push_back( marked_list ); + return this; + } + + const cbl_refer_t* last_delimiter() const { + return lists.back().marker; + } + cbl_refer_t* last_delimiter( cbl_refer_t* marker) { + return lists.back().marker = marker; + } + + size_t total_size() const { + size_t n = 0; + for( auto p=lists.begin(); p != lists.end(); p++ ) { + n += p->refers.size(); + } + return n; + } +}; + +struct ast_inspect_oper_t { + cbl_inspect_bound_t bound; // CHARACTERS/ALL/LEADING/FIRST + std::list<cbl_inspect_match_t> matches; + std::list<cbl_inspect_replace_t> replaces; + +ast_inspect_oper_t( const cbl_inspect_match_t& match, + cbl_inspect_bound_t bound = bound_characters_e ) + : bound(bound) + { + matches.push_back(match); + } + ast_inspect_oper_t( const cbl_inspect_replace_t& replace, + cbl_inspect_bound_t bound = bound_characters_e ) + : bound(bound) + { + replaces.push_back(replace); + } +}; + +struct ast_inspect_t : public std::list<cbl_inspect_oper_t> { + cbl_refer_t tally; // field is NULL for REPLACING + const std::list<cbl_inspect_oper_t>& opers() const { return *this; } +}; + +struct ast_inspect_list_t : public std::list<cbl_inspect_t> { + ast_inspect_list_t( const cbl_inspect_t& insp ) { + push_back(insp); + } + + cbl_inspect_t * as_array() { + cbl_inspect_t *output = new cbl_inspect_t[ size() ]; + std::copy( begin(), end(), output ); + return output; + } +}; + +void ast_inspect( cbl_refer_t& input, bool backward, ast_inspect_list_t& inspects ); + +template <typename E> +struct elem_list_t { + list<E*> elems; + elem_list_t( E *elem ) { + elems.push_back(elem); + } + void clear() { + for( auto p = elems.begin(); p != elems.add(); p++ ) { + delete *p; + } + elems.clear(); + } +}; + +typedef elem_list_t<cbl_label_t> label_list_t; + +template <typename L, typename E> + E use_list( L *src, E tgt ) { + assert(src); + std::copy(src->elems.begin(), src->elems.end(), tgt); + src->elems.clear(); + delete src; + + return tgt; +} + +struct unstring_tgt_t { + cbl_refer_t *tgt, *delimiter, *count; + unstring_tgt_t( cbl_refer_t *tgt, + cbl_refer_t *delimiter = NULL, + cbl_refer_t *count = NULL ) + : tgt(tgt), delimiter(delimiter), count(count) + {} + + static cbl_refer_t tgt_of( const unstring_tgt_t& that ) { + return maybe_empty(that.tgt); + } + static cbl_refer_t delimiter_of( const unstring_tgt_t& that ) { + return maybe_empty(that.delimiter); + } + static cbl_refer_t count_of( const unstring_tgt_t& that ) { + return maybe_empty(that.count); + } +private: + static cbl_refer_t maybe_empty( cbl_refer_t *p ) { + return p? *p : cbl_refer_t(); + } +}; + +struct unstring_tgt_list_t { + list<unstring_tgt_t> unstring_tgts; + + unstring_tgt_list_t( unstring_tgt_t *unstring_tgt ) { + unstring_tgts.push_back(*unstring_tgt); + delete unstring_tgt; + } + unstring_tgt_list_t * push_back( unstring_tgt_t *unstring_tgt ) { + unstring_tgts.push_back(*unstring_tgt); + delete unstring_tgt; + return this; + } + + size_t size() const { return unstring_tgts.size(); } + + typedef cbl_refer_t xform_t( const unstring_tgt_t& that ); + void use_list( cbl_refer_t *output, xform_t func ) { + std::transform( unstring_tgts.begin(), + unstring_tgts.end(), + output, func ); + } +}; + +struct unstring_into_t : public unstring_tgt_list_t { + cbl_refer_t pointer, tally; + unstring_into_t( unstring_tgt_list_t *tgt_list, + cbl_refer_t *pointer = NULL, + cbl_refer_t *tally = NULL ) + : unstring_tgt_list_t(*tgt_list) + , pointer( pointer? *pointer : cbl_refer_t() ) + , tally( tally? *tally : cbl_refer_t() ) + { + delete tgt_list; + if( pointer ) delete pointer; + if( tally ) delete tally; + } +}; + +struct ffi_args_t { + list<cbl_ffi_arg_t> elems; + + ffi_args_t( cbl_ffi_arg_t *arg ) { + this->push_back(arg); + } + + ffi_args_t( size_t narg, cbl_ffi_arg_t *args ) { + std::copy(args, args+narg, std::back_inserter(elems)); + } + + // set explicitly, or assume + ffi_args_t * push_back( cbl_ffi_arg_t *arg ) { + if( arg->crv == by_default_e ) { + arg->crv = elems.empty()? by_reference_e : elems.back().crv; + } + elems.push_back(*arg); + delete arg; + return this; + } + + // infer reference/content/value from previous + ffi_args_t * push_back( cbl_refer_t* refer, + cbl_ffi_arg_attr_t attr = none_of_e ) { + cbl_ffi_crv_t crv = elems.empty()? by_reference_e : elems.back().crv; + cbl_ffi_arg_t arg( crv, refer, attr ); + elems.push_back(arg); + return this; + } + void dump() const { + int i=0; + for( const auto& arg : elems ) { + dbgmsg( "%8d) %-10s %-16s %s", i++, + cbl_ffi_crv_str(arg.crv), + 3 + cbl_field_type_str(arg.refer.field->type), + arg.refer.field->pretty_name() ); + } + } + + const char * + parameter_types() const { + auto output = new char[ 1 + elems.size() ]; + auto p = std::transform( elems.begin(), elems.end(), output, + []( auto arg ) { + return function_descr_t::parameter_type(*arg.field()); + } ); + assert(output < p); + p[-1] = '\0'; + return output; + } +}; + +struct relop_abbr_t { + relop_t relop; + cbl_refer_t *rhs; +}; + +typedef struct elem_list_t<relop_abbr_t> relop_abbr__list_t; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wreorder" + +struct sort_key_t : public field_list_t { + bool ascending; + sort_key_t( bool ascending, field_list_t key ) + : ascending(ascending), field_list_t(key) + {} +}; + +#pragma GCC diagnostic pop + +struct sort_keys_t { + list<sort_key_t> key_list; +}; + +struct file_sort_io_t { + file_list_t file_list; + cbl_perform_tgt_t tgt; + + file_sort_io_t( file_list_t& files ) : file_list(files) {} + file_sort_io_t( cbl_perform_tgt_t& tgt ) : tgt(tgt.from(), tgt.to()) {} + size_t nfile() const { return file_list.files.size(); } +}; + + +struct merge_t { + cbl_file_t *master; + list<cbl_file_t*> updates; + // collation missing + enum output_type_t { output_unknown_e, + output_proc_e, + output_file_e } type; + cbl_perform_tgt_t tgt; + list<cbl_file_t*> outputs; + + merge_t( cbl_file_t *input ) : master(input), type(output_unknown_e) {} +}; + +static list<merge_t> merges; + +static inline merge_t& +merge_alloc( cbl_file_t *file ) { + merges.push_back(file); + return merges.back(); +} + +static inline void +merge_free(void) { + assert(merges.size() > 0); + merges.pop_back(); +} + +static inline merge_t& +merge_current(void) { + assert(merges.size() > 0); + return merges.back(); +} + +static list<cbl_refer_t> lhs; + +struct vargs_t { + std::list<cbl_refer_t> args; + vargs_t() {} + vargs_t( struct cbl_refer_t *p ) { args.push_back(*p); delete p; } + void push_back( cbl_refer_t *p ) { args.push_back(*p); delete p; } +}; + +static const char intermediate[] = ":intermediate"; + +#include <set> + +std::set<const char *> pristine_values; + +// key is a name after DEBUGGING/ERROR/EXCEPTION +// value is the list of sections invoked +std::map<std::string, std::list<std::string>> + debugging_clients, error_clients, exception_clients; + +class prog_descr_t { + std::set<std::string> call_targets, subprograms; + public: + std::set<function_descr_t> function_repository; + size_t program_index, declaratives_index; + cbl_label_t *declaratives_eval, *paragraph, *section; + const char *collating_sequence; + struct locale_t { + cbl_name_t name; const char *os_name; + locale_t(const cbl_name_t name = NULL, const char *os_name = NULL) + : name(""), os_name(os_name) { + if( name ) { + bool ok = namcpy(YYLTYPE(), this->name, name); + gcc_assert(ok); + } + } + } locale; + cbl_call_convention_t call_convention; + cbl_options_t options; + + prog_descr_t( size_t isymbol ) + : program_index(isymbol) + , declaratives_index(0) + , declaratives_eval(NULL) + , paragraph(NULL) + , section(NULL) + , collating_sequence(NULL) + { + call_convention = current_call_convention(); + } + + std::set<std::string> external_targets() { + std::set<std::string> externals; + std::set_difference( call_targets.begin(), call_targets.end(), + subprograms.begin(), subprograms.end(), + std::inserter(externals, externals.begin()) ); + return externals; + } +}; + +static char * +uniq_label_impl( const char stem[], int line ) { + char *name = xasprintf("%s_%d_%d", stem, yylineno, line); + return name; +} +#define uniq_label(S) uniq_label_impl( (S), __LINE__ ) + +/* + * One of these days, paragraph and section will have to move into + * prog_descr_t, because the current section and paragraph depend on the + * current program, which may be nested and "pop back" into existence at END + * PROGRAM. + */ +struct error_labels_t { + cbl_label_t *on_error, *not_error, *compute_error; + error_labels_t() : on_error(NULL), not_error(NULL), compute_error(NULL) {} + void clear() { on_error = not_error = compute_error = NULL; } + error_labels_t& generate() { + on_error = label_add(LblArith, uniq_label("arith"), yylineno); + not_error = label_add(LblArith, uniq_label("arith"), yylineno); + compute_error = label_add(LblCompute, uniq_label("compute"), yylineno); + return *this; + } +}; + +struct cbl_typedef_less { + bool operator()( const cbl_field_t *a, const cbl_field_t *b ) const { + auto result = strcasecmp(a->name, b->name); + if( result < 0 ) return true; + if( result > 0 ) return false; + + // Names that match are different if they're in different programs + // and neither is external. + auto lhs = field_index(a); + auto rhs = field_index(b); + if( lhs != rhs ) { + if( !a->has_attr(external_e) && !b->has_attr(external_e) ) { + return lhs < rhs; + } + } + return false; + } +}; + +static bool +is_conditional( const cbl_field_t *field ) { + return FldConditional == field->type; +} +static bool +is_conditional( const cbl_refer_t *refer ) { + return is_conditional(refer->field); +} + +typedef std::set< const cbl_field_t*, cbl_typedef_less > unique_typedefs_t; + +static cbl_label_t * implicit_paragraph(); +static cbl_label_t * implicit_section(); + +/* + * Incomplete because not needed at this time: we do not attempt to + * set used/lain for labels used by these functions: + * parser_lsearch_start( cbl_label_t *name, + * parser_lsearch_conditional(cbl_label_t * name) + * parser_lsearch_when( cbl_label_t *name, cbl_field_t *conditional ) + * parser_lsearch_end( cbl_label_t *name ) + * parser_bsearch_start( cbl_label_t* name, + * parser_bsearch_conditional( cbl_label_t* name ) + * parser_bsearch_when(cbl_label_t* name, + * parser_bsearch_end( cbl_label_t* name ) + * parser_string_overflow( cbl_label_t *name ) + * parser_string_overflow_end( cbl_label_t *name ) + * parser_call_exception( cbl_label_t *name ) + * parser_call_exception_end( cbl_label_t *name ) + * parser_entry_activate( size_t iprog, const cbl_label_t *declarative ) + */ + +class program_stack_t : protected std::stack<prog_descr_t> { + struct pending_t { + cbl_call_convention_t call_convention; + bool initial; + pending_t() + : call_convention(cbl_call_convention_t(0)) + , initial(false) + {} + } pending; + public: + cbl_call_convention_t + pending_call_convention( cbl_call_convention_t convention ) { + return pending.call_convention = convention; + } + bool pending_initial() { return pending.initial = true; } + + void push( prog_descr_t descr ) { + cbl_call_convention_t current_call_convention = cbl_call_cobol_e; + if( !empty() ) current_call_convention = top().call_convention; + descr.call_convention = current_call_convention; + std::stack<prog_descr_t>& me(*this); + me.push(descr); + } + inline void pop() { + std::stack<prog_descr_t>& me(*this); + me.pop(); + } + inline prog_descr_t& top() { + std::stack<prog_descr_t>& me(*this); + return me.top(); + } + inline const prog_descr_t& top() const { + const std::stack<prog_descr_t>& me(*this); + return me.top(); + } + inline size_t size() const { + const std::stack<prog_descr_t>& me(*this); + return me.size(); + } + inline bool empty() const { + const std::stack<prog_descr_t>& me(*this); + return me.empty(); + } + + void apply_pending() { + if( size() == 1 && 0 != pending.call_convention ) { + top().call_convention = pending.call_convention; + } + if( pending.initial ) { + auto e = symbol_at(top().program_index); + auto prog(cbl_label_of(e)); + prog->initial = pending.initial; + } + } + + cbl_label_t *first_declarative() { + auto eval = top().declaratives_eval; + if( eval ) return eval; + // scan stack container for declaratives + for( auto& prog : c ) { + if( prog.declaratives_eval ) { + eval = prog.declaratives_eval; + break; + } + } + return eval; + } +}; + +struct rel_part_t { + cbl_refer_t *operand; // lhs + bool has_relop, invert; + relop_t relop; + + rel_part_t( cbl_refer_t *operand = NULL, + relop_t relop = relop_t(-1), + bool invert = false ) + : operand(operand), + has_relop(relop != -1), + invert(invert), + relop(relop) + {} + rel_part_t& relop_set( relop_t op ) { + has_relop = true; + relop = op; + return *this; + } + + bool is_value() const { return operand && is_elementary(operand->field->type); } +}; + +/* + * Evaluation of OR is deferred in case it's followed by AND. As each + * logical operand is encountered, it's first assigned to the + * "andable" member. As ANDs are encountered, they're ANDed to + * andable. When OR is first encountered, we've reached the end of a + * string of ANDs (possibly empty): we move andable to orable, and + * assign the rhs to andable (because it could be followed by AND). + * Successive ORs produce (orable = orable OR andable), followed by + * assigning the rhs to andable. + * + * At the end of the AND/OR evaluation, there is always an andable + * value, because that's where we began. If there is a orable, that + * indicates that the final OR remains unevaluated. In the resolve() + * method, we OR the two, and return that orable. If there's no + * orable, we simply return the andable. +*/ +class log_expr_t { + cbl_field_t *orable, *andable; + public: + log_expr_t( cbl_field_t *init ) : orable(NULL), andable(init) { + if( ! is_conditional(init) ) { + dbgmsg("%s:%d: logic error: %s is not a truth value", + __func__, __LINE__, name_of(init)); + } + } + + cbl_field_t * and_term() { + return andable; + } + log_expr_t * and_term( cbl_field_t *rhs ) { + if( ! is_conditional(rhs) ) { + dbgmsg("%s:%d: logic error: %s is not a truth value", + __func__, __LINE__, name_of(rhs)); + } else { + parser_logop( andable, andable, and_op, rhs ); + } + return this; + } + log_expr_t * or_term( cbl_field_t *rhs ) { + if( ! is_conditional(rhs) ) { + dbgmsg("%s:%d: logic error: %s is not a truth value", + __func__, __LINE__, name_of(rhs)); + return this; + } + if( ! orable ) { + orable = andable; + } else { + parser_logop( orable, orable, or_op, andable ); + } + andable = rhs; + return this; + } + cbl_field_t * resolve() { + assert(andable); + if( orable ) { + parser_logop( andable, orable, or_op, andable ); + orable = NULL; + } + assert(!orable); + return andable; // leave in (initial) ANDable state + } + bool unresolved() const { + return orable != NULL; + } +}; + +static void ast_enter_section( cbl_label_t * ); +static void ast_enter_paragraph( cbl_label_t * ); + +static class current_t { + friend cbl_options_t current_options(); + cbl_options_t options_paragraph; + program_stack_t programs; + unique_typedefs_t typedefs; + std::set<function_descr_t> udfs; + int first_statement; + bool in_declaratives; + // from command line or early TURN + std::list<cbl_exception_files_t> cobol_exceptions; + + error_labels_t error_labels; + + static void declarative_execute( cbl_label_t *eval ) { + if( !eval ) { + if( !enabled_exceptions.empty() ) { + auto index = new_temporary(FldNumericBin5); + parser_match_exception(index, NULL); + } + return; + } + assert(eval); + auto iprog = symbol_elem_of(eval)->program; + if( iprog == current_program_index() ) { + parser_perform(eval); + } else { + parser_entry_activate( iprog, eval ); + auto name = cbl_label_of(symbol_at(iprog))->name; + cbl_unimplemented("Global declarative %s for %s", + eval->name, name); + parser_call( new_literal(strlen(name), name, quoted_e), + cbl_refer_t(), 0, NULL, NULL, NULL, false ); + } + } + + rel_part_t antecedent_cache; + + public: + current_t() + : first_statement(0) + , in_declaratives(false) + {} + + bool option( cbl_options_t::arith_t option ) { + if( programs.size() == 1 ) { + options_paragraph.arith = option; + return true; + } + return false; + } + bool option_binary( cbl_options_t::float_endidanism_t option ) { + if( programs.size() == 1 ) { + options_paragraph.binary_endidanism = option; + return true; + } + return false; + } + bool option_decimal( cbl_options_t::float_endidanism_t option ) { + if( programs.size() == 1 ) { + options_paragraph.decimal_endidanism = option; + return true; + } + return false; + } + bool option( cbl_options_t::float_encoding_t option ) { + if( programs.size() == 1 ) { + options_paragraph.float_encoding = option; + return true; + } + return false; + } + bool default_round( cbl_round_t option ) { + if( programs.size() == 1 ) { + options_paragraph.default_round = option; + return true; + } + return false; + } + bool intermediate_round( cbl_round_t option ) { + if( programs.size() == 1 ) { + options_paragraph.intermediate_round = option; + return true; + } + return false; + } + + template <typename T> + bool initial_option( cbl_section_type_t section, T value ) { + if( programs.size() == 1 ) { + switch( section ) { + case file_sect_e: + case linkage_sect_e: + break; + case working_sect_e: + options_paragraph.initial_value.working = value; + return true; + break; + case local_sect_e: + options_paragraph.initial_value.local = value; + return true; + break; + } + } + return false; + } + + bool initial_value( cbl_section_type_t section, size_t isym ) { + return initial_option( section, isym ); + } + + cbl_enabled_exceptions_t enabled_exception_cache; + + typedef std::list<cbl_declarative_t> declaratives_list_t; + class declaratives_t : protected declaratives_list_t { + struct file_exception_t { + ec_type_t type; uint32_t file; + bool operator<( const file_exception_t& that ) const { + if( type == that.type ) return file < that.file; + return type < that.type; + } + }; + std::set<file_exception_t> file_exceptions; + public: + bool empty() const { + return declaratives_list_t::empty(); + } + inline const declaratives_list_t& as_list() const { return *this; } + + bool add( const_reference declarative ) { + auto d = std::find_if( begin(), end(), + [sect = declarative.section]( const_reference decl ) { + return decl.section == sect; + } ); + if( d != end() ) { + auto label = cbl_label_of(symbol_at(d->section)); + yyerror("USE already defined for %s", label->name); + return false; + } + for( auto f = declarative.files; + f && f < declarative.files + declarative.nfile; f++ ) { + file_exception_t ex = { declarative.type, *f }; + auto result = file_exceptions.insert(ex); + if( ! result.second ) { + yyerror("%s defined twice for %s", + ec_type_str(declarative.type), + cbl_file_of(symbol_at(*f))->name); + return false; + } + } + declaratives_list_t::push_back(declarative); + return true; + } + } declaratives; + + void exception_add( ec_type_t ec, bool enabled = true) { + std::set<size_t> files; + enabled_exceptions.turn_on_off(enabled, + false, // for now + ec, files); + if( yydebug) enabled_exceptions.dump(); + } + + bool typedef_add( const cbl_field_t *field ) { + auto result = typedefs.insert(field); + return result.second; + } + const cbl_field_t * has_typedef( const cbl_field_t *field ) { + auto found = typedefs.find(field); + return found == typedefs.end()? NULL : *found; + return found == typedefs.end()? NULL : *found; + } + + void udf_add( size_t isym ) { + auto udf = function_descr_t::init(isym); + auto p = udfs.insert(udf); + assert(p.second); + } + const function_descr_t * udf_in( const char name[] ) { + auto udf = function_descr_t::init(name); + auto p = udfs.find(udf); + const function_descr_t *output = NULL; + if( p != udfs.end() ) output = &*p; + return output; + } + void udf_update( const ffi_args_t *ffi_args ); + bool udf_args_valid( const cbl_label_t *func, + const std::list<cbl_refer_t>& args, + std::vector<function_descr_arg_t>& params /*out*/ ); + + void udf_dump() const { + if( yydebug ) { + int i=0; + for( auto udf : udfs ) { + dbgmsg("%4d %-30s %-30s", i++, keyword_str(udf.token), udf.name); + } + } + } + + void repository_add_all(); + bool repository_add( const char name[] ); + int repository_in( const char name[] ); + + bool repository_add( size_t isym ) { + auto udf = function_descr_t::init(isym); + auto p = udfs.find(udf); // previously defined functions in "udfs" + assert(p != udfs.end()); // If it's a symbol, it must be in udfs. + auto result = programs.top().function_repository.insert(*p); + if( yydebug ) { + for( auto descr : programs.top().function_repository ) { + dbgmsg("%s:%d: %-20s %-20s %-20s", __func__, __LINE__, + keyword_str(descr.token), descr.name, descr.cname); + } + } + return result.second; + } + + size_t declarative_section() const { + return symbol_index(symbol_elem_of(programs.top().section)); + } + const char * declarative_section_name() const { + return in_declaratives? programs.top().section->name : NULL; + } + + std::list<std::string>& debugging_declaratives(bool all) const { + const char *para = programs.top().paragraph->name; + auto declaratives = debugging_clients.find(all? ":all:" : para); + if( declaratives == debugging_clients.end() ) { + static std::list<std::string> empty; + return empty; + } + return declaratives->second; + } + + bool + collating_sequence( const cbl_name_t name ) { + assert(name); + assert(!programs.empty()); + prog_descr_t& program = programs.top(); + if( program.collating_sequence ) return false; // already defined + program.collating_sequence = name; + return true; + } + const char * + collating_sequence() const { + assert(!programs.empty()); + return programs.top().collating_sequence; + } + + cbl_round_t rounded_mode() const { return programs.top().options.default_round; } + cbl_round_t rounded_mode( cbl_round_t mode ) { + return programs.top().options.default_round = mode; + } + + cbl_call_convention_t + call_convention() { + return programs.empty()? cbl_call_cobol_e : programs.top().call_convention; + } + cbl_call_convention_t + call_convention( cbl_call_convention_t convention) { + if( programs.empty() ) { + return programs.pending_call_convention(convention); + } + auto& prog( programs.top() ); + return prog.call_convention = convention; + } + + const char * + locale() { + return programs.empty()? NULL : programs.top().locale.os_name; + } + const char * + locale( const cbl_name_t name ) { + if( programs.empty() ) return NULL; + const prog_descr_t::locale_t& locale = programs.top().locale; + return 0 == strcmp(name, locale.name)? locale.name : NULL; + } + const prog_descr_t::locale_t& + locale( const cbl_name_t name, const char os_name[] ) { + if( programs.empty() ) { + static prog_descr_t::locale_t empty; + return empty; + } + return programs.top().locale = prog_descr_t::locale_t(name, os_name); + } + + bool new_program ( const YYLTYPE& loc, cbl_label_type_t type, + const char name[], const char os_name[], + bool common, bool initial ) + { + size_t parent = programs.empty()? 0 : programs.top().program_index; + cbl_label_t label = { + .type = type, + .parent = parent, + .line = yylineno, + .common = common, + .initial = initial, + .os_name = os_name + }; + if( !namcpy(loc, label.name, name) ) { gcc_unreachable(); } + + const cbl_label_t *L; + if( (L = symbol_program_add(parent, &label)) == NULL ) return false; + programs.push( symbol_index(symbol_elem_of(L))); + programs.apply_pending(); + + bool fOK = symbol_at(programs.top().program_index) + 1 == symbols_end(); + assert(fOK); + + if( (L = symbol_program_local(name)) != NULL ) { + error_msg(loc, "program '%s' already defined on line %d", + L->name, L->line); + return false; + } + + options_paragraph = cbl_options_t(); + first_statement = 0; + + return fOK; + } + + void program_needs_initial() { programs.pending_initial(); } + + size_t program_index(void) const { + assert(!programs.empty()); + return programs.top().program_index; + } + size_t program_declaratives(void) const { + if( programs.empty() ) return 0; + return programs.top().declaratives_index; + } + const cbl_label_t * program(void) { + return programs.empty()? + NULL : cbl_label_of(symbol_at(programs.top().program_index)); + } + cbl_label_t * section(void) { + return programs.empty()? NULL : programs.top().section; + } + cbl_label_t * paragraph(void) { + return programs.empty()? NULL : programs.top().paragraph; + } + + bool is_first_statement( const YYLTYPE& loc ) { + if( ! in_declaratives && first_statement == 0 ) { + if( ! symbol_label_section_exists(program_index()) ) { + if( ! dialect_ibm() ) { + error_msg(loc, + "Per ISO a program with DECLARATIVES must begin with a SECTION, " + "requires -dialect ibm"); + } + } + first_statement = loc.first_line; + return true; + } + return false; + } + + /* + * At the end of each program, ensure there are no uses of an ambiguous + * procedure (SECTION or PARAGRAPH) name. At the end of a top-level program, + * adjust any CALL targets to use the mangled name of the internal (contained + * or COMMON ) program. We ensure there are no duplicate program names, per + * ISO, in new_program. + */ + std::set<std::string> end_program() { + if( enabled_exceptions.size() ) { + declaratives_evaluate(ec_none_e); + } + + assert(!programs.empty()); + + procref_t *ref = ambiguous_reference(program_index()); + std::set<std::string> externals = programs.top().external_targets(); + + /* + * For each called local program, replace the original undecorated + * target with the mangled name. + * + * At END-PROGRAM for the top-level program, we know all + * subprograms, and whether or not they are COMMON. PROGRAM may be + * the caller, or a subprogram could call COMMON sibling. + */ + if( programs.size() == 1 ) { + if( yydebug ) parser_call_targets_dump(); + for( size_t caller : symbol_program_programs() ) { + const char *caller_name = cbl_label_of(symbol_at(caller))->name; + for( auto callable : symbol_program_callables(caller) ) { + auto called = cbl_label_of(symbol_at(callable)); + auto mangled_name = + called->mangled_name? called->mangled_name : called->name; + + size_t n = + parser_call_target_update(caller, called->name, mangled_name); + // Zero is not an error + dbgmsg("updated %zu calls from #%-3zu (%s) s/%s/%s/", + n, caller, caller_name, called->name, mangled_name); + } + } + if( yydebug ) parser_call_targets_dump(); + } + + parser_leave_paragraph( programs.top().paragraph ); + parser_leave_section( programs.top().section ); + programs.pop(); + + debugging_clients.clear(); + error_clients.clear(); + exception_clients.clear(); + + if( ref ) { + yywarn("could not resolve paragraph (or section) '%s' at line %d", + ref->paragraph(), ref->line_number()); + // add string to indicate ambiguity error + externals.insert(":ambiguous:"); + } + return externals; + } + + size_t program_level() const { return programs.size(); } + + size_t program_section() const { + if( programs.empty() || programs.top().section == NULL ) return 0; + auto section = programs.top().section; + return symbol_index(symbol_elem_of(section)); + } + + cbl_label_t *doing_declaratives( bool begin ) { + if( begin ) { + in_declaratives = true; + return NULL; + } + assert( !begin ); + in_declaratives = false; + if( declaratives.empty() ) return NULL; + assert(!declaratives.empty()); + + size_t idcl = symbol_declaratives_add(program_index(), declaratives.as_list()); + programs.top().declaratives_index = idcl; + + // Create section to evaluate declaratives. Given them unique names so + // that we can figure out what is going on in a trace or looking at the + // assembly language. + static int eval_count=1; + char eval[32]; + char lave[32]; + sprintf(eval, "_DECLARATIVES_EVAL%d", eval_count); + sprintf(lave, "_DECLARATIVES_LAVE%d", eval_count); + eval_count +=1 ; + + struct cbl_label_t*& eval_label = programs.top().declaratives_eval; + eval_label = label_add(LblSection, eval, yylineno); + struct cbl_label_t * lave_label = label_add(LblSection, lave, yylineno); + ast_enter_section(eval_label); + declarative_runtime_match(cbl_field_of(symbol_at(idcl)), lave_label); + return lave_label; + } + + cbl_label_t * new_section( cbl_label_t * section ) { + std::swap( programs.top().section, section ); + return section; + } + + /* + * END DECLARATIVES causes: + * 1. Add DECLARATIVES symbol, containing criteria blob. + * 2. Create section _DECLARATIVES_EVAL + * and exit label _DECLARATIVES_LAVE + * 3. declarative_runtime_match generates runtime evaluation "ladder". + * 4. After a declarative is executed, control branches to the exit label. + * + * After each verb, we call declaratives_evaluate, + * which PERFORMs _DECLARATIVES_EVAL. + * + * If the matched declarative is defined by a superior program as + * GLOBAL, it cannot be PERFORMed. Instead, it is CALLed with an + * alternative entry point (TODO). + */ + void + declaratives_evaluate( cbl_file_t *file, + file_status_t status = FsSuccess ) { + // The exception file number is assumed to be zero at all times unless + // it has been set to non-zero, at which point whoever picks it up and takes + // action on it is charged with setting it back to zero. + if( file ) + { + parser_set_file_number((int)symbol_index(symbol_elem_of(file))); + } + // parser_set_file_number(file ? (int)symbol_index(symbol_elem_of(file)) : 0); + parser_set_handled((ec_type_t)status); + + parser_file_stash(file); + + cbl_label_t *eval = programs.first_declarative(); + if( eval ) { + auto iprog = symbol_elem_of(eval)->program; + if( iprog == current_program_index() ) { + parser_perform(eval); + } else { + parser_entry_activate( iprog, eval ); + auto name = cbl_label_of(symbol_at(iprog))->name; + parser_call( new_literal(strlen(name), name, quoted_e), + cbl_refer_t(), 0, NULL, NULL, NULL, false ); + } + } + } + + void + declaratives_evaluate( std::list<cbl_file_t*>& files ) { + for( auto& file : files ) { + declaratives_evaluate(file); + } + } + + /* + * To indicate to the runtime-match function that we want to evaluate + * only the exception condition, unrelated to a file, we set the + * file register to 0 and the handled-exception register to the + * handled exception condition (not file status). + * + * declaratives_execute performs the "declarative ladder" produced + * by declaratives_runtime_match. That section CALLs the + * runtime-match procedure __gg__match_exception, passing it the + * values of those two registers. When that function sees there's + * no file involved, it interprets the "handled" parameter as + * ec_type_t, and returns the matching declarative symbol-table + * index, per usual. + */ + void + declaratives_evaluate( ec_type_t handled = ec_none_e ) { + // The exception file number is assumed to be zero unless it has been + // changed to a non-zero value. The program picking it up and referencing + // it is charged with setting it back to zero. + // parser_set_file_number(0); + + parser_set_handled(handled); + + cbl_label_t *eval = programs.first_declarative(); + declarative_execute(eval); + } + + cbl_label_t * new_paragraph( cbl_label_t *para ) { + auto& prog( programs.top() ); + auto old(prog.paragraph); + prog.paragraph = para; + return old; + } + + void antecedent_dump() const { + if( ! yydebug ) return; + if( ! antecedent_cache.operand ) { + yywarn( "Antecedent: none" ); + } else { + yywarn( "Antecedent: %c %s %s %c", + antecedent_cache.invert? '!':' ', + name_of(antecedent_cache.operand->field), + relop_str(antecedent_cache.relop), + antecedent_cache.has_relop? 'T' : 'F' ); + } + } + void antecedent( const rel_part_t& ante ) { antecedent_cache = ante; antecedent_dump(); } + void antecedent_reset() { antecedent_cache = rel_part_t(); antecedent_dump(); } + rel_part_t& antecedent() { return antecedent_cache; } + rel_part_t& antecedent( relop_t op ) { + antecedent_cache.relop_set(op); + antecedent_dump(); + return antecedent_cache; + } + rel_part_t& antecedent_invert( bool invert=true ) { + antecedent_cache.invert = invert; + antecedent_dump(); + return antecedent_cache; + } + + void compute_begin() { error_labels.generate(); } + bool in_compute() { return error_labels.on_error != NULL; } + void compute_end() { error_labels.clear(); } + cbl_label_t * compute_on_error() { return error_labels.on_error; } + cbl_label_t * compute_not_error() { return error_labels.not_error; } + cbl_label_t * compute_label() { return error_labels.compute_error; } +} current; + +#define PROGRAM current.program_index() + +static void +add_debugging_declarative( const cbl_label_t * label ) { + const char *section = current.declarative_section_name(); + if( section ) { + debugging_clients[label->name].push_back(section); + } +}; + +cbl_options_t current_options() { + return current.options_paragraph; +} + +size_t current_program_index() { + return current.program()? current.program_index() : 0; +} + +cbl_label_t * current_section() { + return current.section(); +} +cbl_label_t * current_paragraph() { + return current.paragraph(); +} + +const char * +current_declarative_section_name() { + return current.declarative_section_name(); +} + +void +add_cobol_exception( ec_type_t type, bool enabled ) { + current.exception_add( type, enabled ); +} + +static cbl_round_t rounded_of( int token ); + +cbl_round_t +current_rounded_mode() { + return current.rounded_mode(); +} + +#if needed +static cbl_round_t +current_rounded_mode( cbl_round_t rounded) { + return current.rounded_mode(rounded); +} +#endif +static cbl_round_t current_rounded_mode( int token ); + +cbl_call_convention_t +current_call_convention() { + return current.call_convention(); +} +cbl_call_convention_t +current_call_convention( cbl_call_convention_t convention) { + return current.call_convention(convention); +} + +size_t program_level() { return current.program_level(); } + +static size_t constant_index( int token ); + +static relop_t relop_of(int); +static relop_t relop_invert(relop_t op); + +static enum classify_t classify_of( int token ); + +static void subscript_dimension_error( YYLTYPE loc, size_t, const cbl_refer_t *name ); + +/* + * Utility functions + */ + +char * +normalize_picture( char picture[] ); + +static inline cbl_field_t * +new_tempnumeric(void) { return new_temporary(FldNumericBin5); } + +static inline cbl_field_t * +new_tempnumeric_float(void) { return new_temporary(FldFloat); } + +uint32_t +type_capacity( enum cbl_field_type_t type, uint32_t digits ); + +bool +valid_picture( enum cbl_field_type_t type, const char picture[] ); + +bool +move_corresponding( cbl_refer_t& tgt, cbl_refer_t& src ); + +static bool +literal_subscripts_valid( YYLTYPE loc, const cbl_refer_t& name ); +static bool +literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ); + +static bool +is_integer_literal( const cbl_field_t *field ) { + if( is_literal(field) ) { + int v, n; + const char *initial = field->data.initial; + + return 1 == sscanf(initial, "%d%n", &v, &n) && n == (int)strlen(initial); + } + return false; +} + +static inline bool +is_string_literal( const cbl_field_t *field ) { + return is_literal(field) && is_quoted(field); +} + +static inline bool +needs_picture( cbl_field_type_t type ) { + switch(type) { + case FldDisplay: + case FldInvalid: + gcc_unreachable(); + return false; // not a valid question + + case FldAlphaEdited: + case FldAlphanumeric: + case FldNumericBinary: + case FldNumericDisplay: + case FldNumericEdited: + case FldPacked: + return true; + + case FldFloat: + case FldNumericBin5: + return false; + + case FldBlob: + case FldClass: + case FldConditional: + case FldForward: + case FldGroup: + case FldIndex: + case FldLiteralA: + case FldLiteralN: + case FldPointer: + case FldSwitch: + return false; + } + + dbgmsg("%s:%d: unknown cbl_field_type_t %u", __func__, __LINE__, type); + gcc_unreachable(); + return false; +} + +static bool +is_callable( const cbl_field_t *field ) { + switch ( field->type ) { + case FldInvalid: + case FldNumericEdited: + case FldAlphaEdited: + case FldClass: + case FldConditional: + case FldForward: + case FldSwitch: + case FldDisplay: + case FldBlob: + case FldNumericDisplay: + case FldNumericBinary: + case FldFloat: + case FldPacked: + case FldNumericBin5: + case FldLiteralN: + case FldIndex: + return false; + case FldGroup: + case FldLiteralA: + case FldAlphanumeric: + case FldPointer: + return true; + } + cbl_internal_error( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, field->type ); + return false; +} + +/* + * intrinsic calls + */ +struct cbl_fieldloc_t { + YYLTYPE loc; + cbl_field_t *field; + + cbl_fieldloc_t() : loc{ 1,1, 1,1 }, field(NULL) {} + cbl_fieldloc_t( const YYLTYPE& loc, cbl_field_t *field ) + : loc(loc), field(field) + {} +}; + +static size_t +intrinsic_invalid_parameter( int token, const std::vector<cbl_refer_t>& args ); + +static const char * +intrinsic_cname( int token ); + +static bool +intrinsic_call_0( cbl_field_t *output, int token ) { + const char *name = intrinsic_cname(token); + if( !name ) return false; + parser_intrinsic_call_0( output, name ); + return true; +} + +static bool +intrinsic_call_1( cbl_field_t *output, int token, + cbl_refer_t *r1, const YYLTYPE& loc ) { + std::vector<cbl_refer_t> args { *r1 }; + if( 0 == intrinsic_invalid_parameter(token, args) ) { + error_msg(loc, "invalid parameter '%s'", r1->field->name); + return false; + } + + const char *func = intrinsic_cname(token); + if( !func ) return false; + parser_intrinsic_call_1( output, func, *r1 ); + return true; +} + +static bool +intrinsic_call_2( cbl_field_t *tgt, int token, cbl_refer_t *r1, cbl_refer_t *r2 ) { + std::vector<cbl_refer_t> args { *r1, *r2 }; + size_t n = intrinsic_invalid_parameter(token, args); + if( n < args.size() ) { + error_msg(args[n].loc, "invalid parameter '%s'", args[n].field->name); + return false; + } + const char *fund = intrinsic_cname(token); + if( !fund ) return false; + parser_intrinsic_call_2( tgt, fund, args[0], args[1] ); + return true; +} + +static bool +intrinsic_call_3( cbl_field_t *tgt, int token, + cbl_refer_t *r1, cbl_refer_t *r2, cbl_refer_t *r3 ) { + std::vector<cbl_refer_t> args { *r1, *r2, *r3 }; + size_t n = intrinsic_invalid_parameter(token, args); + if( n < args.size() ) { + error_msg(args[n].loc, "invalid parameter '%s'", args[n].field->name); + return false; + } + const char *func = intrinsic_cname(token); + if( !func ) return false; + parser_intrinsic_call_3( tgt, func, *r1, *r2, *r3 ); + return true; +} + +static bool +intrinsic_call_4( cbl_field_t *tgt, int token, + cbl_refer_t *r1, cbl_refer_t *r2, + cbl_refer_t *r3, cbl_refer_t *r4 ) { + std::vector<cbl_refer_t> args { *r1, *r2, *r3, *r4 }; + size_t n = intrinsic_invalid_parameter(token, args); + if( n < args.size() ) { + error_msg(args[n].loc, "invalid parameter '%s'", args[n].field->name); + return false; + } + const char *func = intrinsic_cname(token); + if( !func ) return false; + parser_intrinsic_call_4( tgt, func, *r1, *r2, *r3, *r4 ); + return true; +} + +/* + * Local functions + */ + +static inline cbl_field_t * +new_literal( const char initial[] ) { + return new_literal( strlen(initial), initial ); +} + +cbl_refer_t * +negate( cbl_refer_t * refer, bool neg = true ) { + if( ! neg ) return refer; + assert( is_numeric(refer->field) ); + auto output = new_reference(new_tempnumeric()); + parser_subtract( *output, literally_zero, *refer, current_rounded_mode() ); + return output; +} + +cbl_field_t * +conditional_set( cbl_field_t *tgt, bool tf ) { + static cbl_field_t *one = new_literal("1"); + + enum relop_t op = tf? eq_op : ne_op; + parser_relop( tgt, one, op, one ); + return tgt; +} + +static inline cbl_field_t * +table_primary_index( cbl_field_t *table ) { + assert(table); + return 0 == table->occurs.indexes.nfield? + NULL : cbl_field_of(symbol_at(table->occurs.indexes.fields[0])); +} + +static inline const cbl_refer_t // & // Removed the '&' to stop a weird compiler error +invalid_key( const cbl_refer_t& ref ) { + assert(ref.field); + + if( ref.nsubscript == 0 ) return ref; + + for( size_t i=0; i < ref.nsubscript; i++ ) { + if( ref.subscripts[i].field->parent != ref.field->parent ) { + return ref.subscripts[i]; + } + } + return NULL; +} + +static inline symbol_elem_t * +symbol_find( const std::list<const char *>& names ) { + auto found = symbol_find(PROGRAM, names); + if( found.first && !found.second ) { + auto field = cbl_field_of(found.first); + yyerror( "%s is not unique, first defined on line %d", + field->name, field->line ); + return NULL; + } + return found.first; +} + +static inline cbl_field_t * +field_find( const std::list<const char *>& names ) { + if( names.size() == 1 ) { + auto value = cdf_value(names.front()); + if( value ) { + cbl_field_t * field; + if( value->is_numeric() ) { + field = new_tempnumeric(); + parser_set_numeric(field, value->as_number()); + } else { + field = new_literal(value->string); + } + return field; + } + } + symbol_elem_t *e = symbol_find(names); + return e? cbl_field_of(e) : NULL; +} + +static inline symbol_elem_t * +symbol_find( const YYLTYPE& loc, const char *name ) { + cbl_namelist_t names; + if( ! name_queue.empty() ) { + auto names = name_queue.pop_as_names(); + } + names.push_front(name); + auto found = symbol_find( PROGRAM, names ); + if( found.first && !found.second ) { + auto field = cbl_field_of(found.first); + error_msg(loc, "'%s' is not unique, first defined on line %d", + field->name, field->line); + return NULL; + } + return found.first; +} + +static inline cbl_field_t * +register_find( const char *name ) { + return cbl_field_of(symbol_register(name)); +} + +static bool +valid_redefine( const YYLTYPE& loc, + const cbl_field_t *field, const cbl_field_t *orig ) { + // Must have same level. + if( field->level != orig->level ) { + error_msg(loc, "cannot redefine %s %s as %s %s " + "because they have different levels", + orig->level_str(), orig->name, + field->level_str(), field->name); + return false; + } + + // no higher level intervenes + /* + * No entry having a level-number numerically lower than the + * level-number of data-name-2 may occur between the data + * description entries of data-name-2 and the subject of the entry. + */ + struct { symbol_elem_t *field, *orig; } sym = { + symbol_at(field_index(field)), + symbol_at(field_index(orig)) }; + + auto e = std::find_if( sym.orig + 1, sym.field, + [lowest = field->level]( auto& elem ) { + if( elem.type != SymField ) return false; + auto f = cbl_field_of(&elem); + return 0 < f->level && f->level < lowest; + } ); + if( e != sym.field ) { + auto wrong = cbl_field_of(e); + error_msg(loc, "%s %s on line %d lies between %s and %s", + wrong->level_str(), wrong->name, wrong->line, + orig->name, field->name); + return false; + } + + // cannot redefine a table + if( orig->occurs.ntimes() ) { + error_msg(loc, "cannot redefine table %s %s", + orig->level_str(), orig->name); + return false; + } + + // redefined field cannot be ODO + if( orig->occurs.depending_on ) { + error_msg(loc, "redefined data item %s %s has OCCURS DEPENDING ON", + orig->level_str(), orig->name); + return false; + } + // redefiner cannot have ODO + if( field->occurs.depending_on ) { + error_msg(loc, "data item %s %s cannot use REDEFINES and OCCURS DEPENDING ON", + field->level_str(), field->name); + return false; + } + + if( is_variable_length(orig) ) { + error_msg(loc, "redefined data item %s %s has OCCURS DEPENDING ON", + orig->level_str(), orig->name); + return false; + } + // We don't know about the redefining group until it's completely defined. + + /* + * 8) The storage area required for the subject of the entry + * shall not be larger than the storage area required for the + * data item referenced by data-name-2, unless the data item + * referenced by data- name-2 has been specified with level + * number 1 and without the EXTERNAL clause. + */ + if( field->type != FldGroup && orig->type != FldGroup ) { + if( orig->size() < field->size() ) { + if( orig->level > 1 || orig->has_attr(external_e) ) { + dbgmsg( "size error orig: %s", field_str(orig) ); + dbgmsg( "size error redef: %s", field_str(field) ); + error_msg(loc, "%s (%s size %u) larger than REDEFINES %s (%s size %u)", + field->name, + 3 + cbl_field_type_str(field->type), field->size(), + orig->name, + 3 + cbl_field_type_str(orig->type), orig->size() ); + } + } + } + + /* + * 4) No entry having a level-number numerically lower than the + * level-number of data-name-2 may occur between the data + * description entries of data-name-2 and the subject of the entry. + */ + bool same_group = std::none_of( symbol_at(field_index(orig)), + symbol_at(field_index(field)), + [level = field->level]( const auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + return 0 < f->level && f->level < level; + } + return false; + } ); + if( ! same_group ) { + error_msg(loc, "cannot redefine %s %s as %s %s " + "because they belong to different groups", + orig->level_str(), orig->name, + field->level_str(), field->name); + return false; + } + + return true; +} + +static void +field_value_all(struct cbl_field_t * field ) { + // Expand initial by repeating its contents until it is of length capacity: + assert(field->data.initial != NULL); + size_t initial_length = strlen(field->data.initial); + char *new_initial = static_cast<char*>(xmalloc(field->data.capacity + 1)); + size_t i = 0; + while(i < field->data.capacity) { + new_initial[i] = field->data.initial[i%initial_length]; + i += 1; + } + new_initial[field->data.capacity] = '\0'; + free(const_cast<char *>(field->data.initial)); + field->data.initial = new_initial; +} + +static cbl_field_t * +parent_has_value( cbl_field_t *field ) { + while( (field = parent_of(field)) != NULL ) { + if( field->data.initial ) break; + } + return field; +} + +static uint32_t +group_attr( const cbl_field_t * field ) { + if( field->parent == 0 ) return 0; + + const symbol_elem_t *e = symbol_at(field->parent); + if( SymField != e->type ) return 0; + + const cbl_field_t *p = cbl_field_of(e); + if( p->type != FldGroup ) return 0; + + return p->attr; +} + +static struct symbol_elem_t * +field_of( const char F[], int L, const char name[] ) { + struct symbol_elem_t *e = symbol_field(PROGRAM, 0, name); + if( !e ) { + cbl_internal_error("%s:%d: no symbol '%s' found", F, L, name); + } + assert( procedure_div_e != current_division ); + return e; +} +#define field_of( F ) field_of(__func__, __LINE__, (F)) + +static struct cbl_field_t * +field_add( const YYLTYPE& loc, cbl_field_t *field ) { + switch(current_data_section) { + case not_data_datasect_e: + case file_datasect_e: + case working_storage_datasect_e: + break; + case local_storage_datasect_e: + field->attr |= local_e; + break; + case linkage_datasect_e: + field->attr |= linkage_e; + break; + } + + // Use isym 0 to indicate the location of the field under construction. + symbol_field_location(0, loc); + + struct symbol_elem_t *e = symbol_field_add(PROGRAM, field); + if( !e ) return NULL; + symbol_field_location(symbol_index(e), loc); + field = cbl_field_of(e); + assert(field->type != FldDisplay); + + if( field->parent == 0 ) { + switch(field->level) { + case 0: case 1: case 77: case 78: + break; + default: + error_msg(loc, "%s %s is not part of an 01 record", + field->level_str(), field->name ); + return NULL; + break; + } + } + return field; +} + +static const char * +field_attr_str( const cbl_field_t *field ) { + static const std::vector<cbl_field_attr_t> attrs { + figconst_1_e, figconst_2_e, figconst_4_e, rjust_e, ljust_e, + zeros_e, signable_e, constant_e, function_e, quoted_e, filler_e, + intermediate_e, embiggened_e, all_alpha_e, all_x_e, + all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e, + global_e, external_e, blank_zero_e, linkage_e, local_e, leading_e, + separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e, + depends_on_e, initialized_e, has_value_e, ieeedec_e, big_endian_e, + same_as_e, record_key_e, typedef_e, strongdef_e, + }; + return field->attr_str(attrs); +} + +static bool +uniform_picture( const char *picture, char model ) { + const char *eopicture( picture + strlen(picture) ); + model = TOLOWER(model); + return std::all_of(picture, eopicture, + [model]( char ch ) { + return model == TOLOWER(ch); + } ); +} + +static enum cbl_field_attr_t +uniform_picture( const char *picture ) { + static char ch[] = { 'A', 'X' }; + for( auto p = ch; p < ch + sizeof(ch); p++ ) { + if( uniform_picture(picture, *p) ) { + switch(*p) { + case 'A': return all_alpha_e; + case 'X': return all_x_e; + } + } + } + return none_e; +} + +static bool +field_type_update( cbl_field_t *field, cbl_field_type_t type, + YYLTYPE loc, + bool is_usage = false) +{ + // preserve NumericEdited if already established + if( !is_usage && field->has_attr(blank_zero_e) ) { + if( type == FldNumericDisplay && field->type == FldNumericEdited ) { + return true; + } + } + + // disallow USAGE if inherited from parent (all members must be of same type) + if( is_usage && field->usage != type ) { + switch( field->usage ) { + case FldInvalid: + case FldDisplay: + break; // ok + default: + error_msg(loc, "cannot set %s to USAGE %s " + "because the group is restricted to USAGE %s", + field->name, cbl_field_type_str(type), + cbl_field_type_str(field->usage)); + return false; + } + } + + if( ! symbol_field_type_update(field, type, is_usage) ) { + error_msg(loc, "cannot set USAGE of %s to %s (from %s)", field->name, + cbl_field_type_str(type) + 3, cbl_field_type_str(field->type) + 3); + return false; + } + + dbgmsg( "%s:%d: %s became %s based on %s", __func__, __LINE__, field->name, + cbl_field_type_str(field->type), cbl_field_type_str(type) ); + + return true; +} + +static bool +field_capacity_error( const YYLTYPE& loc, const cbl_field_t *field ) { + uint32_t parent_capacity = 0; + if( field->parent ) { + auto e = symbol_at(field->parent); + if( e->type == SymField ) parent_capacity = cbl_field_of(e)->data.capacity; + } + /* + * Field may become a table whose capacity was inherited from a parent with + * data. If so, the field's capacity will be overwritten by its + * PICTURE-defined size. + */ + if( parent_capacity < field->data.capacity && !symbol_redefines(field) ) { + dbgmsg( "%s: %s", __func__, field_str(field) ); + error_msg(loc, "%s has USAGE incompatible with PICTURE", + field->name ); + return true; + } + return false; +} +#define ERROR_IF_CAPACITY(L, F) \ + do { if( field_capacity_error(L, F) ) YYERROR; } while(0) + +static const char * +blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) { + assert(capacity < new_size); + assert(initial != NULL); + + if( normal_value_e != cbl_figconst_of(initial) ) return initial; + + auto p = reinterpret_cast<char *>( xmalloc(2 + new_size) ); + memset(p, 0x20, new_size); + memcpy(p, initial, capacity); + p[new_size] = '\0'; // for debugging + p[++new_size] = '\0'; // for debugging + return p; +} + +static bool +value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) { + if( ! field->internalize() ) { + error_msg(loc, "inconsistent string literal encoding for '%s'", + field->data.initial); + return false; + } + return true; +} + + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +static struct cbl_field_t * +field_alloc( const YYLTYPE& loc, cbl_field_type_t type, size_t parent, const char name[] ) { + cbl_field_t *f, field = { .type = type, .usage = FldInvalid, + .parent = parent, .line = yylineno }; + if( !namcpy(loc, field.name, name) ) return NULL; + f = field_add(loc, &field); + assert(f); + return f; +} + +static cbl_file_key_t no_key; +static const struct +cbl_file_t protofile = { .org = file_disorganized_e, + .access = file_access_seq_e, + .keys = &no_key }; + +// Add a file to the symbol table with its record area field. +// The default organization is sequential. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-truncation" +static cbl_file_t * +file_add( YYLTYPE loc, cbl_file_t *file ) { + gcc_assert(file); + struct cbl_field_t area = { .type = FldAlphanumeric, + .level = 1, + .line = yylineno, + .data = { .capacity = 0 } }, + *field = field_add(loc, &area); + file->default_record = field_index(field); + + // install file, and set record area's name + auto e = symbol_file_add(PROGRAM, file); + if( !e ) { + error_msg(loc, "%s was defined previously on line %d", file->name, file->line); + return NULL; + } + file = cbl_file_of(e); + snprintf(field->name, sizeof(field->name), + "%s%zu_%s", + record_area_name_stem, symbol_index(e), file->name); + if( file->attr & external_e ) { + snprintf(field->name, sizeof(field->name), + "%s%s", record_area_name_stem, file->name); + } + field->file = field->parent = symbol_index(e); + + return file; +} +#pragma GCC diagnostic pop +#pragma GCC diagnostic pop + + +static cbl_alphabet_t * +alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) { + cbl_alphabet_t alphabet(loc, encoding); + symbol_elem_t *e = symbol_alphabet_add(PROGRAM, &alphabet); + assert(e); + return cbl_alphabet_of(e); +} + +// The current field always exists in the symbol table, even if it's incomplete. +static cbl_field_t * +current_field(cbl_field_t * field = NULL) { + static cbl_field_t *local; + if( field ) local = field; + gcc_assert(field_index(local)); + return local; +} + +static struct cbl_special_name_t * +special_of( const char F[], int L, const char name[] ) { + struct symbol_elem_t *e = symbol_special(PROGRAM, name); + if( !e ) { + dbgmsg("%s:%d: no special symbol '%s' found", F, L, name); + return NULL; + } + return cbl_special_name_of(e); +} +#define special_of( F ) special_of(__func__, __LINE__, (F)) + +static inline void +parser_add2( struct cbl_num_result_t& to, + struct cbl_refer_t from ) { + parser_add(to.refer, to.refer, from, to.rounded); +} + +static inline void +parser_subtract2( struct cbl_num_result_t to, + struct cbl_refer_t from ) { + parser_subtract(to.refer, to.refer, from, to.rounded); +} + +static bool +parser_move_carefully( const char */*F*/, int /*L*/, + tgt_list_t *tgt_list, + const cbl_refer_t& src, + bool is_index ) +{ + for( const auto& num_result : tgt_list->targets ) { + const cbl_refer_t& tgt = num_result.refer; + + if( is_index ) { + if( tgt.field->type != FldIndex && src.field->type != FldIndex) { + error_msg(src.loc, "invalid SET %s (%s) TO %s (%s): not a field index", + tgt.field->name, cbl_field_type_str(tgt.field->type), + src.field->name, cbl_field_type_str(src.field->type)); + delete tgt_list; + return false; + } + } else { + if( ! valid_move( tgt.field, src.field ) ) { + if( ! is_index ) { + char ach[16]; + char stype[32]; + char dtype[32]; + strcpy(stype, cbl_field_type_str(src.field->type)); + strcpy(dtype, cbl_field_type_str(tgt.field->type)); + + if( src.field->attr & all_alpha_e ) + { + strcpy(stype, "FldAlphabetic"); + } + if( tgt.field->attr & all_alpha_e ) + { + strcpy(dtype, "FldAlphabetic"); + } + if( !(src.field->attr & scaled_e) && src.field->data.rdigits ) + { + sprintf(ach, ".%d", src.field->data.rdigits); + strcat(stype, ach); + } + if( !(tgt.field->attr & scaled_e) && tgt.field->data.rdigits ) + { + sprintf(ach, ".%d", tgt.field->data.rdigits); + strcat(dtype, ach); + } + + error_msg(src.loc, "cannot MOVE '%s' (%s) to '%s' (%s)", + name_of(src.field), stype, + name_of(tgt.field), dtype); + delete tgt_list; + return false; + } + } + } + } + size_t ntgt = tgt_list->targets.size(); + cbl_refer_t tgts[ntgt]; + std::transform( tgt_list->targets.begin(), tgt_list->targets.end(), tgts, + []( const cbl_num_result_t& res ) { return res.refer; } ); + parser_move(ntgt, tgts, src); + delete tgt_list; + return true; +} +#define parser_move2(P, S) \ + parser_move_carefully(__func__, __LINE__, (P), (S), false) +#define parser_index(P, S) \ + parser_move_carefully(__func__, __LINE__, (P), (S), true) + +static void +ast_set_pointers( const list<cbl_num_result_t>& tgts, cbl_refer_t src ) { + assert(!tgts.empty()); + assert(src.field || src.prog_func); + size_t nptr = tgts.size(); + cbl_refer_t ptrs[nptr]; + + std::transform( tgts.begin(), tgts.end(), ptrs, cbl_num_result_t::refer_of ); + parser_set_pointers(nptr, ptrs, src); +} + +static struct cbl_refer_t * +use_vargs( struct vargs_t *v, struct cbl_refer_t *tgt); + +void +stringify( refer_collection_t *inputs, + cbl_refer_t into, cbl_refer_t pointer, + cbl_label_t *on_error = NULL, + cbl_label_t *not_error = NULL); + +void unstringify( cbl_refer_t& src, refer_list_t *delimited, + unstring_into_t * into, + cbl_label_t *on_error = NULL, + cbl_label_t *not_error = NULL ); + +static cbl_label_t * +implicit_paragraph() +{ + cbl_name_t name; + sprintf(name, "_implicit_paragraph_%zu", symbol_index()); + // Programs have to start with an implicit paragraph + return label_add(LblParagraph, name, yylineno); +} +static cbl_label_t * +implicit_section() +{ + cbl_name_t name; + sprintf(name, "_implicit_section_%zu", symbol_index()); + // Programs have to start with an implicit section + return label_add(LblSection, name, yylineno); +} + +static void +ast_enter_exit_section( cbl_label_t * section ) { + auto implicit = section? implicit_paragraph() : NULL; + + struct { cbl_label_t *para, *sect; + inline bool exists() const { return sect != NULL && para != NULL; } + } prior = { + current.new_paragraph(implicit), + current.new_section(section) + }; + if( false && yydebug ) { + fprintf(stderr, "( %d ) %s:%d: leaving section %s paragraph %s\n", + yylineno, __func__, __LINE__, + prior.sect? prior.sect->name : "''", + prior.para? prior.para->name : "''"); + } + if( prior.exists() ) { + parser_leave_paragraph(prior.para); + parser_leave_section(prior.sect); + } + if( section ) { + parser_enter_section(section); + parser_enter_paragraph(implicit); + } +} + +static inline void +ast_enter_section( cbl_label_t * section ) { + assert(section); + section->lain = yylineno; + ast_enter_exit_section( section ); +} + +static inline void +ast_exit_section() { + ast_enter_exit_section( NULL ); +} + +static void +ast_enter_paragraph( cbl_label_t * para ) { + para->lain = yylineno; + cbl_label_t *prior = current.new_paragraph(para); + if( prior ) { + parser_leave_paragraph(prior); + } + parser_enter_paragraph(para); +} + +static bool +data_division_ready() { + // Install and use any alphabets. + if( nparse_error == 0 ) { // error might have stemmed from the alphabet itself + const char *name = current.collating_sequence(); + + if( ! symbols_alphabet_set(PROGRAM, name) ) { + error_msg(yylloc, "no alphabet '%s' defined", name); + return false; + } + } + + // Tell codegen about symbols. + static size_t nsymbol = 0; + if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) { + if( ! literally_one ) { + literally_one = new_literal("1"); + literally_zero = new_literal("0"); + } + } + + if( nsymbol == 0 || nparse_error > 0 ) { + dbgmsg( "%d errors in DATA DIVISION, compilation ceases", nparse_error ); + return false; + } + + return true; +} + +static +bool +anybody_redefines(cbl_field_t *tree) + { + bool retval = false; + while(tree) + { + if( symbol_redefines(tree) ) + { + retval = true; + break; + } + tree = parent_of(tree); + } + return retval; + } + +static bool +procedure_division_ready( YYLTYPE loc, cbl_field_t *returning, ffi_args_t *ffi_args ) { + auto prog = cbl_label_of(symbols_begin(current.program_index())); + + if( prog->type == LblFunction ) { + if( ! returning ) { + error_msg(loc, "FUNCTION %s requires RETURNING", prog->name); + return false; + } else { + prog->returning = field_index(returning); + } + current.udf_update(ffi_args); + } + + if( returning ) { + if( ! (returning->level == 1 || returning->level == 77) ) { + error_msg(loc, "RETURNING %s must be level 01 or 77", returning->name); + } + if( symbol_redefines(returning) ) { + error_msg(loc, "RETURNING %s cannot REDFINE anything", returning->name); + } + } + if( ffi_args ) { + size_t i=0; + for( const auto& arg : ffi_args->elems ) { + auto field = arg.refer.field; + i++; + if( returning == field ) { + error_msg(loc, "RETURNING %s duplicates USING parameter %zu", + returning->name, i); + } + if( ! (field->level == 1 || field->level == 77) ) { + error_msg(loc, "USING %s must be level 01 or 77", + field->name); + } + if( symbol_redefines(field) ) { + error_msg(loc, "USING %s cannot REDEFINE anything", + field->name ); + } + } + } + + // Start the Procedure Division. + size_t narg = ffi_args? ffi_args->elems.size() : 0; + cbl_ffi_arg_t args[1 + narg], *pargs = NULL; + if( narg > 0 ) { + pargs = use_list(ffi_args, args); + } + + // Create program initialization section. We build it on an island, + // that gets executed only if the program is IS INITIAL, or when the + // program is the subject of a CANCEL statement. + + static const char init[] = "_INITIALIZE_PROGRAM"; + static const char tini[] = "_INITIALIZE_DONE"; + + struct cbl_label_t * init_label = label_add(LblSection, init, yylineno); + struct cbl_label_t * tini_label = label_add(LblSection, tini, yylineno); + + // parser_division(procedure_div_e) needs initial_section: + prog->initial_section = symbol_index(symbol_elem_of(init_label)); + + if( current.program_index() > 1 ) { + ast_exit_section(); + } + parser_division( procedure_div_e, returning, narg, pargs ); + + std::for_each( symbols_begin(current.program_index()), symbols_end(), + []( auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + if( f->has_attr(local_e) ) { + parser_local_add(f); + } + } + } ); + + // At this point we count up the number of variables that will need to be + // initialized in _INITIALIZE_PROGRAM: + int count_of_variables = 0; + for( symbol_elem_t *e = + symbols_begin(1 + current.program_index()); + e < symbols_end(); e++ ) { + if( is_program(*e) ) break; + if( e->type != SymField ) continue; + cbl_field_t *f = cbl_field_of(e); + if( !f->var_decl_node ) + { + // This can happen when there was an error parsing the data division + continue; + } + if( f->type == FldForward ) continue; + if( f->type == FldLiteralA ) continue; + if( anybody_redefines(f) ) continue; + if( f->has_attr(linkage_e) ) continue; + if( f->has_attr(local_e) ) continue; + if( f->is_typedef() ) { + auto isym = end_of_group( symbol_index(e) ); + e = symbol_at(--isym); + continue; + } + count_of_variables += 1; + } + // Allocate space for the static table of variables + parser_init_list_size(count_of_variables); + + // Do a second pass: + // Initialize the static table with the variables: + for( symbol_elem_t *e = + symbols_begin(1 + current.program_index()); + e < symbols_end(); e++ ) { + if( is_program(*e) ) break; + if( e->type != SymField ) continue; + cbl_field_t *f = cbl_field_of(e); + if( !f->var_decl_node ) + { + // This can happen when there was an error parsing the data division + continue; + } + if( f->type == FldForward ) continue; + if( f->type == FldLiteralA ) continue; + if( anybody_redefines(f) ) continue; + if( f->has_attr(linkage_e) ) continue; + if( f->has_attr(local_e) ) continue; + if( f->is_typedef() ) { + auto isym = end_of_group( symbol_index(e) ); + e = symbol_at(--isym); + continue; + } + parser_init_list_element(f); + } + + // This is where we jump over the island + parser_label_goto(tini_label); + + // And here we create the initialization section: + ast_enter_section(init_label); // _INITIALIZE_PROGRAM section. + + parser_init_list(); + + // Lay down an implicit section to end the init_label + ast_enter_section(implicit_section()); + + // This is the end of the island + parser_label_label(tini_label); + + if( current.program()->initial ) { + // We perform the section we just layed down when IS INITIAL + parser_perform(init_label); + } + return true; +} + +static size_t file_section_fd; +static size_t current_sort_file; + +static bool +file_section_fd_set( file_entry_type_t type, char name[], const YYLTYPE& loc ) { + static std::set<size_t> has_fd; + + // File must have been uniquely created by SELECT. + // FD names are also unique within a program. + auto e = symbol_file(PROGRAM, name); + if( !e ) { + error_msg(loc, "file name not found"); + return false; + } + + file_section_fd = symbol_index(e); + auto result = has_fd.insert(file_section_fd); + if( !result.second ) { + auto f = cbl_file_of(e); + const char *type_str = "???"; + switch(type) { + case fd_e: type_str = "FD"; break; + case sd_e: type_str = "SD"; break; + } + error_msg(loc, "%s %s previously defined on line %d", + type_str, f->name, f->line); + return false; + } + + auto& file(*cbl_file_of(e)); + file.entry_type = type; + + if( file.org == file_disorganized_e ) { + file.org = file_sequential_e; + } + + return file_section_fd > 0; +} + +/* + * While in the File Section, set the parent of each 01 to be the FD + * default_record, and its file member to the file's symbol index. + */ +static bool +file_section_parent_set( cbl_field_t *field ) { + if( symbol_at(file_section_fd)->type == SymFile ) { + auto file = cbl_file_of(symbol_at(file_section_fd)); + auto record_area = cbl_field_of(symbol_at(file->default_record)); + + record_area->data.capacity = std::max(record_area->data.capacity, + field->data.capacity); + + field->file = file_section_fd; + auto redefined = symbol_redefines(record_area); + field->parent = redefined? record_area->parent : file->default_record; + } + return file_section_fd > 0; +} + +void ast_call(const YYLTYPE& loc, cbl_refer_t name, + cbl_refer_t returning, + size_t narg, cbl_ffi_arg_t args[], + cbl_label_t *except, + cbl_label_t *not_except, + bool is_function ); + +cbl_field_t * +ast_file_status_between( file_status_t lower, file_status_t upper ); + +void internal_ebcdic_lock(); +void internal_ebcdic_unlock(); + +void +ast_end_program(const char name[] ) { + std::for_each( symbols_begin(), symbols_end(), + []( const auto& elem ) { + if( elem.type == SymLabel ) { + auto& L( *cbl_label_of(&elem) ); + if( L.used ) { + if( ! L.lain ) { + YYLTYPE loc { L.line, 1, L.line, 1 }; + error_msg(loc, "line %d: %s " + "is used on line %d and never defined", + L.line, L.name, L.used ); + } + dbgmsg("label: %.20s: %d/%d/%d", + L.name, L.line, L.lain, L.used); + } + } + } ); + if( current_program_index() == 0 ) { + parser_program_hierarchy( cbl_prog_hier_t() ); + } else { + ast_exit_section(); + } + parser_end_program(name); + internal_ebcdic_unlock(); +} + +static bool +goodnight_gracie() { + const cbl_label_t *prog = current.program(); + assert(prog); + + std::set<std::string> externals = current.end_program(); + + if( !externals.empty() ) { + for( const auto& name : externals ) { + yywarn("%s calls external symbol '%s'", + prog->name, name.c_str()); + } + return false; + } + + // pointer still valid because name is in symbol table + ast_end_program(prog->name); + return true; +} + +const char * keyword_str( int token ); + +static YYLTYPE current_location; + +const YYLTYPE& cobol_location() { return current_location; } + +static inline YYLTYPE +location_set( const YYLTYPE& loc ) { + return current_location = loc; +} + +static int prior_statement; + +static size_t statement_begin( const YYLTYPE& loc, int token ); + +static void ast_first_statement( const YYLTYPE& loc ) { + if( current.is_first_statement( loc ) ) { + parser_first_statement(loc.first_line); + } +} + +#pragma GCC diagnostic push diff --git a/gcc/cobol/parse_util.h b/gcc/cobol/parse_util.h new file mode 100644 index 0000000..e504f46 --- /dev/null +++ b/gcc/cobol/parse_util.h @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +// This file is included only by parse.y + +#include <map> + +/* + * Intrinsics + * types are: + * A Alphabetic + * D DBCS + * I Integer + * K Keyword + * N Numeric + * O Other + * U National + * 8 UTF-8 + * X Alphanumeric + * n variadic + * We use just A, I, N, or X, choosing the most general for each parameter. + */ +static const function_descr_t function_descrs[] = { + { ABS, "ABS", + "__gg__abs", "N", {}, FldNumericBin5 }, + { ACOS, "ACOS", + "__gg__acos", "N", {}, FldNumericBin5 }, + { ANNUITY, "ANNUITY", + "__gg__annuity", "NI", {}, FldNumericBin5 }, + { ASIN, "ASIN", + "__gg__asin", "N", {}, FldNumericBin5 }, + { ATAN, "ATAN", + "__gg__atan", "N", {}, FldNumericBin5 }, + { BASECONVERT, "BASECONVERT", + "__gg__baseconvert", "XII", {}, FldNumericBin5 }, + { BIT_OF, "BIT-OF", + "__gg__bit_of", "X", {}, FldAlphanumeric }, + { BIT_TO_CHAR, "BIT-TO-CHAR", + "__gg__bit_to_char", "X", {}, FldAlphanumeric }, + // BOOLEAN-OF-INTEGER requires FldBoolean + { BOOLEAN_OF_INTEGER, "BOOLEAN-OF-INTEGER", + "__gg__boolean_of_integer", "II", {}, FldNumericBin5 }, + { BYTE_LENGTH, "BYTE-LENGTH", + "__gg__byte_length", "X", {}, FldNumericBin5 }, + { CHAR, "CHAR", + "__gg__char", "I", {}, FldAlphanumeric }, + { CHAR_NATIONAL, "CHAR-NATIONAL", + "__gg__char_national", "I", {}, FldAlphanumeric }, + { COMBINED_DATETIME, "COMBINED-DATETIME", + "__gg__combined_datetime", "IN", {}, FldNumericBin5 }, + { CONCAT, "CONCAT", + "__gg__concat", "n", {}, FldAlphanumeric }, + { CONVERT, "CONVERT", + "__gg__convert", "XII", {}, FldAlphanumeric }, + { COS, "COS", + "__gg__cos", "N", {}, FldNumericBin5 }, + { CURRENT_DATE, "CURRENT-DATE", + "__gg__current_date", "", {}, FldAlphanumeric }, + { DATE_OF_INTEGER, "DATE-OF-INTEGER", + "__gg__date_of_integer", "I", {}, FldNumericBin5 }, + { DATE_TO_YYYYMMDD, "DATE-TO-YYYYMMDD", + "__gg__date_to_yyyymmdd", "III", {}, FldNumericBin5 }, + { DAY_OF_INTEGER, "DAY-OF-INTEGER", + "__gg__day_of_integer", "I", {}, FldNumericBin5 }, + { DAY_TO_YYYYDDD, "DAY-TO-YYYYDDD", + "__gg__day_to_yyyyddd", "III", {}, FldNumericBin5 }, + { DISPLAY_OF, "DISPLAY-OF", + "__gg__display_of", "UUI", {}, FldAlphanumeric }, + { E, "E", + "__gg_e", "", {}, FldNumericBin5 }, + + { EXCEPTION_FILE, "EXCEPTION-FILE", + "__gg__func_exception_file", "", {}, FldAlphanumeric }, + { EXCEPTION_FILE_N, "EXCEPTION-FILE-N", + "__gg__func_exception_file_n", "", {}, FldAlphanumeric }, + { EXCEPTION_LOCATION, "EXCEPTION-LOCATION", + "__gg__func_exception_location", "", {}, FldAlphanumeric }, + { EXCEPTION_LOCATION_N, "EXCEPTION-LOCATION-N", + "__gg__func_exception_location_n", "", {}, FldAlphanumeric }, + { EXCEPTION_STATEMENT, "EXCEPTION-STATEMENT", + "__gg__func_exception_statement", "", {}, FldAlphanumeric }, + { EXCEPTION_STATUS, "EXCEPTION-STATUS", + "__gg__func_exception_status", "", {}, FldAlphanumeric }, + + { EXP, "EXP", + "__gg__exp", "N", {}, FldNumericBin5 }, + { EXP10, "EXP10", + "__gg__exp10", "N", {}, FldNumericBin5 }, + { FACTORIAL, "FACTORIAL", + "__gg__factorial", "I", {}, FldNumericBin5 }, + { FIND_STRING, "FIND-STRING", + "__gg__find_string", "AXI", {}, FldNumericBin5 }, + { FORMATTED_CURRENT_DATE, "FORMATTED-CURRENT-DATE", + "__gg__formatted_current_date", "X", {}, FldAlphanumeric }, + { FORMATTED_DATE, "FORMATTED-DATE", + "__gg__formatted_date", "XX", {}, FldAlphanumeric }, + { FORMATTED_DATETIME, "FORMATTED-DATETIME", + "__gg__formatted_datetime", "XINI", {}, FldAlphanumeric }, + { FORMATTED_TIME, "FORMATTED-TIME", + "__gg__formatted_time", "INI", {}, FldNumericBin5 }, + { FRACTION_PART, "FRACTION-PART", + "__gg__fraction_part", "N", {}, FldNumericBin5 }, + { HEX_OF, "HEX-OF", + "__gg__hex_of", "X", {}, FldAlphanumeric }, + { HEX_TO_CHAR, "HEX-TO-CHAR", + "__gg__hex_to_char", "X", {}, FldAlphanumeric }, + { HIGHEST_ALGEBRAIC, "HIGHEST-ALGEBRAIC", + "__gg__highest_algebraic", "N", {}, FldNumericBin5 }, + { INTEGER, "INTEGER", + "__gg__integer", "N", {}, FldNumericBin5 }, + // requires FldBoolean + { INTEGER_OF_BOOLEAN, "INTEGER-OF-BOOLEAN", + "__gg__integer_of_boolean", "B", {}, FldNumericBin5 }, + { INTEGER_OF_DATE, "INTEGER-OF-DATE", + "__gg__integer_of_date", "I", {}, FldNumericBin5 }, + { INTEGER_OF_DAY, "INTEGER-OF-DAY", + "__gg__integer_of_day", "I", {}, FldNumericBin5 }, + { INTEGER_OF_FORMATTED_DATE, "INTEGER-OF-FORMATTED-DATE", + "__gg__integer_of_formatted_date", "XX", {}, FldAlphanumeric }, + { INTEGER_PART, "INTEGER-PART", + "__gg__integer_part", "N", {}, FldNumericBin5 }, + { LENGTH, "LENGTH", + "__gg__length", "X", {}, FldNumericBin5 }, + { LOCALE_COMPARE, "LOCALE-COMPARE", + "__gg__locale_compare", "XXX", {}, FldNumericBin5 }, + { LOCALE_DATE, "LOCALE-DATE", + "__gg__locale_date", "XX", {}, FldNumericBin5 }, + { LOCALE_TIME, "LOCALE-TIME", + "__gg__locale_time", "XX", {}, FldNumericBin5 }, + { LOCALE_TIME_FROM_SECONDS, "LOCALE-TIME-FROM-SECONDS", + "__gg__locale_time_from_seconds", "NX", {}, FldNumericBin5 }, + + { LOG, "LOG", + "__gg__log", "N", {}, FldNumericBin5 }, + { LOG10, "LOG10", + "__gg__log10", "N", {}, FldNumericBin5 }, + { LOWER_CASE, "LOWER-CASE", + "__gg__lower_case", "X", {}, FldAlphanumeric }, + { LOWEST_ALGEBRAIC, "LOWEST-ALGEBRAIC", + "__gg__lowest_algebraic", "N", {}, FldNumericBin5 }, + + { MAXX, "MAX", + "__gg__max", "n", {}, FldAlphanumeric }, + { MEAN, "MEAN", + "__gg__mean", "n", {}, FldNumericBin5 }, + { MEDIAN, "MEDIAN", + "__gg__median", "n", {}, FldNumericBin5 }, + { MIDRANGE, "MIDRANGE", + "__gg__midrange", "n", {}, FldNumericBin5 }, + { MINN, "MIN", + "__gg__min", "n", {}, FldAlphanumeric }, + { MOD, "MOD", + "__gg__mod", "IN", {}, FldNumericBin5 }, + { MODULE_NAME, "MODULE-NAME", + "__gg__module_name", "I", {}, FldAlphanumeric }, + { NATIONAL_OF, "NATIONAL-OF", + "__gg__national_of", "XX", {}, FldAlphanumeric }, + { NUMVAL, "NUMVAL", + "__gg__numval", "X", {}, FldNumericBin5 }, + { NUMVAL_C, "NUMVAL-C", + "__gg__numval_c", "XXU", {}, FldNumericBin5 }, + { NUMVAL_F, "NUMVAL-F", + "__gg__numval_f", "X", {}, FldNumericBin5 }, + { ORD, "ORD", + "__gg__ord", "X", {}, FldNumericBin5 }, + { ORD_MAX, "ORD-MAX", + "__gg__ord_max", "n", {}, FldNumericBin5 }, + { ORD_MIN, "ORD-MIN", + "__gg__ord_min", "n", {}, FldNumericBin5 }, + { PI, "PI", + "__gg__pi", "", {}, FldNumericBin5 }, + { PRESENT_VALUE, "PRESENT-VALUE", + "__gg__present_value", "n", {}, FldNumericBin5 }, + { RANDOM, "RANDOM", + "__gg__random", "I", {}, FldNumericBin5 }, + { RANGE, "RANGE", + "__gg__range", "n", {}, FldNumericBin5 }, + { REM, "REM", + "__gg__rem", "NN", {}, FldNumericBin5 }, + { REVERSE, "REVERSE", + "__gg__reverse", "X", {}, FldAlphanumeric }, + { SECONDS_FROM_FORMATTED_TIME, "SECONDS-FROM-FORMATTED-TIME", + "__gg__seconds_from_formatted_time", "XX", {}, FldAlphanumeric }, + { SECONDS_PAST_MIDNIGHT, "SECONDS_PAST_MIDNIGHT", + "__gg__seconds_past_midnight", "", {}, FldAlphanumeric }, + { SIGN, "SIGN", + "__gg__sign", "N", {}, FldNumericBin5 }, + { SIN, "SIN", + "__gg__sin", "N", {}, FldNumericBin5 }, + { SMALLEST_ALGEBRAIC, "SMALLEST-ALGEBRAIC", + "__gg__smallest_algebraic", "N", {}, FldNumericBin5 }, + { SQRT, "SQRT", + "__gg__sqrt", "N", {}, FldNumericBin5 }, + { STANDARD_COMPARE, "STANDARD-COMPARE", + "__gg__standard_compare", "XXXI", {}, FldAlphanumeric }, + { STANDARD_DEVIATION, "STANDARD-DEVIATION", + "__gg__standard_deviation", "n", {}, FldNumericBin5 }, + { SUBSTITUTE, "SUBSTITUTE", + "__gg__substitute", "XXX", {}, FldAlphanumeric }, + { SUM, "SUM", + "__gg__sum", "n", {}, FldNumericBin5 }, + { TAN, "TAN", + "__gg__tan", "N", {}, FldNumericBin5 }, + { TEST_DATE_YYYYMMDD, "TEST-DATE-YYYYMMDD", + "__gg__test_date_yyyymmdd", "I", {}, FldNumericBin5 }, + { TEST_DAY_YYYYDDD, "TEST-DAY-YYYYDDD", + "__gg__test_day_yyyyddd", "I", {}, FldNumericBin5 }, + { TEST_FORMATTED_DATETIME, "TEST-FORMATTED-DATETIME", + "__gg__test_formatted_datetime", "XX", {}, FldNumericBin5 }, + { TEST_NUMVAL, "TEST-NUMVAL", + "__gg__test_numval", "X", {}, FldNumericBin5 }, + { TEST_NUMVAL_C, "TEST-NUMVAL-C", + "__gg__test_numval_c", "XXU", {}, FldNumericBin5 }, + { TEST_NUMVAL_F, "TEST-NUMVAL-F", + "__gg__test_numval_f", "X", {}, FldNumericBin5 }, + { TRIM, "TRIM", + "__gg__trim", "XI", {}, FldNumericBin5 }, + { ULENGTH, "ULENGTH", + "__gg__ulength", "X", {}, FldAlphanumeric }, + { UPOS, "UPOS", + "__gg__upos", "XI", {}, FldAlphanumeric }, + { UPPER_CASE, "UPPER-CASE", + "__gg__upper_case", "X", {}, FldAlphanumeric }, + { USUBSTR, "USUBSTR", + "__gg__usubstr", "XII", {}, FldAlphanumeric }, + { USUPPLEMENTARY, "USUPPLEMENTARY", + "__gg__usupplementary", "X", {}, FldAlphanumeric }, + { UUID4, "UUID4", + "__gg_uuid4", "", {}, FldAlphanumeric }, + { UVALID, "UVALID", + "__gg__uvalid", "X", {}, FldAlphanumeric }, + { UWIDTH, "UWIDTH", + "__gg__uwidth", "XI", {}, FldAlphanumeric }, + { VARIANCE, "VARIANCE", + "__gg__variance", "n", {}, FldNumericBin5 }, + { WHEN_COMPILED, "WHEN-COMPILED", + "__gg__when_compiled", "", {}, FldAlphanumeric }, + { YEAR_TO_YYYY, "YEAR-TO-YYYY", + "__gg__year_to_yyyy", "III", {}, FldNumericBin5 }, + }; + +static const +function_descr_t *function_descrs_end = function_descrs + COUNT_OF(function_descrs); + +class cname_cmp { + const char *cname; + public: + cname_cmp( const char *cname ) : cname(cname) {} + + bool operator()( const function_descr_t& descr ) { + return strlen(cname) == strlen(descr.cname) && + 0 == strcmp(cname, descr.cname); + } + bool operator()( const char that[] ) { + return strlen(cname) == strlen(that) && + 0 == strcmp(cname, that); + } +}; + +/* + * For variadic intrinsic functions, ensure all parameters are commensurate. + * Return pointer in 1st inconsistent parameter type. + * Return NULL to indicate success. + */ +static cbl_refer_t * +intrinsic_inconsistent_parameter( size_t n, cbl_refer_t *args ) { + class commensurate_type { + cbl_refer_t first; + public: + commensurate_type( const cbl_refer_t& first ) : first(first) {} + bool operator()( cbl_refer_t& arg ) const { + return is_numeric(first.field) == is_numeric(arg.field); + } + }; + + auto p = std::find_if_not(args, args + n, commensurate_type(args[0])); + return p == args + n? NULL : p; +} + +static cbl_field_type_t +intrinsic_return_type( int token ) { + auto p = std::find_if( function_descrs, + function_descrs_end, + [token]( const auto& descr ) { + return token == descr.token; + } ); + return p == function_descrs_end? FldAlphanumeric : p->ret_type; +} + +static const char * +intrinsic_cname( int token ) { + auto p = std::find_if( function_descrs, + function_descrs_end, + [token]( const auto& descr ) { + return token == descr.token; + } ); + return p == function_descrs_end? NULL : p->cname; +} + +const char * +intrinsic_function_name( int token ) { + auto p = std::find_if( function_descrs, + function_descrs_end, + [token]( const auto& descr ) { + return token == descr.token; + } ); + return p == function_descrs_end? NULL : p->name; +} + +/* + * Provide supplied function parameters. + * Return index to 1st invalid parameter type. + * Return N to indicate success. + */ +static size_t +intrinsic_invalid_parameter( int token, + const std::vector<cbl_refer_t>& args ) +{ + auto p = std::find_if( function_descrs, + function_descrs_end, + [token]( const auto& descr ) { + return token == descr.token; + } ); + if( p == function_descrs_end ) { + cbl_internal_error( "%s: intrinsic function %s not found", + __func__, keyword_str(token) ); + } + + gcc_assert(!args.empty()); + gcc_assert(p < function_descrs_end); + + const function_descr_t& descr = *p; + + size_t i = 0; + for( auto arg : args ) { + if( arg.field == NULL ) { + i++; + continue; + } + assert(i < strlen(descr.types)); + + switch(descr.types[i]) { + case 'A' : //Alphabetic + case 'I' : //Integer + case 'N' : //Numeric + case 'X' : //Alphanumeric + break; + case 'n' : //variadic + return args.size(); + break; + case 'D' : //DBCS + case 'K' : //Keyword + case 'O' : //Other + case 'U' : //National + case '8' : //UTF-8 + default: + cbl_internal_error( "%s: invalid function descr type '%c'", + __func__, descr.types[i]); + } + + static std::map<char, const char*> typenames + { + { 'A', "Alphabetic" }, + { 'I', "Integer" }, + { 'N', "Numeric" }, + { 'X', "Alphanumeric" }, + }; + + switch( arg.field->type ) { + case FldInvalid: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + yyerror("%s: field '%s' (%s) invalid for %s parameter", + descr.name, + arg.field->name, cbl_field_type_str(arg.field->type), + typenames[descr.types[i]]); + return i; + break; + case FldGroup: + default: + break; + } + + if( is_numeric(arg.field) || is_integer_literal(arg.field)) { + if( strchr("A", descr.types[i]) != NULL ) { + yyerror("%s: numeric field '%s' (%s) invalid for %s parameter", + descr.name, + arg.field->name, cbl_field_type_str(arg.field->type), + typenames[descr.types[i]]); + return i; + } + } else { // string field + if( strchr("IN", descr.types[i]) != NULL ) { + if( data_category_of(arg.field) == data_alphabetic_e ) { + yyerror("%s: non-numeric field '%s' (%s) invalid for %s parameter", + descr.name, + arg.field->name, cbl_field_type_str(arg.field->type), + typenames[descr.types[i]]); + return i; + } + } + } + i++; + } // end loop + + return args.size(); +} + +/* + * Functions used by code gen + */ + +size_t +intrinsic_parameter_count( const char cname[] ) { + const function_descr_t *descr = std::find_if(function_descrs, + function_descrs_end, cname_cmp(cname)); + return descr == function_descrs_end || descr->types[0] == 'n'? + size_t(-1) : strlen(descr->types); +} + +#if 0 +static int +yyreport_syntax_error (const yypcontext_t *ctx) +{ + int res = 0; + YYLOCATION_PRINT (stderr, yypcontext_location (ctx)); + fprintf (stderr, ": syntax error"); + // Report the tokens expected at this point. + { + enum { TOKENMAX = 5 }; + yysymbol_kind_t expected[TOKENMAX]; + int n = yypcontext_expected_tokens (ctx, expected, TOKENMAX); + if (n < 0) + // Forward errors to yyparse. + res = n; + else + for (int i = 0; i < n; ++i) + fprintf (stderr, "%s %s", + i == 0 ? ": expected" : " or", yysymbol_name (expected[i])); + } + // Report the unexpected token. + { + yysymbol_kind_t lookahead = yypcontext_token (ctx); + if (lookahead != YYSYMBOL_YYEMPTY) + fprintf (stderr, " before %s", yysymbol_name (lookahead)); + } + fprintf (stderr, "\n"); + return res; +} +#endif diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l new file mode 100644 index 0000000..e4df4e8 --- /dev/null +++ b/gcc/cobol/scan.l @@ -0,0 +1,2487 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +%{ +#include <fstream> // Before cobol-system because it uses poisoned functions +#include "cobol-system.h" + +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "parse.h" +#include "cdf.h" +#include "copybook.h" +#include "scan_ante.h" +#include "lexio.h" +#include "exceptl.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#pragma GCC diagnostic ignored "-Wunused-function" + +%} + + /* C99 floating point constant, per flex(1) appendix "common patterns" */ +dseq ([[:digit:]]+) +dseq_opt ([[:digit:]]*) +Afrac (({dseq_opt}"."{dseq})|{dseq}".") /* American */ +frac (({dseq_opt}[.,]{dseq})|{dseq}[.,]) +exp ([eE][+-]?{dseq}) +exp_opt ({exp}?) +fsuff [flFL] +fsuff_opt ({fsuff}?) +hpref (0[xX]) +hdseq ([[:xdigit:]]+) +hdseq_opt ([[:xdigit:]]*) +hfrac (({hdseq_opt}"."{hdseq})|({hdseq}".")) +bexp ([pP][+-]?{dseq}) +dfc (({frac}{exp_opt}{fsuff_opt})|({dseq}{exp}{fsuff_opt})) +hfc (({hpref}{hfrac}{bexp}{fsuff_opt})|({hpref}{hdseq}{bexp}{fsuff_opt})) + +boolseq (([''][01]+[''])|([""][01]+[""])) +hexseq ((['']{hdseq}[''])|([""]{hdseq}[""])) +nonseq (([''][[:alnum:]]+][''])|([""][[:alnum:]]+[""])) + +INTEGER 0*[1-9][[:digit:]]* +INTEGERZ [[:digit:]]+ + +SPC [[:space:]]+ +OSPC [[:space:]]* +EOL \r?\n +BLANK_EOL [[:blank:]]*{EOL} +BLANK_OEOL [[:blank:]]*{EOL}? + + +DOTSEP [.][[:space:]] +DOTEOL [[:blank:]]*[.]{BLANK_EOL} + +SKIP [[:blank:]]*SKIP[123][[:blank:]]*[.]?{BLANK_EOL} +TITLE [[:blank:]]*TITLE($|[.]|[^\n]*) + +COUNT [(][[:digit:]]+[)] +N9 9+|(9{COUNT}) +NP P+|(P{COUNT}) + +UNSIGNED [[:space:]]+UNSIGNED +SIGNED [[:space:]]+SIGNED +DBLLONG (LONG-LONG|DOUBLE) + +ALNUM [AX9]+ + +AX [AX]{COUNT}? +B0 [B0/]{COUNT}? +ALPHEDREQ ({N9}*{AX}+{N9}*{B0}+{N9}*)|({N9}*({B0}|[.])+{N9}*{AX}+{N9}*) +ALPHED {ALPHEDREQ}([AX9B0/]{COUNT}?)* + + /* Must contain at least one 0, B, /, Z, *, +, + * (comma), ., –, CR, DB, or cs. Can contain + * Ps, 9s, and one V. Must describe 1 to 31 + * digit positions, which can be represented + * by 9s, zero suppression symbols (Z, *), and + * floating insertion symbols (+, –, cs). + * Cannot end with '.'. // BPVZ90/,.+- CR DB * cs + */ +NUMEDCHAR [BPVZ90/,]+{COUNT}? +NUMEDCHARS {NUMEDCHAR}([.]?{NUMEDCHAR})* +NUMED ([+-]{NUMEDCHARS}+)|({NUMEDCHARS}+[+-]) +CURRENCY [A-Zfhijklmoqtuwy\x80-\xFF]{-}[ABCDEGNPRSVXZ] +NUMEDCUR (([.]?[-$0B/Z*+,P9()V+–]|{CURRENCY}+|{COUNT})+([.][$0B/Z*+P9()V+\–])*)+ + +NUMEDITED {NUMED}|{NUMEDCUR} +EDITED {ALPHED}|{NUMED}|{NUMEDCUR} + +DATE_FMT_B (YYYYMMDD)|(YYYYDDD)|(YYYYWwwD) +DATE_FMT_E (YYYY-MM-DD)|(YYYY-DDD)|(YYYY-Www-D) +DATE_FMT {DATE_FMT_B}|{DATE_FMT_E} + +TIME_FMT1 hhmmss([.,]s+)? +TIME_FMT3 hhmmss([.,]s+)?Z +TIME_FMT5 hhmmss([.,]s+)?[+]hhmm +TIME_FMT2 hh:mm:ss([.,]s+)? +TIME_FMT4 hh:mm:ss([.,]s+)?Z +TIME_FMT6 hh:mm:ss([.,]s+)?[+]hh:mm + +TIME_FMT_B {TIME_FMT1}|{TIME_FMT3}|{TIME_FMT5} +TIME_FMT_E {TIME_FMT2}|{TIME_FMT4}|{TIME_FMT6} +TIME_FMT {TIME_FMT_B}|{TIME_FMT_E} + +DATETIME_FMT ({DATE_FMT_B}T{TIME_FMT_B})|({DATE_FMT_E}T{TIME_FMT_E}) + +NAME [[:alnum:]]+([_-]+[[:alnum:]]+)* +SUBELEMS {NAME}({SPC}{NAME})* + +EOP (EOP|END-OF-PAGE) + +PARENS [(]{OSPC}[)] +SUBSCRIPT [(]{OSPC}{SUBELEMS}{OSPC}[)] +NAMEQUAL OF{SPC}{NAME} +NAMEQUALS {NAMEQUAL}({SPC}{NAMEQUAL})* + +STRING [^\r\n""]+ +STRING1 [^\r\n'']+ + /* comma & semicolon must be followed by a space */ +COMMA [,;][[:blank:]]* + +ISNT (IS{SPC})?NOT + + +COMMENTARY DATE-COMPILED|DATE-WRITTEN|INSTALLATION|SECURITY + +SORT_MERGE SORT(-MERGE)? + +LESS_THAN (IS{SPC})?LESS({SPC}THAN)? +GREATER_THAN (IS{SPC})?GREATER({SPC}THAN)? +OR_EQUAL OR{SPC}EQUALS?({SPC}TO)? + + /* for reasons unclear, flex refuses {SPC} here */ +SIZE_ERROR (ON[[[:space:]]+)?SIZE[[:space:]]+ERROR + +VARTYPE NUMERIC|ALPHABETIC|ALPHABETIC_LOWER|ALPHABETIC_UPPER|DBCS|KANJI +NAMTYP {NAME}|{VARTYPE} + +NL [[:blank:]]*\r?\n[[:blank:]]* + +PUSH_FILE \f?[#]FILE{SPC}PUSH{SPC}[^\f]+\f +POP_FILE \f?[#]FILE{SPC}POP\f +LINE_DIRECTIVE [#]line{SPC}[[:alnum:]]+{SPC}[""''].+\n + +%x procedure_div ident_state addr_of function classify +%x program_id_state comment_entries +%x author_state date_state field_level field_state dot_state +%x numeric_state name_state +%x quoted1 quoted2 quoteq +%x picture picture_count integer_count +%x basis copy_state sort_state +%x cdf_state bool_state hex_state subscripts numstr_state exception +%x datetime_fmt raising partial_name cobol_words + +%option debug noyywrap stack yylineno case-insensitive +%% + /* CDF */ +<bool_state>{ + [''""]/[01] + [01]+/[''""] { if( copy_state == YY_START ) { + ydflval.boolean = ((*yytext == 1) ^ is_not); + return YDF_BOOL; + } + yylval.numstr.radix = boolean_e; + yylval.numstr.string = xstrdup(yytext); + if( ! original_number(yylval.numstr.string) ) { + error_msg(yylloc, "input inconceivably long"); + return NO_CONDITION; + } + static int nwarn; + if( !nwarn++ ) + not_implemented("Boolean literals are " + "not expected to work correctly"); + return NUMSTR; + } + [''""] { yy_pop_state(); } +} +<hex_state>{ + [''""]/{hdseq} + {hdseq}/[''""] { if( copy_state == YY_START ) { + ydflval.number = integer_of(yytext, true); + return YDF_NUMBER; + } + if( 0 == yyleng % 2 ) { + yylval.literal.set_data( yyleng/2, hex_decode(yytext) ); + update_location_col(yytext, -3); + return LITERAL; + } + dbgmsg( "hex literal '%s' " + "has an odd number (%d) of characters", + yytext, yyleng ); + return '@'; // invalid token + } + [''""] { yy_pop_state(); } +} + + /* Initial start condition only. */ + +WORKING-STORAGE{SPC}SECTION { + yy_push_state(field_state); + return WORKING_STORAGE_SECT; } +LOCAL-STORAGE{SPC}SECTION { + yy_push_state(field_state); + return LOCAL_STORAGE_SECT; } +WORKING-STORAGE { + return WORKING_STORAGE; } +LOCAL-STORAGE { + return LOCAL_STORAGE; } +SCREEN { + return SCREEN; } + +LINKAGE{SPC}SECTION { + yy_push_state(field_state); + return LINKAGE_SECT; } + +FUNCTION-ID { yy_push_state(ident_state); + yy_push_state(program_id_state); + yy_push_state(name_state); return FUNCTION; } + +PROGRAM-ID { yy_push_state(ident_state); + yy_push_state(program_id_state); + yy_push_state(name_state); return PROGRAM_ID; } + +PROGRAM-ID/{DOTEOL} { yy_push_state(ident_state); + yy_push_state(name_state); + yy_push_state(dot_state); return PROGRAM_ID; } + +PROCEDURE{SPC}DIVISION { yy_push_state(procedure_div); + return PROCEDURE_DIV; } +<comment_entries>{ + (ENVIRONMENT|DATA|PROCEDURE){SPC}DIVISION { myless(0); yy_pop_state(); } + {BLANK_EOL} + [^[:space:]]{1,512}{BLANK_OEOL} // about 1/2 KB at a time +} + +<ident_state>{ + AS{SPC}[""] { yy_push_state(quoted2); return AS; } + AS{SPC}[''] { yy_push_state(quoted1); return AS; } + IS { pop_return IS; } + + OPTIONS { yy_pop_state(); myless(0); } + [[:blank:]]*(ENVIRONMENT|DATA|PROCEDURE)[[:blank:]]+DIVISION/.+\n { + yy_pop_state(); myless(0); } + [[:blank:]]*AUTHOR[[:blank:].]+{EOL}? { + // Might not have an EOL, but stop on one. + yy_push_state(author_state); } + + {DOTEOL} + + {COMMENTARY} { BEGIN(comment_entries); } +} +<author_state>{ + [[:blank:]]+ + ^{BLANK_EOL} + [^\r\n]+ { yy_pop_state(); + yylval.string = xstrdup(yytext); + } +} + + +<INITIAL>{ + COBOL { return COBOL; } + CODE-SET { return CODESET; } + FUNCTION { return FUNCTION; } + GLOBAL { return GLOBAL; } + + ^[[:blank:]]*0?1/[[:space:]] { /* If in File Section parse record */ + yy_push_state(field_state); + yy_set_bol(1); + myless(0); } +} + +<INITIAL,procedure_div,cdf_state>{ + + /* unused Context Words */ +ARITHMETIC { return ARITHMETIC; } +ATTRIBUTE { return ATTRIBUTE; } +AUTO { return AUTO; } +AUTOMATIC { return AUTOMATIC; } +AWAY-FROM-ZERO { return AWAY_FROM_ZERO; } +BACKGROUND-COLOR { return BACKGROUND_COLOR; } +BELL { return BELL; } +BINARY-ENCODING { return BINARY_ENCODING; } +BLINK { return BLINK; } +CAPACITY { return CAPACITY; } + +CENTER { + if( ! dialect_ibm() ) return CENTER; + yylval.string = xstrdup(yytext); + return typed_name(yytext); + } + +BINARY { return BINARY; } +CLASSIFICATION { return CLASSIFICATION; } +CYCLE { return CYCLE; } +DECIMAL-ENCODING { return DECIMAL_ENCODING; } +ENTRY-CONVENTION { return ENTRY_CONVENTION; } +EOL { return EOL; } +EOS { return EOS; } +ERASE { return ERASE; } +EXPANDS { return EXPANDS; } +FLOAT-BINARY { return FLOAT_BINARY; } +FLOAT-DECIMAL { return FLOAT_DECIMAL; } +FOREGROUND-COLOR { return FOREGROUND_COLOR; } +FOREVER { return FOREVER; } +FULL { return FULL; } +HIGHLIGHT { return HIGHLIGHT; } +HIGH-ORDER-LEFT { return HIGH_ORDER_LEFT; } +HIGH-ORDER-RIGHT { return HIGH_ORDER_RIGHT; } +IGNORING { return IGNORING; } +IMPLEMENTS { return IMPLEMENTS; } +INITIALIZED { return INITIALIZED; } +INTERMEDIATE { return INTERMEDIATE; } +LC_ALL { return LC_ALL_kw; } +LC_COLLATE { return LC_COLLATE_kw; } +LC_CTYPE { return LC_CTYPE_kw; } +LC_MESSAGES { return LC_MESSAGES_kw; } +LC_MONETARY { return LC_MONETARY_kw; } +LC_NUMERIC { return LC_NUMERIC_kw; } +LC_TIME { return LC_TIME_kw; } +LENGTH { return LENGTH; } +LENGTH{SPC}OF { return LENGTH_OF; } +LOCALE { return LOCALE; } +LOWLIGHT { return LOWLIGHT; } +NEAREST-AWAY-FROM-ZERO { return NEAREST_AWAY_FROM_ZERO; } +NEAREST-EVEN { return NEAREST_EVEN; } +NEAREST-TOWARD-ZERO { return NEAREST_TOWARD_ZERO; } +NONE { return NONE; } +NORMAL { return NORMAL; } +NUMBERS { return NUMBERS; } +PREFIXED { return PREFIXED; } +PREVIOUS { return PREVIOUS; } +PROTOTYPE { return PROTOTYPE; } +PROHIBITED { return PROHIBITED; } +RAISING{SPC}/LAST[[:space:]] { yy_push_state(raising); return RAISING; } +RELATION { return RELATION; } +REQUIRED { return REQUIRED; } +REVERSE-VIDEO { return REVERSE_VIDEO; } +ROUNDING { return ROUNDING; } +SECONDS { return SECONDS; } +SECURE { return SECURE; } +SHORT { return SHORT; } +SIGNED { return SIGNED; } +STANDARD-BINARY { return STANDARD_BINARY; } +STANDARD-DECIMAL { return STANDARD_DECIMAL; } +STATEMENT { return STATEMENT; } +STEP { return STEP; } +STRONG { return STRONG; } +STRUCTURE { return STRUCTURE; } + +TALLY { // Use TALLY register for IBM, else it's just a name. + static const char tally[] = "_TALLY"; + auto p = dialect_ibm()? tally : tally + 1; + yylval.string = xstrdup(p); + return NAME; + } + +TOWARD-GREATER { return TOWARD_GREATER; } +TOWARD-LESSER { return TOWARD_LESSER; } +TRUNCATION { return TRUNCATION; } +UCS-4 { return UCS_4; } +UNDERLINE { return UNDERLINE; } +UNSIGNED { return UNSIGNED; } +UTF-16 { return UTF_16; } +UTF-8 { return UTF_8; } + +SYSIN { return SYSIN; } +SYSIPT { return SYSIPT; } +SYSOUT { return SYSOUT; } +SYSLIST { return SYSLIST; } +SYSLST { return SYSLST; } +SYSPUNCH { return SYSPUNCH; } +SYSPCH { return SYSPCH; } +CONSOLE { return CONSOLE; } +C01 { return C01; } +C02 { return C02; } +C03 { return C03; } +C04 { return C04; } +C05 { return C05; } +C06 { return C06; } +C07 { return C07; } +C08 { return C08; } +C09 { return C09; } +C10 { return C10; } +C11 { return C11; } +C12 { return C12; } +CSP { return CSP; } +S01 { return S01; } +S02 { return S02; } +S03 { return S03; } +S04 { return S04; } +S05 { return S05; } +AFP-5A { return AFP_5A; } +STDIN { return STDIN; } +STDOUT { return STDOUT; } +STDERR { return STDERR; } +SYSERR { return STDERR; } + +CANCEL { return CANCEL; } +COMMIT { return COMMIT; } +COMMON { return COMMON; } +CONTINUE { return CONTINUE; } + +COPY { + yy_push_state(copy_state); + myless(0); + } + +EXTEND { return EXTEND;} +INITIALIZE { return INITIALIZE; } +INSPECT { return INSPECT; } +INVOKE { return INVOKE; } +INTRINSIC { return INTRINSIC; } +MERGE { return MERGE; } +UNSTRING { return UNSTRING; } +XML { return XML; } +XMLGENERATE { return XMLGENERATE; } +XMLPARSE { return XMLPARSE; } + +ZEROE?S? { return ZERO; } + +WRITE { return WRITE; } + +WITH{SPC}NO/[[:^alnum:]_-] { return NO; } + +WITH { return WITH; } + +WHEN { return WHEN; } +ALSO { return ALSO; } + +VARYING { return VARYING; } +VALUE { return VALUE; } +UTILITY { return UTILITY; } +USING { return USING; } +USE{SPC}(AFTER{SPC})?/(EC|EXCEPTION) { return USE; } +USE { return USE; } + +UPON { return UPON; } +UP { return UP; } +UPSI { return UPSI; } +UNTIL { return UNTIL; } +UNITS { return UNITS; } +UNIT-RECORD { return UNIT_RECORD; } +UNIT { return UNIT; } +TYPE { return TYPE; } +TRY { return TRY; } +FALSE { return FALSE_kw; } +TRUE { return TRUE_kw; } +TRANSFORM { return TRANSFORM; } +TRACKS { return TRACKS; } +TRACK-AREA { return TRACK_AREA; } +TRACE { return TRACE; } +TOP { return TOP; } +TO { return TO; } +TIMES { return TIMES; } +THRU|THROUGH { return THRU; } +THEN { return THEN; } +THAN { return THAN; } +TEST { return TEST; } +TERMINATE { return TERMINATE; } +TALLYING { return TALLYING; } +TALLY { return TALLY; } +SYSPUNCH { return SYSPUNCH; } +SYSOUT { return SYSOUT; } +SYSIN { return SYSIN; } +SYMBOLIC { return SYMBOLIC; } +SYMBOL { return SYMBOL; } +SUM { return SUM; } +SUBTRACT { return SUBTRACT; } +STOP { return STOP ; } +START { return START ; } +STATUS { return STATUS ; } +STANDARD { return STANDARD ; } +STANDARD-[12] { return STANDARD_ALPHABET; } +STANDARD { return STANDARD ; } +SPECIAL-NAMES { return SPECIAL_NAMES ; } +SPACES? { yylval.string = NULL; return SPACES; } +SOURCE-COMPUTER { return SOURCE_COMPUTER; } +SOURCE { return SOURCE; } +{SORT_MERGE} { return SORT; } +SIZE { return SIZE; } +SIGN { return SIGN; } +SET { return SET; } +SHARING { return SHARING; } +SEQUENCE { return SEQUENCE; } + +SEQUENTIAL { return SEQUENTIAL; } +SENTENCE { return SENTENCE; } +SELECT { return SELECT; } +SECURITY { return SECURITY; } + +SECTION{SPC}[+-]?{INTEGERZ}/{OSPC}{DOTSEP} { + auto eotext = yytext + yyleng; + auto p = std::find_if(yytext, eotext, fisspace); + p = std::find_if(p, eotext, nonspace); + yylval.string = p; + return SECTION; + } + +SECTION{OSPC}{DOTSEP}/USE[[:space:]] { yylval.string = NULL; return SECTION; } +SECTION { yylval.string = NULL; return SECTION; } + +PARAGRAPH { return PARAGRAPH; } +SEARCH { return SEARCH; } + +SAME { return SAME; } +RUN { return RUN; } +ROUNDED { return ROUNDED; } +RIGHT { return RIGHT; } +RH { return RH; } +RF { return RF; } +REWRITE { return REWRITE; } +REWIND { return REWIND; } +REVERSED { return REVERSED; } +RETURN { return RETURN; } +RESTRICTED { return RESTRICTED; } + +RESUME { + if( ! dialect_ibm() ) return RESUME; + yylval.string = xstrdup(yytext); + return typed_name(yytext); + } + +RESET { return RESET; } +RESERVE { return RESERVE; } +RERUN { return RERUN; } + +REPOSITORY { return REPOSITORY; } + +REPORTS { return REPORTS; } +REPORTING { return REPORTING; } +REPORT { return REPORT; } +REPLACING { return REPLACING; } +REPLACE { return REPLACE; } +RENAMES { return RENAMES; } +REMAINDER { return REMAINDER; } +REMARKS { return REMARKS; } +RELEASE { return RELEASE; } + +RELATIVE{SPC}(KEY{SPC})?(IS{SPC})?{NAME} { + // RELATIVE ... NAME returns KEY + // RELATIVE ... token returns RELATIVE + std::reverse_iterator<char *> + p(yytext), pend(yytext + yyleng); + p = std::find_if(pend, p, fisspace); + char *name = p.base(); + assert(ISALNUM(name[0])); + assert(ISSPACE(name[-1])); + int token = keyword_tok(name)? RELATIVE : KEY; + myless( name - yytext ); + return token; + } +RELATIVE { return RELATIVE; } + +REEL { return REEL; } +RECORDING { return RECORDING; } +RECORD { return RECORD; } +RECORD{SPC}(IS) { return RECORD; } +RECORDS{SPC}(ARE) { return RECORDS; } +RECORDS { return RECORDS; } +READY { return READY; } +READ { return READ; } +RD { return RD; } +RANDOM { return RANDOM; } +RAISE { return RAISE; } +QUOTES { return QUOTES; } +QUOTE { return QUOTES; } + +PROGRAM { return PROGRAM_kw; } +PROCESS { return PROCESS; } +PROCEED { return PROCEED; } +PROCEDURE { return PROCEDURE; } +PROCEDURES { return PROCEDURES; } + +PRINT-SWITCH { return PRINT_SWITCH; } +POSITIVE { return POSITIVE; } +PLUS { return PLUS; } +PICTURE { return PICTURE; } +PH { return PH; } +PF { return PF; } +PERFORM { yylval.boolean = false; return PERFORM; } +PERFORM{SPC}CYCLE { yylval.boolean = true; return PERFORM; } + +PAGE-COUNTER { return PAGE_COUNTER; } +PAGE { return PAGE; } +PADDING { return PADDING; } +OUTPUT { return OUTPUT; } +OTHERWISE { return OTHERWISE; } +OTHER { return OTHER; } +ORGANI[SZ]ATION { return ORGANIZATION; } +ORDER { return ORDER; } + +OPTIONS{SPC}?[.] { return OPTIONS; } +OPTIONAL { return OPTIONAL; } +OPEN { return OPEN; } +ON { return ON; } +OMITTED { return OMITTED; } +OFF { return OFF; } +OF { return OF; } + +OBJECT-COMPUTER { return OBJECT_COMPUTER; } + +MEMORY{SPC}(SIZE{SPC})?[0-9]+{SPC}(WORDS|CHARACTERS|MODULES) {/*ignore*/} + +NUMERIC { return NUMERIC; } +NUMERIC-EDITED { return NUMERIC_EDITED; } + +NULLS? { return NULLS; } + +NOTE { return NOTE; } +NOT { return NOT; } +NO { return NO; } +NEXT { return NEXT; } +NEGATIVE { return NEGATIVE; } +NATIVE { return NATIVE; } +NAMED { return NAMED; } +NAT { return NAT; } +NATIONAL { return NATIONAL; } +NATIONAL-EDITED { return NATIONAL_EDITED; } +MULTIPLY { return MULTIPLY; } +MOVE { return MOVE; } +MODE { return MODE; } +LOW-VALUES? { return LOW_VALUES; } +LOCK{SPC}ON { return LOCK_ON; } +LOCK { return LOCK; } +LINKAGE { return LINKAGE; } +LINES { return LINES; } +LINE-COUNTER { return LINE_COUNTER; } +LINAGE { return LINAGE; } +LINE { return LINE; } +LIMITS { return LIMITS; } +LIMIT { return LIMIT; } + +LEADING { return LEADING; } +LAST { return LAST; } +LABEL { return LABEL; } +TRAILING { return TRAILING; } + +KEY({SPC}IS)? { return KEY; } +KANJI { return KANJI; } + +JUSTIFIED { return JUSTIFIED; } + +IS { return IS; } + +INTO { return INTO; } + /* INSTALLATION { return INSTALLATION; } */ + +INPUT-OUTPUT{SPC}SECTION { return INPUT_OUTPUT_SECT; } + +INPUT { return INPUT; } +INITIATE { return INITIATE; } +INITIALIZE { return INITIALIZE; } +INITIAL { return INITIAL_kw; } +INDICATE { return INDICATE; } +INDEXED { return INDEXED; } +INCLUDE { return INCLUDE; } +IN { return IN; } +IF { return IF; } + +ID(ENTIFICATION)?{SPC}DIVISION { BEGIN(0); return IDENTIFICATION_DIV; } + +IBM-360 { return IBM_360; } +I-O-CONTROL { return IO_CONTROL; } +I-O { return IO; } +HOLD { return HOLD; } +HIGH-VALUES? { return HIGH_VALUES; } +HEX { return HEX; } +HEADING { return HEADING; } +GROUP { return GROUP; } + +GOBACK { return GOBACK; } +BEAT-FEET { return GOBACK; } + +GO({SPC}TO)? { return GOTO; } + +GLOBAL { return GLOBAL; } +GIVING { return GIVING; } +GENERATE { return GENERATE; } + +FROM/[[:space:]]+(DATE|DAY|TIME)[[:space:]] { yy_push_state(date_state); return FROM; } +FROM { return FROM; } +FREE { return FREE; } + +FORM-OVERFLOW { return FORM_OVERFLOW; } +FOR { return FOR; } +FOOTING { return FOOTING; } +FIRST { return FIRST; } +FINAL { return FINAL; } +FILE-LIMIT { return FILE_LIMIT; } +FILE-CONTROL { return FILE_CONTROL; } + +FILE{SPC}SECTION { return FILE_SECT; } + +FILE { return FILE_KW; } + +FD { return FD; } +SD { return SD; } + +EXTERNAL { return EXTERNAL; } +EXIT { return EXIT; } +EXHIBIT { return EXHIBIT; } +EXAMINE { return EXAMINE; } +EVERY { return EVERY; } +ERROR { return ERROR; } +EVALUATE { return EVALUATE; } + +EQUALS? { return '='; } +ENVIRONMENT[[:blank:]]+DIVISION { return ENVIRONMENT_DIV; } + +ENTRY { return ENTRY; } +ENTER { return ENTER; } +END-WRITE { return END_WRITE; } +END-UNSTRING { return END_UNSTRING; } +END-SUBTRACT { return END_SUBTRACT; } +END-STRING { return END_STRING; } +END-START { return END_START ; } + +END-SEARCH { return END_SEARCH; } +END-REWRITE { return END_REWRITE; } +END-RETURN { return END_RETURN; } +END-READ { return END_READ; } +END-PERFORM { return END_PERFORM; } +END-MULTIPLY { return END_MULTIPLY; } + +END-IF { return END_IF; } +END-EVALUATE { return END_EVALUATE; } +END-DIVIDE { return END_DIVIDE; } +END-DISPLAY { return END_DISPLAY; } +END-DELETE { return END_DELETE; } +END-COMPUTE { return END_COMPUTE; } +END-CALL { return END_CALL; } +END-ADD { return END_ADD; } +END-ACCEPT { return END_ACCEPT; } +END { yylval.number = END; return END; } + +ELSE { return ELSE; } + +EC { return EC; } +EXCEPTION{SPC}CONDITION { return EC; } + +EBCDIC { return EBCDIC; } + +DYNAMIC { return DYNAMIC; } +DUPLICATES { return DUPLICATES; } +DOWN { return DOWN; } +DIVIDE { return DIVIDE; } + +DISPLAY { return DISPLAY; } + +DIRECT-ACCESS { return DIRECT_ACCESS; } +DIRECT { return DIRECT; } +DETAIL { return DETAIL; } +DESCENDING { return DESCENDING; } +DEPENDING { return DEPENDING; } + +DELIMITER { return DELIMITER; } +DELETE { return DELETE; } +DEFAULT { return DEFAULT; } +DECLARATIVES { return DECLARATIVES; } +DECIMAL-POINT { return DECIMAL_POINT; } +DEBUGGING { return DEBUGGING; } +DE { return DE; } +EGCS { return EGCS; } +DBCS { return DBCS; } +DATE-WRITTEN { return DATE_WRITTEN; } +DATE-COMPILED { return DATE_COMPILED; } +DAY-OF-WEEK { return DAY_OF_WEEK; } + +DATA{SPC}DIVISION{DOTEOL} { return DATA_DIV; } +DATA { return DATA; } + +CURRENCY { return CURRENCY; } +COUNT { return COUNT; } + +CORR(ESPONDING)? { return CORRESPONDING; } + +CONVERTING { return CONVERTING; } +CONTROLS { return CONTROLS; } +CONTROL { return CONTROL; } + +CONSOLE { return CONSOLE; } + +CONTAINS { return CONTAINS; } +CONFIGURATION{SPC}SECTION { return CONFIGURATION_SECT; } + +COMPUTE { return COMPUTE; } +COMMA { return COMMA; } + +COLUMN { return COLUMN; } +COLLATING { return COLLATING; } +CODE { return CODE; } +CLASS { return CLASS; } +CLOSE { return CLOSE; } + +CHARACTERS { return CHARACTERS; } +CHARACTER { return CHARACTER; } +CHANGED { return CHANGED; } +CH { return CH; } +CF { return CF; } +CALL { return CALL; } + +BY { return BY; } +BOTTOM { return BOTTOM; } +BEFORE { return BEFORE; } +BLOCK { return BLOCK; } +BACKWARD { return BACKWARD; } + +AT { return AT; } +ASSIGN { return ASSIGN; } +ASCENDING { return ASCENDING; } +AREAS { return AREAS; } +AREA { return AREA; } +ARE { return ARE; } +APPLY { return APPLY; } +ANYCASE { return ANYCASE; } +ANY { return ANY; } +ANUM { return ANUM; } + +ALTERNATE { return ALTERNATE; } +ALTER { return ALTER; } +ALSO { return ALSO; } +ALPHABET { return ALPHABET; } +ALPHABETIC { return ALPHABETIC; } +ALPHABETIC-LOWER { return ALPHABETIC_LOWER; } +ALPHABETIC-UPPER { return ALPHABETIC_UPPER; } +ALPHANUMERIC { return ALPHANUMERIC; } +ALPHANUMERIC-EDITED { return ALPHANUMERIC_EDITED; } + +ALLOCATE { return ALLOCATE; } +ALL { return ALL; } +AFTER { return AFTER; } +ADVANCING { return ADVANCING; } +ADDRESS { return ADDRESS; } +ADD { return ADD; } +ACTUAL { return ACTUAL; } +ACCESS { return ACCESS; } +ACCEPT { return ACCEPT; } + +DELETE { return DELETE; } +EJECT{DOTEOL}? { + if( ! dialect_ibm() ) { + dialect_error(yylloc, "EJECT is not ISO syntax,", "ibm"); + } + } +INSERTT { return INSERTT; } +LABEL { return LABEL; } +PROCESS { return PROCESS; } +SERVICE[[:blank:]]+RELOAD { return SERVICE_RELOAD; } +TITLE { return TITLE; } +USE({SPC}FOR)? { return USE; } + +} + +<field_level>{ + 66/{SPC}(\f#)?{NAME} { yy_pop_state(); + if( !parsing.on() ) orig_picture[0] = '\0'; + if( level_needed() ) { + level_found(); + yylval.number = level_of(yytext); return LEVEL66; + } else { + return numstr_of(yytext); + } + } + 78/{SPC}(\f#)?{NAME} { yy_pop_state(); + if( !parsing.on() ) orig_picture[0] = '\0'; + if( level_needed() ) { + level_found(); + yylval.number = level_of(yytext); return LEVEL78; + } else { + return numstr_of(yytext); + } + } + 88/{SPC}(\f#)?{NAME} { yy_pop_state(); + if( !parsing.on() ) orig_picture[0] = '\0'; + if( level_needed() ) { + level_found(); + yylval.number = level_of(yytext); return LEVEL88; + } else { + return numstr_of(yytext); + } + } + [[:digit:]]{1,2}/[[:space:]] { yy_pop_state(); + if( !parsing.on() ) orig_picture[0] = '\0'; + if( level_needed() ) { + level_found(); + yylval.number = level_of(yytext); return LEVEL; + } else { + return numstr_of(yytext); + } + } + + . { cbl_errx( "failed to parse field level on line %d", yylineno); } +} + +<field_state>{ + ^[[:blank:]]*[[:digit:]]{1,2}{OSPC}/[.] { + if( !parsing.on() ) orig_picture[0] = '\0'; + level_found(); + yylval.number = level_of(yytext); + return LEVEL; + } + + ^[[:blank:]]+ {} + ^[[:digit:]]{1,2}[[:space:]] { yy_push_state(field_level); } + [[:blank:]]*/[[:digit:]]{1,2}{SPC}(\f#)?{NAME} { yy_push_state(field_level); } + + [+-]?{INTEGERZ} { return numstr_of(yytext); } + [+-]?{dfc} { char *s = xstrdup(yytext); + // "The decimal point can appear anywhere within + // the literal except as the rightmost character." + size_t len = strlen(s); + assert(len); + if( s[--len] == '.' ) { + s[len] = '\0'; + myless(len); + } + numstr_of(s); free(s); + return NUMSTR; + } + + PIC(TURE)?({SPC}IS)?[[:space:]]{BLANK_OEOL} { + yy_push_state(picture); return PIC; } + + ANY { return ANY; } + LENGTH { return LENGTH; } + LENGTH{SPC}OF { return LENGTH_OF; } + BASED { return BASED; } + USAGE { return USAGE; } + UNBOUNDED { return UNBOUNDED; } + /* use coded capacity 255 to indicate comp-x */ + COMP(UTATIONAL)?-X { return ucomputable(FldNumericBin5, 0xFF); } + COMP(UTATIONAL)?-6 { return ucomputable(FldPacked, 0); } + COMP(UTATIONAL)?-5 { return ucomputable(FldNumericBin5, 0); } + COMP(UTATIONAL)?-4 { return scomputable(FldNumericBinary, 0); } + COMP(UTATIONAL)?-3 { return PACKED_DECIMAL; } + COMP(UTATIONAL)?-2 { return ucomputable(FldFloat, 8); } + COMP(UTATIONAL)?-1 { return ucomputable(FldFloat, 4); } + COMP(UTATIONAL)? { return ucomputable(FldNumericBinary, 0); } + BINARY { return scomputable(FldNumericBinary, 0); } + + BINARY-CHAR{SIGNED} { return scomputable(FldNumericBin5, 1); } + BINARY-CHAR{UNSIGNED} { return ucomputable(FldNumericBin5, 1); } + BINARY-CHAR { return scomputable(FldNumericBin5, 1); } + BINARY-SHORT{SIGNED} { return scomputable(FldNumericBin5, 2); } + BINARY-SHORT{UNSIGNED} { return ucomputable(FldNumericBin5, 2); } + BINARY-SHORT { return scomputable(FldNumericBin5, 2); } + BINARY-LONG{SIGNED} { return scomputable(FldNumericBin5, 4); } + BINARY-LONG{UNSIGNED} { return ucomputable(FldNumericBin5, 4); } + BINARY-LONG { return scomputable(FldNumericBin5, 4); } + BINARY-{DBLLONG}{SIGNED} { return scomputable(FldNumericBin5, 8); } + BINARY-{DBLLONG}{UNSIGNED} { return ucomputable(FldNumericBin5, 8); } + BINARY-{DBLLONG} { return scomputable(FldNumericBin5, 8); } + BIT { not_implemented("USAGE type: BIT"); + return BIT; } + FLOAT-BINARY-32 { return ucomputable(FldFloat, 4); } + FLOAT-BINARY-64 { return ucomputable(FldFloat, 8); } + FLOAT-BINARY-128 { return ucomputable(FldFloat, 16); } + FLOAT-DECIMAL-(16|34) { not_implemented("USAGE type: FLOAT_DECIMAL"); + return FLOAT_DECIMAL; // causes syntax error + } + /* 21) The representation and length of a data item described with USAGE + BINARY-CHAR, BINARY-SHORT, BINARY-LONG, BINARY-DOUBLE, FLOAT-SHORT, + FLOAT-LONG, or FLOAT-EXTENDED is implementor-defined. */ + FLOAT-EXTENDED { return ucomputable(FldFloat, 16); } + FLOAT-LONG { return ucomputable(FldFloat, 8); } + FLOAT-SHORT { return ucomputable(FldFloat, 4); } + + INDEX { return INDEX; } + MESSAGE-TAG { not_implemented("USAGE type: MESSAGE-TAG"); } + NATIONAL { not_implemented("USAGE type: NATIONAL"); + return NATIONAL; } + OBJECT{SPC}REFERENCE { not_implemented("USAGE type: OBJECT REFERENCE"); } + + PACKED-DECIMAL { return PACKED_DECIMAL; } + + FUNCTION-POINTER | + PROGRAM-POINTER { yylval.field_attr = prog_ptr_e; return POINTER; } + POINTER { yylval.field_attr = none_e; return POINTER; } + + PROCEDURE-POINTER { if( dialect_gcc() ) { + error_msg(yylloc, "%s requires -dialect ibm or mf", yytext); + } + yylval.field_attr = prog_ptr_e; + return POINTER; // return it anyway + } + + ZEROE?S? { return ZERO; } + SPACES? { yylval.string = NULL; return SPACES; } + LOW-VALUES? { return LOW_VALUES; } + HIGH-VALUES? { return HIGH_VALUES; } + QUOTES? { return QUOTES; } + NULLS? { return NULLS; } + + OF { return OF; } + VALUE({SPC}IS)? { return VALUE; } + VALUES({SPC}ARE)? { return VALUE; } + THRU|THROUGH { return THRU; } + + VALUES?({SPC}(IS|ARE))?{SPC}NULLS? { return NULLPTR; } + VALUES?({SPC}(IS|ARE))?/{SPC}[+-]?{dfc} { + yy_push_state(numeric_state); return VALUE; } + + (THRU|THROUGH)/{SPC}[[:digit:].,+-] { + yy_push_state(numeric_state); return THRU; } + + ALL { return ALL; } + AS { return AS; } + ASCENDING { return ASCENDING; } + BLANK { return BLANK; } + BLOCK { return BLOCK; } + BY { return BY; } + BYTE-LENGTH { return BYTE_LENGTH; } + CHARACTER { return CHARACTER; } + CHARACTERS { return CHARACTERS; } + CODE-SET { return CODESET; } + CONSTANT { return CONSTANT; } + CONTAINS { return CONTAINS; } + DATA { return DATA; } + DEPENDING { return DEPENDING; } + DESCENDING { return DESCENDING; } + DISPLAY { return DISPLAY; } + EJECT{DOTEOL}? { + if( ! dialect_ibm() ) { + dialect_error(yylloc, "EJECT is not ISO syntax,", "ibm"); + } + auto len = yyleng - 1; + if( yytext[len] == '\f' ) myless(--len); + } + EXTERNAL { return EXTERNAL; } + FALSE { return FALSE_kw; } + FROM { return FROM; } + GLOBAL { return GLOBAL; } + IN { return IN; } + INDEXED { return INDEXED; } + IS { return IS; } + JUST(IFIED)?({SPC}RIGHT)? { return JUSTIFIED; } + KEY { return KEY; } + LABEL { return LABEL; } + LEADING { return LEADING; } + LEFT { return LEFT; } + MODE { return MODE; } + OCCURS/{SPC}{NAME} { return OCCURS; } + OCCURS { yy_push_state(integer_count); return OCCURS; } + OF { return OF; } + OMITTED { return OMITTED; } + ON { return ON; } + RECORD { return RECORD; } + RECORDING { return RECORDING; } + RECORDS { return RECORDS; } + RECORDS{SPC}ARE { return RECORDS; } + RECORD{SPC}IS { return RECORD; } + REDEFINES { return REDEFINES; } + RENAMES { return RENAMES; } + RIGHT { return RIGHT; } + SEPARATE { return SEPARATE; } + SET { return SET; } + SAME { return SAME; } + SIGN { return SIGN; } + SIZE { return SIZE; } + STANDARD { return STANDARD; } + STRONG { return STRONG; } + SYNC(HRONIZED)? { return SYNCHRONIZED; } + TIMES { return TIMES; } + TIMES[[:space::]]+DEPENDING { return DEPENDING; } + TO { return TO; } + TRAILING { return TRAILING; } + TRUE { return TRUE_kw; } + TYPE { return TYPE; } + TYPEDEF { return TYPEDEF; } + VARYING { return VARYING; } + VOLATILE { return VOLATILE; } + WHEN { return WHEN; } + + COPY { + yy_push_state(copy_state); + myless(0); + } + + FD/[[:blank:]]+ { parsing.need_level(false); return FD; } + SD/[[:blank:]]+ { parsing.need_level(false); return SD; } + + {NAME} { // NAME here is never a token name + if( is_integer_token() ) return numstr_of(yytext); + ydflval.string = yylval.string = xstrdup(yytext); + auto token = typed_name(yytext); + return token == NAME88? NAME : token; + } + + Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted1); } + Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted2); } + N?X/{hexseq} { yylval.literal.set_prefix(yytext, yyleng); + yy_push_state(hex_state); } + N?X{nonseq} { dbgmsg("invalid hexadecimal value: %s", yytext); + return NO_CONDITION; } + [[:blank:]]*\r?\n {} + + WORKING-STORAGE{SPC}SECTION { return WORKING_STORAGE_SECT; } + LOCAL-STORAGE{SPC}SECTION { return LOCAL_STORAGE_SECT; } + LINKAGE{SPC}SECTION { return LINKAGE_SECT; } + SCREEN{SPC}/SECTION { return SCREEN; } + SECTION{OSPC}/{DOTSEP} { yylval.string = NULL; return SECTION; } + + PROCEDURE{SPC}DIVISION { BEGIN(procedure_div); return PROCEDURE_DIV; } + + [*]>.*$ // ignore inline comment +} + +<numstr_state>{ + [''""]/{hdseq} + {hdseq}/[''""] { + switch( yylval.numstr.radix ) { + case boolean_e: + if( 1 != yyleng ) { + error_msg(yylloc, "syntax error: Boolean literal '%s' " + "has too many (%d) characters", + yytext, yyleng ); + return NEG; // invalid token + } + return numstr_of(yytext, yylval.numstr.radix); + case hexadecimal_e: + if( 0 != yyleng % 2 ) { + error_msg(yylloc, "syntax error: hex literal '%s' " + "has an odd number (%d) of characters", + yytext, yyleng ); + return NEG; // invalid token + } + return numstr_of(yytext, yylval.numstr.radix); + default: + return NEG; + } + } + [''""] { yy_pop_state(); } +} + + /* + * dot dot dot: sayeth the standard: + * 01 xxx PIC 999. VALUE something. is a syntax error. + * 01 xxx PIC 999. is just three nines, and will be NumericDisplay. + * 01 xxx PIC 999.. is three nines and a dot, and is NumericEdited. + * + * On entry, we might have found a newline. If so, we accept any leading + * blanks, and ignore blank lines. This sets up recognizing SKIP2 etc. + * + * Any blank or separator period ends terminates the picture. + */ +<picture>{ + ^[[:blank:]]+ + ^{BLANK_EOL} + + {COMMA} | + [[:blank:]]*{EOL} | + [[:blank:]]+{EOL}? { yy_pop_state(); /* embedded/trailing blank */ } + {DOTSEP}[[:blank:].]+$ { yy_pop_state(); return '.'; } + {DOTSEP} { yy_pop_state(); return '.'; } + + + [[:blank:]]+[-+]/{EDITED} { return picset(yytext[yyleng-1]); } + + S/({N9}|{NP}|V)+ { return picset('S'); } + V?{NP}/{N9} { yylval.number = ndigit(yyleng); return picset(PIC_P); } + {N9}/{N9}*{NP}V? { yylval.number = ndigit(yyleng); return picset(NINES); } + {NP}V?/[,.]? { yylval.number = ndigit(yyleng); return picset(PIC_P); } + {N9}*V/{N9}* { yylval.number = ndigit(yyleng - 1); return picset(NINEV); } + {N9}/{N9}*[,.]? { yylval.number = ndigit(yyleng); return picset(NINES); } + P+/[,.]?\r?\n { yylval.number = yyleng; return picset(PIC_P); } + + {ALNUM}/{COUNT}({ALNUM}{COUNT}?)+ { + yy_push_state(picture_count); + yylval.string = xstrdup(yytext); return picset(ALNUM); } + {ALNUM}/{COUNT} { yy_push_state(picture_count); + yylval.string = xstrdup(yytext); return picset(ALNUM); } + {ALNUM}/[(]{NAME}[)] { yy_push_state(picture_count); + yylval.string = xstrdup(yytext); return picset(ALNUM); } + {ALNUM} { yylval.string = xstrdup(yytext); return picset(ALNUM); } + + {ALPHED} { yylval.string = xstrdup(yytext); return picset(ALPHED); } + {NUMEDITED} { yylval.string = xstrdup(yytext); return picset(NUMED); } + {NUMEDITED}[.]?CR { yylval.string = xstrdup(yytext); return picset(NUMED_CR); } + {NUMEDITED}[.]?DB { yylval.string = xstrdup(yytext); return picset(NUMED_DB); } + {NUMEDITED}[.]/{DOTEOL} { + yylval.string = xstrdup(yytext); return picset(NUMED); } + + [^[:space:].,;]+([.,;][^[:space:].,;]+)* { + yylval.string = xstrdup(yytext); return picset(ALPHED); } + + . { dbgmsg("unrecognized character '%c' (0x%x) in PICTURE", + *yytext, *yytext ); return NO_CONDITION; } + +} +<picture_count>{ + [(] { return picset(*yytext); } + [)] { pop_return picset(*yytext); } + {INTEGER} { return picset(numstr_of(yytext)); } + {NAME} { yylval.string = xstrdup(yytext); + return picset(NAME); } +} + +<integer_count>{ + {SPC}/{INTEGER} + {INTEGERZ} { yy_pop_state(); + return numstr_of(yytext); } +} + +<copy_state>{ + BY { return BY; } + IN|OF { return IN; } + SUPPRESS { return SUPPRESS; } + REPLACING { return REPLACING; } + COPY { return COPY; } + {DOTSEP}[[:blank:].]+$ { pop_return *yytext; } + {DOTSEP} { pop_return *yytext; } + + [(][^().]*[)] { ydflval.string = xstrdup(yytext); + return SUBSCRIPT; + } + [(][^().]*/[(] {ydflval.string = xstrdup(yytext); + return LSUB; + } + [^().]*[)] { ydflval.string = xstrdup(yytext); + return RSUB; + } + + {NAME} { + ydflval.string = xstrdup(yytext); + return NAME; + } + + /* CDF REPLACING needs quotes to distinquish strings from identifiers. */ + Z?['']{STRING1}[''] { auto *s = xstrdup(yytext); + std::replace(s, s + strlen(s), '\'', '"'); + ydflval.string = s; + update_location_col(s); + return LITERAL; } + Z?[""]{STRING}[""] { ydflval.string = xstrdup(yytext); + update_location_col(yytext); + return LITERAL; } + [=]{4} { static char nullstring[] = ""; + ydflval.string = nullstring; return PSEUDOTEXT; } + [=]{2} { yy_push_state(quoteq); } +} + +<quoteq>{ + [^=]+[=]/[^=] { tmpstring_append(yyleng); } + [^=]+/[=]{2} { yylval.string = xstrdup(tmpstring_append(yyleng)); + ydflval.string = yylval.string; + update_location_col(yylval.string); + return PSEUDOTEXT; } + [=]{2} { tmpstring = NULL; yy_pop_state(); } +} + +<quoted2>{ + {STRING}$ { tmpstring_append(yyleng); } + ^-[ ]{4,}[""]/.+ /* ignore continuation mark */ + {STRING}?[""]{2} { tmpstring_append(yyleng - 1); } + {STRING} { tmpstring_append(yyleng); } + [""]{SPC}[&]{SPC}[""''] { + if( yytext[yyleng - 1] == '\'' ) BEGIN(quoted1); + } + [""]-{OSPC}(\r?\n{OSPC})+[""] /* continue ... */ + [""] { + char *s = xstrdup(tmpstring? tmpstring : "\0"); + yylval.literal.set_data(strlen(s), s); + ydflval.string = yylval.literal.data; + update_location_col(yylval.literal.data, -2); + tmpstring = NULL; pop_return LITERAL; } +} + +<quoted1>{ + {STRING1}$ { tmpstring_append(yyleng); } + ^-[ ]{4,}['']/.+ /* ignore continuation mark */ + {STRING1}?['']{2} { tmpstring_append(yyleng - 1); } + {STRING1} { tmpstring_append(yyleng); } + ['']{SPC}[&]{SPC}[""''] { + if( yytext[yyleng - 1] == '"' ) BEGIN(quoted2); + } + ['']-{OSPC}(\r?\n{OSPC})+[''] /* continue ... */ + [''] { + char *s = xstrdup(tmpstring? tmpstring : "\0"); + yylval.literal.set_data(strlen(s), s); + ydflval.string = yylval.literal.data; + update_location_col(yylval.literal.data, -2); + tmpstring = NULL; pop_return LITERAL; } +} + +<*>{ + AS { return AS; } + CONSTANT { return CONSTANT; } + (IS{SPC})?DEFINED { ydflval.boolean = true; return DEFINED; } + {ISNT}{SPC}DEFINED { ydflval.boolean = false; return DEFINED; } + OFF { return OFF; } +} + +<cdf_state>{ + [+-]?{INTEGERZ} { int value; + if( is_integer_token(&value) ) { + ydflval.number = value; + return YDF_NUMBER; + } + dbgmsg("%s not an integer = %d", + yytext, value); + return NO_CONDITION; + } + + {NAME}{SPC}AS { char *s = xstrdup(yytext); + char *p = strchr(s, 0x20); + gcc_assert(p); // just found via regex + *p = '\0'; + ydflval.string = yylval.string = s; + return NAME; + } + {NAME} { ydflval.string = yylval.string = xstrdup(yytext); + return NAME; + } + %EBCDIC-MODE { ydflval.number = feature_internal_ebcdic_e; + return FEATURE; } + %64-BIT-POINTER { ydflval.number = feature_embiggen_e; + return FEATURE; } + [[:blank:]]+ + {BLANK_EOL} + . { myless(0); yy_pop_state(); } // not a CDF token +} + +<program_id_state>{ + ^[[:blank:]]+ + ^{BLANK_EOL} + (IS)?[[:space:]] + + COMMON/[.]|{SPC}[[:alnum:].] { return COMMON; } + INITIAL/[.]|{SPC}[[:alnum:].] { return INITIAL_kw; } + RECURSIVE { return RECURSIVE; } + PROGRAM/[.]|{SPC}[[:alnum:].] { return PROGRAM_kw; } + + INITIAL { pop_return INITIAL_kw; } + COMMON { pop_return COMMON; } + PROGRAM { pop_return PROGRAM; } + + AS/{SPC} { myless(0); yy_pop_state(); } /* => ident_state */ + [[:blank:]]*{DOTSEP}[[:blank:].]+{EOL} { pop_return '.'; } + {DOTEOL} { pop_return '.'; } +} + +<name_state>{ + ^[[:blank:]]+ + ^{BLANK_EOL} + {NAME}/{OSPC}[.] { yy_pop_state(); + yylval.string = xstrdup(yytext); return NAME; } + {NAME} { yy_pop_state(); + yylval.string = xstrdup(yytext); return NAME; } + + Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted1); } + Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted2); } + + [.]/[[:blank:]]+. { return *yytext; } + + [[:blank:]]*{DOTSEP}[[:blank:].]+{EOL} { + yy_pop_state(); myless(0); } + {DOTEOL} { yy_pop_state(); myless(0); } +} +<dot_state>{ + [[:blank:]]*[.][[:blank:].]+{EOL} { pop_return '.'; } + [[:blank:]]*[.] { pop_return '.'; } +} + +<date_state>{ + ^[[:blank:]]+ + {BLANK_EOL} + + DATE { pop_return DATE; } + DAY { pop_return DAY; } + DATE/[[:blank:]]+Y { return DATE; } + DAY/[[:blank:]]+Y { return DAY; } + TIME { pop_return TIME; } + + YYYYMMDD { yy_pop_state(); + yylval.string = xstrdup(yytext); return YYYYMMDD; } + YYYYDDD { yy_pop_state(); + yylval.string = xstrdup(yytext); return YYYYDDD; } + DAY-OF-WEEK { yy_pop_state(); + yylval.string = xstrdup(yytext); return DAY_OF_WEEK; } +} + +<INITIAL,procedure_div,copy_state>{ + NOT{SPC}B/{boolseq} { is_not = true; yy_push_state(bool_state); } + B/{boolseq} { is_not = false; yy_push_state(bool_state); } + N?X/{hexseq} { yylval.literal.set_prefix(yytext, yyleng); + yy_push_state(hex_state); } + N?X{nonseq} { dbgmsg("invalid hexadecimal value: %s", yytext); + return NO_CONDITION; } + + BX/{hexseq} { yylval.numstr.radix = hexadecimal_e; + yy_push_state(numstr_state); } + + Z?[''] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted1); } + Z?[""] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted2); } + Z?[""]/{STRING}[""] { yylval.literal.set_prefix(yytext, yyleng-1); + yy_push_state(quoted2); } + + {INTEGERZ}/[[:punct:]][[:space:]]{BLANK_OEOL} { return numstr_of(yytext); } + {dfc}/[[:blank:][:punct:]] { return numstr_of(yytext); } + + [+-]?({dfc}|{dseq})([.,][[:digit:]])* { auto eotext = yytext + yyleng - 1; + if( *eotext == '.' ) { + myless(yyleng - 1); + *eotext = '\0'; + } + return numstr_of(yytext); } + + UPSI-[0-7] { char *p = yytext + yyleng - 1; + ydflval.string = yylval.string = xstrdup(p); + return UPSI; } +} + + /* + * "The decimal point can appear anywhere within the literal except as the + * rightmost character." + */ +<numeric_state>{ + [[:blank:]]+ + {BLANK_EOL} + + [+-]?{INTEGERZ} { pop_return numstr_of(yytext); } + [+-]?{dfc}([.][[:digit:]])* { + char *s = xstrdup(yytext); + char *p = strchr(s, '.'); + if( p && strlen(p) == 1 ) { + *p = '\0'; + myless(p - s); + } + numstr_of(s); free(s); + pop_return NUMSTR; + } +} + +<cdf_state,procedure_div>{ + (IS{SPC})?"<" { return '<'; } + (IS{SPC})?"<=" { return LE; } + (IS{SPC})?"=" { return '='; } + (IS{SPC})?"<>" { return NE; } + (IS{SPC})?">=" { return GE; } + (IS{SPC})?">" { return '>'; } + + {LESS_THAN} { return '<'; } + {LESS_THAN}{SPC}{OR_EQUAL}/[[:space:]] { return LE; } + (IS{SPC})?EQUALS?({SPC}TO)?/[[:space:]] { return '='; } + {GREATER_THAN}{SPC}{OR_EQUAL}/[[:space:]] { return GE; } + {GREATER_THAN} { return '>'; } + + {ISNT}{SPC}">=" { return '<'; } + {ISNT}{SPC}">" { return LE; } + {ISNT}{SPC}"=" { return NE; } + {ISNT}{SPC}"<" { return GE; } + {ISNT}{SPC}"<=" { return '>'; } + + {ISNT}{SPC}GREATER{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '<'; } + {ISNT}{SPC}GREATER{SPC}(THAN)? { return LE; } + {ISNT}{SPC}EQUALS?{SPC}(TO)? { return NE; } + {ISNT}{SPC}LESS{SPC}(THAN)? { return GE; } + {ISNT}{SPC}LESS{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '>'; } + + [*]{2}{SPC}[+] { return POW; } + "**" { return POW; } +} + +<procedure_div>{ + (ID|IDENTIFICATION|ENVIRONMENT|DATA|PROCEDURE){SPC}DIVISION { + myless(0); yy_pop_state(); } + + EXIT{SPC}/(PROGRAM|SECTION|PARAGRAPH|PERFORM) { + return EXIT; } + EXIT{OSPC}/{DOTSEP} { return SIMPLE_EXIT; } + EXIT { return EXIT; } // (PROGRAM|SECTION|PARAGRAPH|PERFORM) + RETURNING { return RETURNING; } + + ACTIVATING { return ACTIVATING; } + CURRENT { return CURRENT; } + NESTED { return NESTED; } + STACK { return STACK; } + TOP-LEVEL { return TOP_LEVEL; } + + {NAME}/{SPC}SECTION{OSPC}{DOTSEP} { + yylval.string = xstrdup(yytext); + return NAME; } + + (IS{SPC})?POSITIVE/[[:space:]] { yylval.number = IS; return POSITIVE; } + (IS{SPC})?NEGATIVE/[[:space:]] { yylval.number = IS; return NEGATIVE; } + (IS{SPC})?ZERO/[[:space:]] { yylval.number = IS; return ZERO; } + + {ISNT}{SPC}POSITIVE/[[:space:]] { yylval.number = NOT; return POSITIVE; } + {ISNT}{SPC}NEGATIVE/[[:space:]] { yylval.number = NOT; return NEGATIVE; } + {ISNT}{SPC}ZERO/[[:space:]] { yylval.number = NOT; return ZERO; } + + [(:)] { return *yytext; } + [(]/[^(:)""'']*[:][^)]*[)] { return LPAREN; /* parentheses around a colon */ } + + FILLER { return FILLER_kw; } + INVALID { yylval.number = INVALID; return INVALID; } + NOT{SPC}INVALID { yylval.number = NOT; return INVALID; } + + ON{SPC}SIZE { return SIZE; } + + (ON{SPC})?EXCEPTION { yylval.number = EXCEPTION; return EXCEPTION; } + NOT{SPC}(ON{SPC})?EXCEPTION { + yylval.number = NOT; return EXCEPTION; } + + (ON{SPC})?OVERFLOW { yylval.number = OVERFLOW; return OVERFLOW; } + NOT{SPC}(ON{SPC})?OVERFLOW { + yylval.number = NOT; return OVERFLOW; } + + (AT{SPC})?END/[[:space:]] { yylval.number = END; + return END; } + NOT{SPC}(AT{SPC})?END/[[:space:]] { yylval.number = NOT; + return END; } + + (AT{SPC})?{EOP}/[[:space:]] { yylval.number = EOP; + return EOP; } + NOT{SPC}(AT{SPC})?{EOP}/[[:space:]] { yylval.number = NOT; + return EOP; } + + {SIZE_ERROR} { yylval.number = ERROR; return SIZE_ERROR; } + NOT{SPC}{SIZE_ERROR} { yylval.number = NOT; return SIZE_ERROR; } + + STRING { return STRING_kw; } + UNSTRING { return UNSTRING; } + POINTER { return POINTER; } + REFERENCE { return REFERENCE; } + COMMAND-LINE { return COMMAND_LINE; } + COMMAND-LINE-COUNT { return COMMAND_LINE_COUNT; } + CONTENT { return CONTENT; } + DELIMITED { return DELIMITED; } + DELIMITER { return DELIMITER; } + ENVIRONMENT { return ENVIRONMENT; } + + END{SPC}PROGRAM { yy_push_state(name_state); + return program_level() > 1? + END_SUBPROGRAM : END_PROGRAM; } + + END{SPC}FUNCTION { yy_push_state(name_state); + return program_level() > 1? + END_SUBPROGRAM /*invalid*/ : + END_FUNCTION; } + + {ISNT}{SPC}{VARTYPE} { yylval.number = NOT; + yy_push_state(classify); + myless(0); + return MIGHT_BE; + } + IS{SPC}{VARTYPE} { yylval.number = IS; + yy_push_state(classify); + myless(0); + return MIGHT_BE; + } + + {SORT_MERGE}{SPC}(\f#)?/{NAME} { yy_push_state(sort_state); return SORT; } + + ADDRESS{SPC}(OF{SPC})?/FUNCTION { yy_push_state(addr_of); return ADDRESS; } + + FUNCTION { yy_push_state(function); return FUNCTION; } + + SECTION{OSPC}[.]{SPC}/USE[[:space:]] { yylval.string = NULL; return SECTION; } + + {NAME}{OSPC}[.]({SPC}(EJECT|SKIP[123]))*{SPC}EXIT{OSPC}/{DOTSEP} { + // EXIT format-1 is a "continue" statement + yylval.string = xstrdup(yytext); + auto p = strchr(yylval.string, '.'); + assert(p); + assert( ISSPACE(p[1]) ); + *p = '\0'; + while( p > yylval.string && ISSPACE(p[-1]) ) { + *--p = '\0'; + } + + int token = keyword_tok(yylval.string); + if( token ) return token; + if( is_integer_token() ) return numstr_of(yylval.string); + return typed_name(yylval.string); + } + {NAME}/{OSPC}{DOTSEP} { + assert(YY_START == procedure_div); + int token = keyword_tok(yytext); + if( token ) return token; + if( is_integer_token() ) return numstr_of(yytext); + + ydflval.string = yylval.string = xstrdup(yytext); + return typed_name(yytext); + } + LENGTH{SPC}OF/{SPC}{NAME} { return LENGTH_OF; } + {NAME}/{SPC}(IN|OF){SPC}{NAME}{SPC}(IN|OF)[[:space:]] { + int token = keyword_tok(yytext); + if( token ) return token; + if( is_integer_token() ) return numstr_of(yytext); + myless(0); + yy_push_state(partial_name); + tee_up_empty(); + } + {NAME}/{SPC}(IN|OF){SPC}{NAME} { + int token = keyword_tok(yytext); + if( token ) return token; + if( is_integer_token() ) return numstr_of(yytext); + // if the 2nd name is a filename, return NAME for normal processing + // skip {SPC}(IN|OF){SPC} + char *p = yytext + yyleng + 1; + while( ISSPACE(*p) ) p++; + assert(TOUPPER(p[0]) == 'I' || TOUPPER(p[0]) == 'O' ); + assert(TOUPPER(p[1]) == 'N' || TOUPPER(p[1]) == 'F' ); + p += 2; + while( ISSPACE(*p) ) p++; + cbl_name_t name2; + std::transform( p, p + sizeof(name2), name2, + []( char ch ) { + switch(ch) { + case '-': + case '_': return ch; + default: + if( ISALNUM(ch) ) return ch; + } + return '\0'; + } ); + symbol_elem_t *e = symbol_file(PROGRAM, name2); + /* + * For NAME IN FILENAME, we want the parser to handle it. + * For NAME IN NAME (of filename), the scanner handles it. + */ + if( e ) { // e is an FD, but name2 could be its 01 + cbl_namelist_t names = {name2, yytext}; + auto p = symbol_find(PROGRAM, names); + if( !p.second ) { + ydflval.string = yylval.string = xstrdup(yytext);; + return NAME; + } + } + myless(0); + yy_push_state(partial_name); + tee_up_empty(); + } +} + +<partial_name>{ + {NAME}/{SPC}(IN|OF)[[:space:]] { + tee_up_name(yylloc, xstrdup(yytext)); + } + {SPC}(IN|OF){SPC} + {NAME} { yy_pop_state(); + auto name = xstrdup(yytext); + auto names = teed_up_names(); + names.push_front(name); + auto found = symbol_find( PROGRAM, names); + + ydflval.string = yylval.string = name; + if( found.first && found.second ) { // unique + symbol_elem_t *e = found.first; + if( e->type == SymField ) { + auto f( cbl_field_of(e) ); + if( f->level == 88 ) return NAME88; + } + } + return NAME; + } + {NAME}{OSPC}/[(] { BEGIN(subscripts); + auto name = xstrdup(yytext); + char *eoname = name + strlen(name); + auto p = std::find_if(name, eoname, fisspace); // stop at blank, if any + if( p < eoname ) *p = '\0'; + + auto names = teed_up_names(); + names.push_front(name); + auto found = symbol_find( PROGRAM, names); + + ydflval.string = yylval.string = name; + if( found.first && found.second ) { // unique + symbol_elem_t *e = found.first; + if( e->type == SymField ) { + auto f( cbl_field_of(e) ); + if( f->level == 88 ) return NAME88; + } + } + return NAME; + } +} + +<addr_of>FUNCTION { pop_return FUNCTION; } + +<classify>{ + {ISNT}/{SPC}{NAMTYP} { yy_pop_state(); } + IS/{SPC}{NAMTYP} { yy_pop_state(); } +} + +<sort_state>{ + {NAME} { yylval.string = xstrdup(yytext); + pop_return symbol_file(PROGRAM, yytext)? FILENAME : NAME; + } +} + +<datetime_fmt>{ + [(] { return *yytext; } + + ['']{DATETIME_FMT}[''] | + [""]{DATETIME_FMT}[""] { yylval.string = xstrdup(yytext + 1); + yylval.string[yyleng-2] = '\0'; + pop_return DATETIME_FMT; } + + ['']{DATE_FMT}[''] | + [""]{DATE_FMT}[""] { yylval.string = xstrdup(yytext + 1); + yylval.string[yyleng-2] = '\0'; + pop_return DATE_FMT; } + + ['']{TIME_FMT}[''] | + [""]{TIME_FMT}[""] { yylval.string = xstrdup(yytext + 1); + yylval.string[yyleng-2] = '\0'; + pop_return TIME_FMT; } + + {SPC} // ignore + {NAME} { + int token = NAME; + char type = 0; + auto elem = symbol_field(PROGRAM, 0, yytext); + + if( elem->type == SymField ) { + auto f = cbl_field_of(elem); + if( f->type == FldLiteralA && f->has_attr(constant_e) ) { + type = date_time_fmt(f->data.initial); + yylval.string = xstrdup(f->data.initial); + } + } else { + yylval.string = xstrdup(yytext); + } + switch(type) { + case 'D': token = DATETIME_FMT; break; + case 'd': token = DATE_FMT; break; + case 't': token = TIME_FMT; break; + default: + dbgmsg("format must be literal"); + pop_return token; + break; + } + pop_return token; + } + + . { myless(0); yy_pop_state(); } +} + +<function>{ + + + ABS{OSPC}/[(]? { pop_return ABS; } + ACOS{OSPC}/[(]? { pop_return ACOS; } + ANNUITY{OSPC}/[(]? { pop_return ANNUITY; } + ASIN{OSPC}/[(]? { pop_return ASIN; } + ATAN{OSPC}/[(]? { pop_return ATAN; } + BASECONVERT{OSPC}/[(]? { pop_return BASECONVERT; } + BIT-OF{OSPC}/[(]? { pop_return BIT_OF; } + BIT-TO-CHAR{OSPC}/[(]? { pop_return BIT_TO_CHAR; } + BOOLEAN-OF-INTEGER{OSPC}/[(]? { pop_return BOOLEAN_OF_INTEGER; } + BYTE-LENGTH{OSPC}/[(]? { pop_return BYTE_LENGTH; } + CHAR-NATIONAL{OSPC}/[(]? { pop_return CHAR_NATIONAL; } + CHAR{OSPC}/[(]? { pop_return CHAR; } + COMBINED-DATETIME{OSPC}/[(]? { pop_return COMBINED_DATETIME; } + CONCAT{OSPC}/[(]? { pop_return CONCAT; } + CONTENT-LENGTH{OSPC}/[(]? { pop_return NO_CONDITION; /* GNU only*/ } + CONTENT-OF{OSPC}/[(]? { pop_return NO_CONDITION; /* GNU only*/ } + CONVERT{OSPC}/[(]? { pop_return CONVERT; } + COS{OSPC}/[(]? { pop_return COS; } + CURRENCY-SYBOL{OSPC}/[(]? { pop_return NO_CONDITION; /* GNU only*/ } + CURRENT-DATE{OSPC}/[(]? { pop_return CURRENT_DATE; } + DATE-OF-INTEGER{OSPC}/[(]? { pop_return DATE_OF_INTEGER; } + DATE-TO-YYYYMMDD{OSPC}/[(]? { pop_return DATE_TO_YYYYMMDD; } + DAY-OF-INTEGER{OSPC}/[(]? { pop_return DAY_OF_INTEGER; } + DAY-TO-YYYYDDD{OSPC}/[(]? { pop_return DAY_TO_YYYYDDD; } + DISPLAY-OF{OSPC}/[(]? { pop_return DISPLAY_OF; } + E{OSPC}/[(]? { pop_return E; } + + EXCEPTION-FILE-N{OSPC}/[(]? { pop_return EXCEPTION_FILE_N; } + EXCEPTION-FILE{OSPC}/[(]? { pop_return EXCEPTION_FILE; } + EXCEPTION-LOCATION-N{OSPC}/[(]? { pop_return EXCEPTION_LOCATION_N; } + EXCEPTION-LOCATION{OSPC}/[(]? { pop_return EXCEPTION_LOCATION; } + EXCEPTION-STATEMENT{OSPC}/[(]? { pop_return EXCEPTION_STATEMENT; } + EXCEPTION-STATUS{OSPC}/[(]? { pop_return EXCEPTION_STATUS; } + + EXP{OSPC}/[(]? { pop_return EXP; } + EXP10{OSPC}/[(]? { pop_return EXP10; } + FACTORIAL{OSPC}/[(]? { pop_return FACTORIAL; } + FIND-STRING{OSPC}/[(]? { pop_return FIND_STRING; } + + FORMATTED-CURRENT-DATE{OSPC}/[(]? { BEGIN(datetime_fmt); return FORMATTED_CURRENT_DATE; } + FORMATTED-DATE{OSPC}/[(]? { BEGIN(datetime_fmt); return FORMATTED_DATE; } + FORMATTED-DATETIME{OSPC}/[(]? { BEGIN(datetime_fmt); return FORMATTED_DATETIME; } + FORMATTED-TIME{OSPC}/[(]? { BEGIN(datetime_fmt); return FORMATTED_TIME; } + FRACTION-PART{OSPC}/[(]? { pop_return FRACTION_PART; } + + HEX-OF{OSPC}/[(]? { pop_return HEX_OF; } + HEX-TO-CHAR{OSPC}/[(]? { pop_return HEX_TO_CHAR; } + HIGHEST-ALGEBRAIC{OSPC}/[(]? { pop_return HIGHEST_ALGEBRAIC; } + + INTEGER{OSPC}/[(]? { pop_return INTEGER; } + INTEGER-OF-BOOLEAN{OSPC}/[(]? { pop_return INTEGER_OF_BOOLEAN; } + INTEGER-OF-DATE{OSPC}/[(]? { pop_return INTEGER_OF_DATE; } + INTEGER-OF-DAY{OSPC}/[(]? { pop_return INTEGER_OF_DAY; } + INTEGER-OF-FORMATTED-DATE{OSPC}/[(]? { BEGIN(datetime_fmt); return INTEGER_OF_FORMATTED_DATE; } + INTEGER-PART{OSPC}/[(]? { pop_return INTEGER_PART; } + LENGTH{OSPC}/[(]? { pop_return LENGTH; } + LOCALE-COMPARE{OSPC}/[(]? { pop_return LOCALE_COMPARE; } + LOCALE-DATE{OSPC}/[(]? { pop_return LOCALE_DATE; } + LOCALE-TIME{OSPC}/[(]? { pop_return LOCALE_TIME; } + LOCALE-TIME-FROM-SECONDS{OSPC}/[(]? { pop_return LOCALE_TIME_FROM_SECONDS; } + LOG{OSPC}/[(]? { pop_return LOG; } + LOG10{OSPC}/[(]? { pop_return LOG10; } + LOWER-CASE{OSPC}/[(]? { pop_return LOWER_CASE; } + LOWEST-ALGEBRAIC{OSPC}/[(]? { pop_return LOWEST_ALGEBRAIC; } + MAX{OSPC}/[(]? { pop_return MAXX; } + MEAN{OSPC}/[(]? { pop_return MEAN; } + MEDIAN{OSPC}/[(]? { pop_return MEDIAN; } + MIDRANGE{OSPC}/[(]? { pop_return MIDRANGE; } + MIN{OSPC}/[(]? { pop_return MINN; } + MOD{OSPC}/[(]? { pop_return MOD; } + MODULE-NAME{OSPC}/[(]? { pop_return MODULE_NAME; } + NATIONAL-OF{OSPC}/[(]? { pop_return NATIONAL_OF; } + NUMVAL{OSPC}/[(]? { pop_return NUMVAL; } + NUMVAL-C{OSPC}/[(]? { pop_return NUMVAL_C; } + NUMVAL-F{OSPC}/[(]? { pop_return NUMVAL_F; } + ORD{OSPC}/[(]? { pop_return ORD; } + ORD-MAX{OSPC}/[(]? { pop_return ORD_MAX; } + ORD-MIN{OSPC}/[(]? { pop_return ORD_MIN; } + PI{OSPC}/[(]? { pop_return PI; } + PRESENT-VALUE{OSPC}/[(]? { pop_return PRESENT_VALUE; } + + RANDOM{OSPC}{PARENS} { pop_return RANDOM; } + RANDOM{OSPC}[(] { pop_return RANDOM_SEED; } + RANDOM { pop_return RANDOM; } + + RANGE{OSPC}/[(]? { pop_return RANGE; } + REM{OSPC}/[(]? { pop_return REM; } + REVERSE{OSPC}/[(]? { pop_return REVERSE; } + SECONDS-FROM-FORMATTED-TIME{OSPC}/[(]? { BEGIN(datetime_fmt); + return SECONDS_FROM_FORMATTED_TIME; } + SECONDS-PAST-MIDNIGHT{OSPC}/[(]? { pop_return SECONDS_PAST_MIDNIGHT; } + SIGN{OSPC}/[(]? { pop_return SIGN; } + SIN{OSPC}/[(]? { pop_return SIN; } + SMALLEST-ALGEBRAIC{OSPC}/[(]? { pop_return SMALLEST_ALGEBRAIC; } + SQRT{OSPC}/[(]? { pop_return SQRT; } + STANDARD-COMPARE{OSPC}/[(]? { pop_return STANDARD_COMPARE; } + STANDARD-DEVIATION{OSPC}/[(]? { pop_return STANDARD_DEVIATION; } + SUBSTITUTE{OSPC}/[(]? { pop_return SUBSTITUTE; } + SUM{OSPC}/[(]? { pop_return SUM; } + TAN{OSPC}/[(]? { pop_return TAN; } + TEST-DATE-YYYYMMDD{OSPC}/[(]? { pop_return TEST_DATE_YYYYMMDD; } + TEST-DAY-YYYYDDD{OSPC}/[(]? { pop_return TEST_DAY_YYYYDDD; } + TEST-FORMATTED-DATETIME{OSPC}/[(]? { BEGIN(datetime_fmt); return TEST_FORMATTED_DATETIME; } + TEST-NUMVAL{OSPC}/[(]? { pop_return TEST_NUMVAL; } + TEST-NUMVAL-C{OSPC}/[(]? { pop_return TEST_NUMVAL_C; } + TEST-NUMVAL-F{OSPC}/[(]? { pop_return TEST_NUMVAL_F; } + TRIM{OSPC}/[(]? { pop_return TRIM; } + ULENGTH{OSPC}/[(]? { pop_return ULENGTH; } + UPOS{OSPC}/[(]? { pop_return UPOS; } + UPPER-CASE{OSPC}/[(]? { pop_return UPPER_CASE; } + USUBSTR{OSPC}/[(]? { pop_return USUBSTR; } + USUPPLEMENTARY{OSPC}/[(]? { pop_return USUPPLEMENTARY; } + UUID4{OSPC}/[(]? { pop_return UUID4; } + UVALID{OSPC}/[(]? { pop_return UVALID; } + UWIDTH{OSPC}/[(]? { pop_return UWIDTH; } + VARIANCE{OSPC}/[(]? { pop_return VARIANCE; } + WHEN-COMPILED{OSPC}/[(]? { pop_return WHEN_COMPILED; } + YEAR-TO-YYYY{OSPC}/[(]? { pop_return YEAR_TO_YYYY; } + + {NAME}{OSPC}/[(] { /* If /{OSPC}, "dangerous trailing context" "*/ + auto name = null_trim(xstrdup(yytext)); + if( 0 != (yylval.number = symbol_function_token(name)) ) { + pop_return FUNCTION_UDF; + } + yylval.string = name; + pop_return NAME; + } + + {NAME}({OSPC}{PARENS})? { + auto name = null_trim(xstrdup(yytext)); + auto p = strchr(name, '('); + if( p ) *p = '\0'; + if( 0 != (yylval.number = symbol_function_token(name)) ) { + pop_return FUNCTION_UDF_0; + } + yylval.string = name; + pop_return NAME; + } +} + + /* + * CDF: Compiler-directing Facility + */ + +[*]CBL { return STAR_CBL; } +[*]CONTROL { return STAR_CBL; } + +^[ ]*[*](PROCESS\b|CBL\b).*$ { + auto p = std::find(yytext, yytext + yyleng, '*'); + not_implemented("CDF '%s' was ignored", p); + } +^[ ]*[@]OPTIONS.+$ { + auto p = std::find(yytext, yytext + yyleng, '@'); + not_implemented("CDF '%s' was ignored", p); + } + +BASIS { yy_push_state(basis); return BASIS; } + +<basis>{ + [[:blank:]]+ + {BLANK_EOL} + + {STRING} { yy_pop_state(); + yypush_buffer_state( yy_create_buffer(yyin, YY_BUF_SIZE) ); + if( (yyin = cdftext::lex_open(yytext)) == NULL ) { + yywarn("could not open BASIS file '%s'", yytext); + yyterminate(); + } + } +} + +<subscripts>{ + [(] { pop_return LPAREN; } +} + +<procedure_div>{ + EQUALS?{OSPC}/[(] { return '='; } + + {NAME}{OSPC}/[(] { /* If /{OSPC}, "dangerous trailing context" "*/ + if( is_integer_token() ) return numstr_of(yytext); + ydflval.string = yylval.string = xstrdup(yytext); + + int token = keyword_tok(null_trim(yylval.string), true); + + if( token && ! symbol_field(PROGRAM, 0, yylval.string) ) { + // If token is an intrinsic, and not in Repository, pretend + // it's a name and let the parser sort it out. + auto name = intrinsic_function_name(token); + if( ! name ) return token; // valid keyword, like IF + if( token == repository_function_tok(name) ) { + return token; // intrinsic and in repository + } + error_msg(yylloc, "'FUNCTION %s' required because %s " + "is not mentioned in REPOSITORY paragraph", + name, name); + } + + if( 0 != (token = repository_function_tok(yylval.string)) ) { + auto e = symbol_function(0, yylval.string); + assert(e); + yylval.number = symbol_index(e); + return token; + } + token = typed_name(yylval.string); + switch(token) { + case NAME: + case NUME: + case NAME88: + yy_push_state(subscripts); + } + return token; + } + [.][[:blank:].]+ { return '.'; } +} + +<exception>{ + CHECKING { return CHECKING; } + ON { return ON; } + OFF { return OFF; } + WITH { return WITH; } + LOCATION { return LOCATION; } + + {NAME} { + auto ec = ec_type_of(yytext); + if( ec != ec_none_e ) { + ydflval.number = ec; + return EXCEPTION_NAME; + } + ydflval.string = xstrdup(yytext); + return symbol_file(PROGRAM, yytext)? FILENAME : NAME; + } + [[:blank:]]+ + \r?\n { yy_pop_state(); } +} + +<raising>{ + LAST({SPC}EXCEPTION)? { yy_pop_state(); return LAST; } + . { yy_pop_state(); return RAISING; } // invalid syntax +} + /* + * Catch-all + */ + + +<*>{ + ^[ ]{6}D.*\n { + if( !is_fixed_format() ) { + myless(6); + } else { + // If WITH DEBUGGING MODE, drop the D, else drop the line. + if( include_debug() ) myless(7); + } + } + ^[ ]*>>{OSPC}IF { yy_push_state(cdf_state); return CDF_IF; } + ^[ ]*>>{OSPC}ELSE { return CDF_ELSE; } + ^[ ]*>>{OSPC}END-IF { return CDF_END_IF; } + + ^[ ]*[$]{OSPC}IF { if( ! dialect_mf() ) { + dialect_error(yylloc, yytext, "mf"); + } + yy_push_state(cdf_state); return CDF_IF; } + ^[ ]*[$]{OSPC}ELSE { if( ! dialect_mf() ) { + dialect_error(yylloc, yytext, "mf"); + } + return CDF_ELSE; } + ^[ ]*[$]{OSPC}END { if( ! dialect_mf() ) { + dialect_error(yylloc, yytext, "mf"); + } + return CDF_END_IF; } + + ^[ ]*[$]{OSPC}SET({SPC}CONSTANT)? { + if( ! dialect_mf() ) dialect_error(yylloc, yytext, "mf"); + yy_push_state(cdf_state); return CDF_DEFINE; } + + ^[ ]*>>{OSPC}EVALUATE { return CDF_EVALUATE; } + ^[ ]*>>{OSPC}WHEN { return CDF_WHEN; } + ^[ ]*>>{OSPC}END-EVALUATE { return CDF_END_EVALUATE; } + + ^[ ]*>>{OSPC}CALL-CONVENTION{SPC}C { return CALL_VERBATIM; } + ^[ ]*>>{OSPC}CALL-CONVENTION{SPC}COBOL { return CALL_COBOL; } + ^[ ]*>>{OSPC}CALL-CONVENTION{SPC}VERBATIM { return CALL_VERBATIM; } + + ^[ ]*>>{OSPC}DEFINE { yy_push_state(cdf_state); return CDF_DEFINE; } + ^[ ]*>>{OSPC}DISPLAY { return CDF_DISPLAY; } + ^[ ]*>>{OSPC}TURN { yy_push_state(exception); return TURN; } + ^[ ]*>>{OSPC}COBOL-WORDS { yy_push_state(cobol_words); return COBOL_WORDS; } + + ^[ ]*>>{OSPC}{NAME} { + error_msg(yylloc, "unknown CDF token: %s", yytext); + } + + OTHER { return OTHER; } + OVERRIDE { return OVERRIDE; } + PARAMETER { return PARAMETER_kw; } + THRU { return THRU; } + TRUE { return TRUE_kw; } +} + +<cobol_words>{ + EQUATE { return EQUATE; } + UNDEFINE { return UNDEFINE; } + SUBSTITUTE { return SUBSTITUTE; } + RESERVE { return RESERVE; } + {NAME} { + ydflval.string = yylval.string = xstrdup(yytext); + pop_return NAME; + } +} + +<*>{ + {PUSH_FILE} { + yy_set_bol(true); + auto top_file = cobol_lineno_save(); + if( top_file ) { + if( yy_flex_debug ) dbgmsg(" saving line %4d of %s", + yylineno, top_file); + } + // "\f#file push <name>": name starts at offset 13. + char *filename = xstrdup(yytext); + filename[yyleng - 1] = '\0'; // kill the trailing formfeed + filename += 12; + if( yytext[0] != '\f' ) { + dbgmsg("logic warning: filename was adjusted to %s", --filename); + } + input_file_status.enter(filename); + } + + {POP_FILE} { + yy_set_bol(true); + input_file_status.leave(); + } + + {LINE_DIRECTIVE} { cobol_fileline_set(yytext); } +} + + +<*>OR { return OR; } +<*>AND { return AND; } +<*>{DOTSEP}[[:blank:].]+$ { return '.'; } +<*>[*/+-]{SPC}[+] { return *yytext; } +<*>[().=*/+&-] { return *yytext; } +<*>[[:blank:]]+ +<*>\r?\n + +<*>{ + {COMMA} + ^{SKIP} + ^{TITLE} +} + +<*>{ + ACCEPT { return ACCEPT; } + ACCESS { return ACCESS; } + ADD { return ADD; } + ADDRESS { return ADDRESS; } + ADVANCING { return ADVANCING; } + AFTER { return AFTER; } + ALL { return ALL; } + ALLOCATE { return ALLOCATE; } + ALPHABET { return ALPHABET; } + ALPHABETIC { return ALPHABETIC; } + ALPHABETIC-LOWER { return ALPHABETIC_LOWER; } + ALPHABETIC-UPPER { return ALPHABETIC_UPPER; } + ALPHANUMERIC { return ALPHANUMERIC; } + ALPHANUMERIC-EDITED { return ALPHANUMERIC_EDITED; } + ALSO { return ALSO; } + ALTERNATE { return ALTERNATE; } + AND { return AND; } + ANY { return ANY; } + ANYCASE { return ANYCASE; } + ARE { return ARE; } + AREA { return AREA; } + AREAS { return AREAS; } + AS { return AS; } + ASCENDING { return ASCENDING; } + ASSIGN { return ASSIGN; } + AT { return AT; } + BASED { return BASED; } + BEFORE { return BEFORE; } + BINARY { return BINARY; } + BIT { return BIT; } + BLANK { return BLANK; } + BLOCK { return BLOCK; } + BOTTOM { return BOTTOM; } + BY { return BY; } + CALL { return CALL; } + CANCEL { return CANCEL; } + CF { return CF; } + CH { return CH; } + CHARACTER { return CHARACTER; } + CHARACTERS { return CHARACTERS; } + CLASS { return CLASS; } + CLOSE { return CLOSE; } + CODE { return CODE; } + COMMA { return COMMA; } + COMMIT { return COMMIT; } + COMMON { return COMMON; } + CONDITION { return CONDITION; } + CONSTANT { return CONSTANT; } + CONTAINS { return CONTAINS; } + CONTENT { return CONTENT; } + CONTINUE { return CONTINUE; } + CONTROL { return CONTROL; } + CONTROLS { return CONTROLS; } + CONVERTING { return CONVERTING; } + COPY { return COPY; } + COUNT { return COUNT; } + CURRENCY { return CURRENCY; } + DATA { return DATA; } + DATE { return DATE; } + DAY { return DAY; } + DAY-OF-WEEK { return DAY_OF_WEEK; } + DE { return DE; } + DECIMAL-POINT { return DECIMAL_POINT; } + DECLARATIVES { return DECLARATIVES; } + DEFAULT { return DEFAULT; } + DELETE { return DELETE; } + DELIMITED { return DELIMITED; } + DELIMITER { return DELIMITER; } + DEPENDING { return DEPENDING; } + DESCENDING { return DESCENDING; } + DETAIL { return DETAIL; } + DISPLAY { return DISPLAY; } + DIVIDE { return DIVIDE; } + DOWN { return DOWN; } + DUPLICATES { return DUPLICATES; } + DYNAMIC { return DYNAMIC; } + EC { return EC; } + ELSE { return ELSE; } + END { return END; } + END-ACCEPT { return END_ACCEPT; } + END-ADD { return END_ADD; } + END-CALL { return END_CALL; } + END-DELETE { return END_DELETE; } + END-DISPLAY { return END_DISPLAY; } + END-DIVIDE { return END_DIVIDE; } + END-EVALUATE { return END_EVALUATE; } + END-IF { return END_IF; } + END-MULTIPLY { return END_MULTIPLY; } + END-PERFORM { return END_PERFORM; } + END-READ { return END_READ; } + END-RETURN { return END_RETURN; } + END-REWRITE { return END_REWRITE; } + END-SEARCH { return END_SEARCH; } + END-SUBTRACT { return END_SUBTRACT; } + END-WRITE { return END_WRITE; } + ENVIRONMENT { return ENVIRONMENT; } + EQUAL { return EQUAL; } + ERROR { return ERROR; } + EVALUATE { return EVALUATE; } + EXCEPTION { return EXCEPTION; } + EXIT { return EXIT; } + EXTEND { return EXTEND; } + EXTERNAL { return EXTERNAL; } + + FD { return FD; } + FINAL { return FINAL; } + FINALLY { return FINALLY; } + FIRST { return FIRST; } + FOOTING { return FOOTING; } + FOR { return FOR; } + FREE { return FREE; } + FROM { return FROM; } + FUNCTION { return FUNCTION; } + GENERATE { return GENERATE; } + GIVING { return GIVING; } + GLOBAL { return GLOBAL; } + GO { return GO; } + GOBACK { return GOBACK; } + GROUP { return GROUP; } + HEADING { return HEADING; } + IDENTIFICATION { return IDENTIFICATION_DIV; } + IF { return IF; } + IN { return IN; } + INDEX { return INDEX; } + INDEXED { return INDEXED; } + INDICATE { return INDICATE; } + INITIAL { return INITIAL; } + INITIALIZE { return INITIALIZE; } + INITIATE { return INITIATE; } + INPUT { return INPUT; } + INSPECT { return INSPECT; } + INTERFACE { return INTERFACE; } + INTO { return INTO; } + INVOKE { return INVOKE; } + IS { return IS; } + KEY { return KEY; } + LAST { return LAST; } + LEADING { return LEADING; } + LEFT { return LEFT; } + LENGTH { return LENGTH; } + LIMIT { return LIMIT; } + LIMITS { return LIMITS; } + LINAGE { return LINAGE; } + LINE { return LINE; } + LINE-COUNTER { return LINE_COUNTER; } + LINES { return LINES; } + LINKAGE { return LINKAGE; } + LOCAL-STORAGE { return LOCAL_STORAGE; } + LOCALE { return LOCALE; } + LOCATION { return LOCATION; } + LOCK { return LOCK; } + MERGE { return MERGE; } + MODE { return MODE; } + MOVE { return MOVE; } + MULTIPLY { return MULTIPLY; } + NATIONAL { return NATIONAL; } + NATIONAL-EDITED { return NATIONAL_EDITED; } + NATIVE { return NATIVE; } + NEGATIVE { return NEGATIVE; } + NESTED { return NESTED; } + NEXT { return NEXT; } + NO { return NO; } + NOT { return NOT; } + NUMBER { return NUMBER; } + NUMERIC { return NUMERIC; } + NUMERIC-EDITED { return NUMERIC_EDITED; } + OCCURS { return OCCURS; } + OF { return OF; } + OFF { return OFF; } + OMITTED { return OMITTED; } + ON { return ON; } + OPEN { return OPEN; } + OPTIONAL { return OPTIONAL; } + OPTIONS { return OPTIONS; } + OR { return OR; } + ORDER { return ORDER; } + ORGANIZATION { return ORGANIZATION; } + OTHER { return OTHER; } + OUTPUT { return OUTPUT; } + OVERFLOW { return OVERFLOW; } + OVERRIDE { return OVERRIDE; } + PACKED-DECIMAL { return PACKED_DECIMAL; } + PAGE { return PAGE; } + PAGE-COUNTER { return PAGE_COUNTER; } + PERFORM { return PERFORM; } + PF { return PF; } + PH { return PH; } + PIC { return PIC; } + PICTURE { return PICTURE; } + PLUS { return PLUS; } + POINTER { return POINTER; } + POSITIVE { return POSITIVE; } + PROCEDURE { return PROCEDURE; } + PROGRAM { return PROGRAM; } + PROGRAM-ID { return PROGRAM_ID; } + PROPERTY { return PROPERTY; } + PROTOTYPE { return PROTOTYPE; } + QUOTES { return QUOTES; } + RAISE { return RAISE; } + RAISING { return RAISING; } + RANDOM { return RANDOM; } + RD { return RD; } + READ { return READ; } + RECORD { return RECORD; } + RECORDS { return RECORDS; } + REDEFINES { return REDEFINES; } + REEL { return REEL; } + REFERENCE { return REFERENCE; } + RELATIVE { return RELATIVE; } + RELEASE { return RELEASE; } + REMAINDER { return REMAINDER; } + REMOVAL { return REMOVAL; } + RENAMES { return RENAMES; } + REPLACE { return REPLACE; } + REPLACING { return REPLACING; } + REPORT { return REPORT; } + REPORTING { return REPORTING; } + REPORTS { return REPORTS; } + REPOSITORY { return REPOSITORY; } + RESERVE { return RESERVE; } + RESET { return RESET; } + RESUME { return RESUME; } + RETURN { return RETURN; } + RETURNING { return RETURNING; } + REWIND { return REWIND; } + REWRITE { return REWRITE; } + RF { return RF; } + RH { return RH; } + RIGHT { return RIGHT; } + ROUNDED { return ROUNDED; } + RUN { return RUN; } + SAME { return SAME; } + SCREEN { return SCREEN; } + SD { return SD; } + SEARCH { return SEARCH; } + SECTION { return SECTION; } + SELECT { return SELECT; } + SENTENCE { return SENTENCE; } + SEPARATE { return SEPARATE; } + SEQUENCE { return SEQUENCE; } + SEQUENTIAL { return SEQUENTIAL; } + SET { return SET; } + SHARING { return SHARING; } + SIGN { return SIGN; } + SIZE { return SIZE; } + SORT { return SORT; } + SORT-MERGE { return SORT_MERGE; } + SOURCE { return SOURCE; } + SPACE { return SPACE; } + SPACES { return SPACES; } + SPECIAL-NAMES { return SPECIAL_NAMES; } + STANDARD { return STANDARD; } + STANDARD-1 { return STANDARD_1; } + START { return START; } + STATUS { return STATUS; } + STOP { return STOP; } + SUBTRACT { return SUBTRACT; } + SUM { return SUM; } + SUPPRESS { return SUPPRESS; } + SYMBOLIC { return SYMBOLIC; } + TALLYING { return TALLYING; } + TERMINATE { return TERMINATE; } + TEST { return TEST; } + THAN { return THAN; } + THEN { return THEN; } + THRU { return THRU; } + TIME { return TIME; } + TIMES { return TIMES; } + TO { return TO; } + TOP { return TOP; } + TRAILING { return TRAILING; } + + TYPE { return TYPE; } + TYPEDEF { return TYPEDEF; } + UNIT { return UNIT; } + UNTIL { return UNTIL; } + UP { return UP; } + UPON { return UPON; } + USAGE { return USAGE; } + USE { return USE; } + USING { return USING; } + VALUE { return VALUE; } + VARYING { return VARYING; } + WHEN { return WHEN; } + WITH { return WITH; } + WORKING-STORAGE { return WORKING_STORAGE; } + WRITE { return WRITE; } + + ZERO | + ZEROES | + ZEROS { return ZERO; } +} + +<*>{ + %EBCDIC-MODE { ydflval.number = feature_internal_ebcdic_e; + return FEATURE; } + %64-BIT-POINTER { ydflval.number = feature_embiggen_e; + return FEATURE; } +} + +<*>{ + {NAME} { + int token = keyword_tok(yytext); + if( token ) { + if(yy_flex_debug && YY_START) { + dbgmsg("missed token %s in start condition %d", + yytext, YY_START); + } + // Do not return "token" because it may have been excluded + // by a start condition. For example, REM might be a name, + // but is the name of an intrinsic function, which would + // appear only after FUNCTION. + } + if( is_integer_token() ) return numstr_of(yytext); + ydflval.string = yylval.string = xstrdup(yytext); + return typed_name(yytext); + } +} + +<*>. { + auto state = start_condition_is(); + dbgmsg("scanner error: " + "%sstart condition %s (0x%02x): scanner default rule", + YY_AT_BOL()? "(bol) " : "", state, *yytext ); + return NO_CONDITION; + } + +<<EOF>> { + + if( YY_START == quoted1 || YY_START == quoted2 ) { + error_msg(yylloc, "syntax error: unterminated string '%s'", + tmpstring); + cbl_internal_error(""); + } + yypop_buffer_state(); + + if ( !YY_CURRENT_BUFFER ) { + return 0; + } + + if( ! wait_for_the_child() ) { + yyterminate(); + } + cobol_filename_restore(); + parser_leave_file(); + + if( yydebug ) yywarn("resume parsing '%s'", cobol_filename()); + yy_set_bol(true); + } + +%% + +#pragma GCC diagnostic pop + +#include "scan_post.h" diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h new file mode 100644 index 0000000..b9bbd30 --- /dev/null +++ b/gcc/cobol/scan_ante.h @@ -0,0 +1,745 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Flex override + */ +static void /* yynoreturn */ yy_fatal_error ( const char* msg ); + +static void inline +die_fatal_error( const char msg[] ) { + cbl_internal_error("scan.o: %s", msg); + yy_fatal_error(msg); +} + +#define YY_FATAL_ERROR(msg) die_fatal_error((msg)) + +/* + * External functions + */ + +void parser_enter_file(const char *filename); +void parser_leave_file(); + +bool is_fixed_format(); +bool include_debug(); +int lexer_input( char buf[], int max_size, FILE *input ); + +const char * keyword_str( int token ); + +int repository_function_tok( const char name[] ); + +void cobol_set_indicator_column( int column ); + +void next_sentence_label(cbl_label_t*); + +int repeat_count( const char picture[] ); + +size_t program_level(); + +int ydfparse(void); + +FILE * copy_mode_start(); + +/* + * Public functions and data + */ + +cbl_label_t *next_sentence; + +static bool echo_on = false; + +void +lexer_echo( bool tf ) { + echo_on = tf; +} + +bool +lexer_echo() { + return echo_on; +} + +// IBM says a picture can be up to 50 bytes, not 1000 words. +// ISO says a picture can be up to 63 bytes. We allow for a NUL terminator. +static char orig_picture[PICTURE_MAX]; +static char orig_number[80]; + +const char * +original_picture() { + const char *out = xstrdup(orig_picture); + assert(orig_picture[0] != '\0'); + return out; +} + +char * +original_number( char input[] = NULL ) { + if( input ) { + if(sizeof(orig_number) < strlen(input) ) return NULL; + strcpy(orig_number, input); + return input; + } + char *out = xstrdup(orig_number); + assert(orig_number[0] != '\0'); + return out; +} + +/* + * Local functions + */ +static const char * start_condition_str( int sc ); +static const char * start_condition_is(); + +static bool nonspace( char ch ) { return !ISSPACE(ch); } + +static int +numstr_of( const char string[], radix_t radix = decimal_e ) { + yylval.numstr.radix = radix; + ydflval.string = yylval.numstr.string = xstrdup(string); + char *comma = strchr(yylval.numstr.string, ','); + if( comma && comma[1] == '\0' ) *comma = '\0'; + if( ! original_number(yylval.numstr.string) ) { + error_msg(yylloc, "input inconceivably long"); + return NO_CONDITION; + } + + const char *input = yylval.numstr.string; + auto eoinput = input + strlen(input); + auto p = std::find_if( input, eoinput, + []( char ch ) { return ch == 'e' || ch == 'E';} ); + + if( p < eoinput ) { + if( eoinput == std::find(input, eoinput, symbol_decimal_point()) ) { + // no decimal point: 1E0 is a valid user-defined name + ydflval.string = yylval.string = yylval.numstr.string; + return NAME; + } + assert(input < p); + // "The literal to the left of the 'E' represents the significand. It may + // be signed and shall include a decimal point. The significand shall be + // from 1 to 36 digits in length." + if( p == std::find(input, p, symbol_decimal_point()) ) { + return NO_CONDITION; + } + auto nx = std::count_if(input, p, fisdigit); + if( 36 < nx ) { + error_msg(yylloc, "significand of %s has more than 36 digits (%zu)", input, nx); + return NO_CONDITION; + } + + // "The literal to the right of the 'E' represents the exponent. It may be + // signed and shall have a maximum of four digits and no decimal point. " + // "The maximum permitted value and minimum permitted value of the + // exponent is implementor-defined." (We allow 9999.) + nx = std::count_if(p, eoinput, fisdigit); + if( 4 < nx ) { + error_msg(yylloc, "exponent %s more than 4 digits", ++p); + return NO_CONDITION; + } + if( eoinput != std::find(p, eoinput, symbol_decimal_point()) ) { + error_msg(yylloc, "exponent includes decimal point", ++p); + return NO_CONDITION; + } + + // "If all the digits in the significand are zero, then all the digits of + // the exponent shall also be zero and neither significand nor exponent + // shall have a negative sign." + bool zero_signficand = std::all_of( input, p, + []( char ch ) { + return !ISDIGIT(ch) || ch == '0'; } ); + if( zero_signficand ) { + if( p != std::find(input, p, '-') ) { + error_msg(yylloc, "zero significand of %s " + "cannot be negative", input); + return NO_CONDITION; + } + if( eoinput != std::find(p, eoinput, '-') ) { + error_msg(yylloc, "exponent of zero significand of %s " + "cannot be negative", input); + return NO_CONDITION; + } + } + } + if( 1 < std::count(input, eoinput, symbol_decimal_point()) ) { + error_msg(yylloc, "invalid numeric literal", ++p); + return NO_CONDITION; + } + + return NUMSTR; +} + +static char * +null_trim( char name[] ) { + auto p = std::find_if( name, name + strlen(name), fisspace ); + if( p < name + strlen(name) ) *p = '\0'; + return name; +} + +/* + * CDF management + */ +static int final_token; + +static inline const char * +boolalpha( bool tf ) { return tf? "True" : "False"; } + +struct cdf_status_t { + int lineno; + const char *filename; + int token; + bool parsing; + cdf_status_t( int token = 0, bool parsing = true ) + : lineno(yylineno), filename(cobol_filename()) + , token(token), parsing(parsing) + {} + bool toggle() { return parsing = ! parsing; } + + const char * str() const { + static char line[132]; + snprintf(line, sizeof(line), "%s:%d: %s, parsing %s", + filename, lineno, keyword_str(token), boolalpha(parsing)); + return line; + } + static const char * as_string( const cdf_status_t& status ) { + return status.str(); + } +}; + +/* + * Scanning status is true if tokens are being parsed and false if not (because + * CDF is skipping some code). Because CDF status is nested, status is true + * only if the whole stack is true. That is, if B is stacked on A, and A is + * false, then all of B is skipped, regardless of >>IF and >>ELSE for B. + */ +static bool run_cdf( int token ); + +static class parsing_status_t : public std::stack<cdf_status_t> { + typedef int (parser_t)(void); + struct parsing_state_t { + bool at_eof, expect_field_level; + int pending_token; + parser_t *parser; + parsing_state_t() + : at_eof(false) + , expect_field_level(true) + , pending_token(0) + , parser(yyparse) + {} + } state, shadow; + + public: + bool on() const { // true only if all true + bool parsing = std::all_of( c.begin(), c.end(), + []( const auto& status ) { return status.parsing; } ); + return parsing; + } + + bool feed_a_parser() const { + return on() || state.parser == ydfparse; + } + + void need_level( bool tf ) { state.expect_field_level = tf; } + bool need_level() const { return state.expect_field_level; } + + void parser_save( parser_t * new_parser ) { + shadow = state; + state.parser = new_parser; + } + void parser_restore() { + state.parser = shadow.parser; + } + + void inject_token( int token ) { state.pending_token = token; } + int pending_token() { + int token = state.pending_token; + state.pending_token = 0; + return token; + } + + void at_eof( bool tf ) { state.at_eof = shadow.at_eof = tf; assert(tf); } + bool at_eof() const { return state.at_eof; } + + bool in_cdf() const { return state.parser == ydfparse; } + bool normal() const { return on() && state.parser == yyparse; } + + void splat() const { + int i=0; + for( const auto& status : c ) { + yywarn( "%4d\t%s", ++i, status.str() ); + } + } +} parsing; + +// Used only by parser, so scanner_normal() obviously true. +void field_done() { orig_picture[0] = '\0'; parsing.need_level(true); } + +static int scanner_token() { + if( parsing.empty() ) { + error_msg(yylloc, ">>ELSE or >>END-IF without >>IF"); + return NO_CONDITION; + } + return parsing.top().token; +} + +bool scanner_parsing() { return parsing.on(); } +bool scanner_normal() { return parsing.normal(); } + +void scanner_parsing( int token, bool tf ) { + parsing.push( cdf_status_t(token, tf) ); + if( yydebug ) { + yywarn("%10s: parsing now %5s, depth %zu", + keyword_str(token), boolalpha(parsing.on()), parsing.size()); + parsing.splat(); + } +} +void scanner_parsing_toggle() { + if( parsing.empty() ) { + error_msg(yylloc, ">>ELSE without >>IF"); + return; + } + parsing.top().toggle(); + if( yydebug ) { + yywarn("%10s: parsing now %5s", + keyword_str(CDF_ELSE), boolalpha(parsing.on())); + } +} +void scanner_parsing_pop() { + if( parsing.empty() ) { + error_msg(yylloc, ">>END-IF without >>IF"); + return; + } + parsing.pop(); + if( yydebug ) { + yywarn("%10s: parsing now %5s, depth %zu", + keyword_str(CDF_END_IF), boolalpha(parsing.on()), parsing.size()); + parsing.splat(); + } +} + + +static bool level_needed() { + return scanner_normal() && parsing.need_level(); +} + +static void level_found() { + if( scanner_normal() ) parsing.need_level(false); +} + +#define myless(N) \ + do { \ + auto n(N); \ + trim_location(n); \ + yyless(n); \ + } while(0) + +class enter_leave_t { + typedef void( parser_enter_file_f)(const char *filename); + typedef void (parser_leave_file_f)(); + parser_enter_file_f *entering; + parser_leave_file_f *leaving; + const char *filename; + + public: + enter_leave_t() : entering(NULL), leaving(NULL), filename(NULL) {} + enter_leave_t( parser_enter_file_f *entering, const char *filename ) + : entering(entering), leaving(NULL), filename(filename) {} + enter_leave_t(parser_leave_file_f *leaving) + : entering(NULL), leaving(leaving), filename(NULL) {} + + void notify() { + if( entering ) { + cobol_filename(filename, 0); + if( yy_flex_debug ) dbgmsg("starting line %4d of %s", + yylineno, filename); + entering(filename); + gcc_assert(leaving == NULL); + } + if( leaving ) { + auto name = cobol_filename_restore(); + if( yy_flex_debug ) dbgmsg("resuming line %4d of %s", + yylineno, name? name : "<none>"); + leaving(); + gcc_assert(entering == NULL); + } + } +}; + +static class input_file_status_t { + std::queue <enter_leave_t> inputs; + public: + void enter(const char *filename) { + inputs.push( enter_leave_t(parser_enter_file, filename) ); + } + void leave() { + inputs.push( parser_leave_file ); + } + void notify() { + while( ! inputs.empty() ) { + auto enter_leave = inputs.front(); + enter_leave.notify(); + inputs.pop(); + } + } +} input_file_status; + +void input_file_status_notify() { input_file_status.notify(); } + +void cdf_location_set(YYLTYPE loc); + +static void +update_location() { + YYLTYPE loc = { + yylloc.last_line, yylloc.last_column, + yylineno, yylloc.last_column + yyleng + }; + + auto nline = std::count(yytext, yytext + yyleng, '\n'); + if( nline ) { + char *p = static_cast<char*>(memrchr(yytext, '\n', yyleng)); + loc.last_column = (yytext + yyleng) - p; + } + + yylloc = loc; + cdf_location_set(loc); + location_dump(__func__, __LINE__, "yylloc", yylloc); +} + +static void +trim_location( int nkeep) { + gcc_assert( 0 <= nkeep && nkeep <= yyleng ); + struct { char *p, *pend; + size_t size() const { return pend - p; } + } rescan = { yytext + nkeep, yytext + yyleng }; + + auto nline = std::count(rescan.p, rescan.pend, '\n'); + dbgmsg("%s:%d: yyless(%d), rescan '%.*s' (%zu lines, %d bytes)", + __func__, __LINE__, + nkeep, + int(rescan.size()), rescan.p, + nline, rescan.size()); + if( nline ) { + gcc_assert( yylloc.first_line + nline <= yylloc.last_line ); + yylloc.last_line =- int(nline); + char *p = static_cast<char*>(memrchr(rescan.p, '\n', rescan.size())); + yylloc.last_column = rescan.pend - ++p; + return; + } + + gcc_assert( int(rescan.size()) < yylloc.last_column ); + yylloc.last_column -= rescan.size(); + if( yylloc.last_column < yylloc.first_column ) { + yylloc.first_column = 1; + } + + location_dump(__func__, __LINE__, "yylloc", yylloc); +} + +static void +update_location_col( const char str[], int correction = 0) { + auto col = yylloc.last_column - strlen(str) + correction; + if( col > 0 ) { + yylloc.first_column = col; + } + location_dump(__func__, __LINE__, "yylloc", yylloc); +} + +#define not_implemented(...) cbl_unimplemented_at(yylloc, __VA_ARGS__) + +#define YY_USER_INIT do { \ + static YYLTYPE ones = {1,1, 1,1}; \ + yylloc = ones; \ + } while(0) + +/* + * YY_DECL is the generated lexer. The parser calls yylex(). yylex() invokes + * next_token(), which calls this lexer function. The Flex-generated code + * updates neither yylval nor yylloc. That job is left to the actions. + * + * The parser relies on yylex to set yylval and yylloc each time it is + * called. It apparently maintains a separate copy for each term, and uses + * YYLLOC_DEFAULT() to update the location of nonterminals. + */ +#define YY_DECL int lexer(void) + +#define YY_USER_ACTION \ + update_location(); \ + if( yy_flex_debug ) dbgmsg("SC: %s", start_condition_is() ); + +# define YY_INPUT(buf, result, max_size) \ +{ \ + if( 0 == (result = lexer_input(buf, max_size, yyin)) ) \ + result = YY_NULL; \ +} + +#define scomputable(T, C) \ + yylval.computational.type=T, \ + yylval.computational.capacity=C, \ + yylval.computational.signable=true, COMPUTATIONAL +#define ucomputable(T, C) \ + yylval.computational.type=T, \ + yylval.computational.capacity=C, \ + yylval.computational.signable=false, COMPUTATIONAL + +static char *tmpstring = NULL; + +#define PROGRAM current_program_index() + +static uint32_t +level_of( const char input[] ) { + unsigned int output = 0; + + if( input[0] == '0' ) input++; + + if( 1 != sscanf(input, "%u", &output) ) { + yywarn( "%s:%d: invalid level '%s'", __func__, __LINE__, input ); + } + + return output; +} + +static inline int +ndigit(int len) { + char *input = TOUPPER(yytext[0]) == 'V'? yytext + 1 : yytext; + int n = repeat_count(input); + return n == -1? len : n; +} + +static int +picset( int token ) { + static const char * const eop = orig_picture + sizeof(orig_picture); + char *p = orig_picture + strlen(orig_picture); + + if( eop < p + yyleng ) { + error_msg(yylloc, "PICTURE exceeds maximum size of %zu bytes", + sizeof(orig_picture) - 1); + } + snprintf( p, eop - p, "%s", yytext ); + return token; +} + +static inline bool +is_integer_token( int *pvalue = NULL ) { + int v, n = 0; + if( pvalue == NULL ) pvalue = &v; + return 1 == sscanf(yytext, "%d%n", pvalue, &n) && n == yyleng; +} + +static bool need_nume = false; +bool need_nume_set( bool tf ) { + dbgmsg( "need_nume now %s", tf? "true" : "false" ); + return need_nume = tf; +} + +static int datetime_format_of( const char input[] ); + +static int symbol_function_token( const char name[] ) { + auto e = symbol_function( 0, name ); + return e ? symbol_index(e) : 0; +} + +bool in_procedure_division(void ); + +static symbol_elem_t * +symbol_exists( const char name[] ) { + typedef std::map <std::string, size_t> name_cache_t; + static std::map <size_t, name_cache_t> cachemap; + + cbl_name_t lname; + std::transform( name, name + strlen(name) + 1, lname, tolower ); + auto& cache = cachemap[PROGRAM]; + + if( in_procedure_division() && cache.empty() ) { + for( auto e = symbols_begin(PROGRAM) + 1; + PROGRAM == e->program && e < symbols_end(); e++ ) { + if( e->type == SymFile ) { + cbl_file_t *f(cbl_file_of(e)); + cbl_name_t lname; + std::transform( f->name, f->name + strlen(f->name) + 1, lname, tolower ); + cache[lname] = symbol_index(e); + continue; + } + if( e->type == SymField ) { + auto f(cbl_field_of(e)); + cbl_name_t lname; + std::transform( f->name, f->name + strlen(f->name) + 1, lname, tolower ); + cache[lname] = symbol_index(e); + } + } + cache.erase(""); + } + auto p = cache.find(lname); + + if( p == cache.end() ) { + symbol_elem_t * e = symbol_field( PROGRAM, 0, name ); + return e; + } + + return symbol_at(p->second); +} + +static int +typed_name( const char name[] ) { + if( 0 == PROGRAM ) return NAME; + if( need_nume ) { need_nume_set(false); return NUME; } + + int token = repository_function_tok(name); + switch(token) { + case 0: + break; + case FUNCTION_UDF_0: + yylval.number = symbol_function_token(name); + __attribute__((fallthrough)); + default: + return token; + } + + struct symbol_elem_t *e = symbol_special( PROGRAM, name ); + if( e ) return cbl_special_name_of(e)->token; + + if( (token = redefined_token(name)) ) { return token; } + + e = symbol_exists( name ); + + auto type = e && e->type == SymField? cbl_field_of(e)->type : FldInvalid; + + switch(type) { + case FldLiteralA: + { + auto f = cbl_field_of(e); + if( is_constant(f) ) { + int token = datetime_format_of(f->data.initial); + if( token ) { + yylval.string = xstrdup(f->data.initial); + return token; + } + } + } + __attribute__((fallthrough)); + case FldLiteralN: + { + auto f = cbl_field_of(e); + if( type == FldLiteralN ) { + yylval.numstr.radix = + f->has_attr(hex_encoded_e)? hexadecimal_e : decimal_e; + yylval.numstr.string = xstrdup(f->data.initial); + return NUMSTR; + } + if( !f->has_attr(record_key_e) ) { // not a key-name literal + yylval.literal.set(f); + ydflval.string = yylval.literal.data; + return LITERAL; + } + } + __attribute__((fallthrough)); + case FldInvalid: + case FldGroup: + case FldForward: + case FldIndex: + case FldAlphanumeric: + case FldPacked: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldNumericBinary: + case FldFloat: + case FldNumericBin5: + case FldPointer: + return NAME; + case FldSwitch: + return SWITCH; + case FldClass: + return cbl_field_of(e)->level == 88? NAME88 : CLASS_NAME; + break; + default: + yywarn("%s:%d: invalid symbol type %s for symbol \"%s\"", + __func__, __LINE__, cbl_field_type_str(type), name); + return NAME; + } + return cbl_field_of(e)->level == 88? NAME88 : NAME; +} + +int +retype_name_token() { + return typed_name(ydflval.string); +} + +static char * +tmpstring_append( int len ) { + const char *extant = tmpstring == NULL ? "" : tmpstring; + char *s = xasprintf("%s%.*s", extant, len, yytext); + free(tmpstring); + if( yy_flex_debug && getenv(__func__) ) { + yywarn("%s: value is now '%s'", __func__, s); + } + return tmpstring = s; +} + +#define pop_return yy_pop_state(); return + +static bool +wait_for_the_child(void) { + pid_t pid; + int status; + + if( (pid = wait(&status)) == -1 ) { + yywarn("internal error: no pending child CDF parser process"); + return false; + } + + if( WIFSIGNALED(status) ) { + yywarn( "process %d terminated by %s", pid, strsignal(WTERMSIG(status)) ); + return false; + } + if( WIFEXITED(status) ) { + if( WEXITSTATUS(status) != 0 ) { + yywarn("process %d exited with status %d", pid, status); + return false; + } + } + if( yy_flex_debug ) { + yywarn("process %d exited with status %d", pid, status); + } + return true; +} + +static bool is_not = false; + +static uint64_t +integer_of( const char input[], bool is_hex = false) { + uint64_t output = 0; + const char *fmt = is_hex? "%ul" : "%hl"; + + if( input[0] == '0' ) input++; + + if( 1 != sscanf(input, fmt, &output) ) { + yywarn( "%s:%d: invalid integer '%s'", __func__, __LINE__, input ); + } + + return output; +} diff --git a/gcc/cobol/scan_post.h b/gcc/cobol/scan_post.h new file mode 100644 index 0000000..dabb168 --- /dev/null +++ b/gcc/cobol/scan_post.h @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static const char * +start_condition_str( int sc ) { + const char *state = "???"; + switch(sc) { + case INITIAL: state = "INITIAL"; break; + case author_state: state = "author_state"; break; + case basis: state = "basis"; break; + case bool_state: state = "bool_state"; break; + case cdf_state: state = "cdf_state"; break; + case classify: state = "classify"; break; + case copy_state: state = "copy_state"; break; + case comment_entries: state = "comment_entries"; break; + case date_state: state = "date_state"; break; + case datetime_fmt: state = "datetime_fmt"; break; + case dot_state: state = "dot_state"; break; + case exception: state = "exception"; break; + case field_level: state = "field_level"; break; + case field_state: state = "field_state"; break; + case function: state = "function"; break; + case hex_state: state = "hex_state"; break; + case ident_state: state = "ident_state"; break; + case integer_count: state = "integer_count"; break; + case name_state: state = "name_state"; break; + case numeric_state: state = "numeric_state"; break; + case numstr_state: state = "numstr_state"; break; + case partial_name: state = "partial_name"; break; + case picture: state = "picture"; break; + case picture_count: state = "picture_count"; break; + case procedure_div: state = "procedure_div"; break; + case program_id_state: state = "program_id_state"; break; + case quoted1: state = "quoted1"; break; + case quoted2: state = "quoted2"; break; + case quoteq: state = "quoteq"; break; + case raising: state = "raising"; break; + case subscripts: state = "subscripts"; break; + case sort_state: state = "sort_state"; break; + } + return state; +} + +static const char * +start_condition_is() { return start_condition_str( YY_START ); } + +/* + * Match datetime constants. + * + * A 78 or CONSTANT could have a special literal for formatted + * date/time functions. + */ + +static int +datetime_format_of( const char input[] ) { + + static const char date_fmt_b[] = "YYYYMMDD|YYYYDDD|YYYYWwwD"; + static const char date_fmt_e[] = "YYYY-MM-DD|YYYY-DDD|YYYY-Www-D"; + + static const char time_fmt_b[] = + "hhmmss([.,]s+)?|hhmmss([.,]s+)?Z|hhmmss([.,]s+)?[+]hhmm|"; + static const char time_fmt_e[] = + "hh:mm:ss([.,]s+)?|hh:mm:ss([.,]s+)?Z|hh:mm:ss([.,]s+)?[+]hh:mm"; + + static char date_pattern[ 3 * sizeof(date_fmt_e) ]; + static char time_pattern[ 3 * sizeof(time_fmt_e) ]; + static char datetime_pattern[ 6 * sizeof(time_fmt_e) ]; + + static struct pattern_t { + regex_t re; + const char *regex; + int token; + } patterns[] = { + { {}, datetime_pattern, DATETIME_FMT }, + { {}, date_pattern, DATE_FMT }, + { {}, time_pattern, TIME_FMT }, + }, * eopatterns = patterns + COUNT_OF(patterns);; + + // compile patterns + if( ! date_pattern[0] ) { + sprintf(date_pattern, "%s|%s", date_fmt_b, date_fmt_e); + sprintf(time_pattern, "%s|%s", time_fmt_b, time_fmt_e); + + sprintf(datetime_pattern, "(%sT%s)|(%sT%s)", + date_fmt_b, time_fmt_b, + date_fmt_e, time_fmt_e); + + for( auto p = patterns; p < eopatterns; p++ ) { + static const int cflags = REG_EXTENDED | REG_ICASE; + static char msg[80]; + int erc; + + if( 0 != (erc = regcomp(&p->re, p->regex, cflags)) ) { + regerror(erc, &p->re, msg, sizeof(msg)); + yywarn("%s:%d: %s: %s", __func__, __LINE__, keyword_str(p->token), msg); + } + } + } + + // applies only in the datetime_fmt start condition + if( datetime_fmt == YY_START ) { + yy_pop_state(); + if( input == NULL ) return 0; + + // See if the input is a date, time, or datetime pattern string. + static const int nmatch = 3; + regmatch_t matches[nmatch]; + + auto p = std::find_if( patterns, eopatterns, + [input, &matches]( auto& pattern ) { + auto erc = regexec( &pattern.re, input, + COUNT_OF(matches), matches, 0 ); + return erc == 0; + } ); + + return p != eopatterns? p->token : 0; + } + return 0; +} + + +/* + * >>DEFINE, >>IF, and >>EVALUATE + */ + +static bool +is_cdf_token( int token ) { + switch(token) { + case CDF_DEFINE: + case CDF_DISPLAY: + case CDF_IF: case CDF_ELSE: case CDF_END_IF: + case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE: + return true; + case CALL_COBOL: + case CALL_VERBATIM: + case COPY: + case TURN: + return true; + } + return false; +} + +static bool +is_cdf_condition_token( int token ) { + switch(token) { + case CDF_IF: case CDF_ELSE: case CDF_END_IF: + case CDF_EVALUATE: case CDF_WHEN: case CDF_END_EVALUATE: + return true; + } + return false; +} + +/* + * IF and EVALUATE are partially parsed in cdf.y. ELSE and WHEN, etc., are + * valid only in context. + */ +static bool +valid_conditional_context( int token ) { + switch(token) { + case CDF_DEFINE: + case CDF_IF: + case CDF_EVALUATE: + return true; + case CDF_ELSE: + case CDF_END_IF: + return scanner_token() == CDF_IF; + case CDF_WHEN: + case CDF_END_EVALUATE: + return scanner_token() == CDF_EVALUATE; + } + return true; // all other CDF tokens valid regardless of context +} + +static bool +run_cdf( int token ) { + if( ! valid_conditional_context(token) ) { + error_msg(yylloc, "CDF syntax error at '%s'", keyword_str(token)); + return false; + } + + parsing.inject_token(token); // because it will be needed by CDF parser + + if( yy_flex_debug ) dbgmsg("CDF parser start with '%s'", keyword_str(token)); + + parsing.parser_save(ydfparse); + + int erc = ydfparse(); // Parse the CDF directive. + + parsing.parser_restore(); + + if( YY_START == cdf_state ) yy_pop_state(); + + if( yy_flex_debug ) { + dbgmsg("CDF parser returned %d, scanner SC <%s>", erc, start_condition_is()); + } + + return 0 == erc; +} + +#include <queue> +struct pending_token_t { + int token; + YYSTYPE value; + pending_token_t( int token, YYSTYPE value ) : token(token), value(value) {} +}; +#define PENDING(T) pending_token_t( (T), yylval ) + +static std::queue<pending_token_t> pending_tokens; + +int next_token() { + int token = lexer(); + return token; +} + +extern int ydfchar; +bool in_procedure_division(void); + +// act on CDF tokens +int +prelex() { + static bool in_cdf = false; + int token = next_token(); + + if( in_cdf ) { return token; } + if( ! is_cdf_token(token) ) { return token; } + + in_cdf = true; + + assert(is_cdf_token(token)); + + while( is_cdf_token(token) ) { + + if( ! run_cdf(token) ) { + dbgmsg( ">>CDF parser failed" ); + return NO_CONDITION; + } + // Return the CDF's discarded lookahead token, if extant. + token = ydfchar > 0? ydfchar : next_token(); + if( token == NO_CONDITION && parsing.at_eof() ) { + return token = YYEOF; + } + + // Reenter cdf parser only if next token could affect parsing state. + if( ! parsing.on() && ! is_cdf_condition_token(token) ) break; + } + + if( yy_flex_debug ) { + dbgmsg("scanner SC <%s>", start_condition_is()); + } + + if( YY_START == copy_state || YY_START == cdf_state ) { + if( token == NAME ) { + auto tok = keyword_tok(ydflval.string); + if( tok ) token = tok; + } + yy_pop_state(); + dbgmsg("scanner SC <%s>, token now %s", + start_condition_is(), keyword_str(token)); + } + + /* + * The final, rejected CDF token might be a LEVEL number. + */ + if( YY_START == field_state && level_needed() ) { + switch( token ) { + case NUMSTR: + if( yy_flex_debug ) yywarn("final token is NUMSTR"); + yylval.number = level_of(yylval.numstr.string); + token = LEVEL; + break; + case YDF_NUMBER: + if( yy_flex_debug ) yywarn("final token is YDF_NUMBER"); + yylval.number = ydflval.number; + token = LEVEL; + break; + } + if( token == LEVEL ) { + switch(yylval.number) { + case 66: + token = LEVEL66; + break; + case 78: + token = LEVEL78; + break; + case 88: + token = LEVEL78; + break; + } + } + } + + dbgmsg( ">>CDF parser done, %s returning " + "%s (because final_token %s, lookhead %d) on line %d", __func__, + keyword_str(token), keyword_str(final_token), + ydfchar, yylineno ); + in_cdf = false; + return token; +} + +/* There are 2 parsers and one scanner. + * yyparse calls yylex. + * yylex calls prelex + * prelex calls lexer, the scanner produced by flex. + * lexer reads input from yyin via lexer_input. + * + * prelex intercepts CDF statements, each of which it parses with ydfparse. + * ydfparse affects CDF variables, which may affect how yylex treats + * the input stream. + * + * Because the lexer is called recursively: + * + * yyparse -> yylex -> ydfparse -> yylex + * + * the global state of the scanner has changed when ydfparse returns. Part of + * that state is the unused lookahead token that ydfparse discarded, stored in + * final_token. prelex then returns final_token as its own, which is duly + * returned to yyparse. + */ + +int +yylex(void) { + static bool produce_next_sentence_target = false; + int token = parsing.pending_token(); + + if( parsing.at_eof() ) return YYEOF; + if( token ) return token; + + /* + * NEXT SENTENCE jumps to an implied CONTINUE at the next dot ('.'). + * Documentation says variously that the implied CONTINUE is before or after + * that dot, but the meaning is one: after the statement that precedes the + * dot. + * + * When the lexer encounters the dot, it returns it to the parser, which may + * use it as a look-ahead token to decide the grammar production. By the + * time it returns to the lexer looking for its next token, the parser will + * have taken whatever actions the dot decided. At that point, the lexer + * injects the label that NEXT SENTENCE jumps to. + */ + if( produce_next_sentence_target ) { + next_sentence_label(next_sentence); + produce_next_sentence_target = false; + } + + do { + token = prelex(); + if( yy_flex_debug ) { + if( parsing.in_cdf() ) { + dbgmsg( "%s:%d: %s routing %s to CDF parser", __func__, __LINE__, + start_condition_is(), keyword_str(token) ); + } else if( !parsing.on() ) { + dbgmsg( "eating %s because conditional compilation is FALSE", + keyword_str(token) ); + } + } + + } while( token && ! parsing.feed_a_parser() ); + + if( next_sentence && token == '.' ) { + produce_next_sentence_target = true; + } + + if( parsing.normal() ) { + final_token = token; + } + + if( token == YYEOF && parsing.in_cdf() ) { + if( yy_flex_debug) dbgmsg("deflecting EOF"); + parsing.at_eof(true); + return NO_CONDITION; + } + + return token; +} diff --git a/gcc/cobol/show_parse.h b/gcc/cobol/show_parse.h new file mode 100644 index 0000000..81b1283 --- /dev/null +++ b/gcc/cobol/show_parse.h @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SHOW_PARSE_H_ +#define SHOW_PARSE_H_ + +// These macros provide information about what the compiler is doing, +// and about what the compiled code is doing. + +// SHOW_PARSE gives information when parser_xxx functions are entered, and +// then attempts to give as much information as it can at compile time about +// variables and their characteristics, the contents of literals, and such. It +// doesn't affect the executable at all. + +// TRACE1 lays down code for run-time tracing. + +// SHOW_PARSE must be followed by a bracketed set of instructions, no semicolon + +// This construction isn't really necessary; getenv() apparently runs pretty +// fast. But using makes compiling a large number of programs just perceptably +// quicker. So, I am using it; it's cheap. +extern bool bSHOW_PARSE; +extern bool show_parse_sol; +extern int show_parse_indent; + +extern char const *bTRACE1; +extern tree trace_handle; +extern tree trace_indent; +extern bool cursor_at_sol; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +#define RETURN_IF_PARSE_ONLY \ + do { if( mode_syntax_only() ) return; } while(0) + +#define SHOW_PARSE1 if(bSHOW_PARSE) +#define SHOW_PARSE RETURN_IF_PARSE_ONLY; if(bSHOW_PARSE) + +// _HEADER and _END are generally the first and last things inside the +// SHOW_PARSE statement. They don't have to be; SHOW_PARSE can be used +// anywhere +#define SHOW_PARSE_HEADER do \ + { \ + if(!show_parse_sol){fprintf(stderr, "\n");} \ + show_parse_indent=fprintf(stderr, \ + "( %d ) %s():" , \ + (CURRENT_LINE_NUMBER), __func__); \ + show_parse_sol=false; \ + }while(0); +#define SHOW_PARSE_END do{fprintf(stderr, "\n");show_parse_sol=true;}while(0); + +// This does one simple text string +#define SHOW_PARSE_TEXT(a) do \ + { \ + fprintf(stderr, "%s", a); \ + show_parse_sol=false; \ + }while(0); + +#define SHOW_PARSE_INDENT do{ \ + if(!show_parse_sol){fprintf(stderr, "\n");} \ + for(int i=0; i<show_parse_indent-1; i++) \ + {fprintf(stderr, " ");} \ + fprintf(stderr, ": "); \ + show_parse_sol=false; \ + }while(0); + +// This does three simple text strings. +#define SHOW_PARSE_TEXT_AB(pre, a, post) do \ + { \ + SHOW_PARSE_TEXT(pre);SHOW_PARSE_TEXT(a);SHOW_PARSE_TEXT(post) \ + }while(0); + +// +#define SHOW_PARSE_FIELD(pre, b) \ + do \ + { \ + fprintf(stderr, "%s", pre); \ + if( !(b) ) \ + { \ + fprintf(stderr, "parameter " #b " is NULL"); \ + } \ + else \ + { \ + fprintf(stderr, "%s", (b)->name); \ + if( (b)->type == FldLiteralA || (b)->type == FldLiteralN ) \ + { \ + fprintf(stderr, " \"%s\"", (b)->data.initial); \ + } \ + else \ + { \ + fprintf(stderr, "<%s>", cbl_field_type_str((b)->type)); \ + } \ + } \ + show_parse_sol = false; \ + } while(0); + +#define SHOW_PARSE_REF(pre, b) \ + do \ + { \ + fprintf(stderr, "%s", pre); \ + if( !(b).field ) \ + { \ + fprintf(stderr, "parameter " #b".field is NULL"); \ + } \ + else \ + { \ + fprintf(stderr, "%s", (b).field->name); \ + if( (b).field->type == FldLiteralA || (b).field->type == FldLiteralN ) \ + { \ + fprintf(stderr, " \"%s\"", (b).field->data.initial); \ + } \ + else \ + { \ + fprintf(stderr, "<%s>", cbl_field_type_str((b).field->type)); \ + } \ + } \ + if( (b).nsubscript) \ + { \ + fprintf(stderr,"("); \ + for(size_t jjj=0; jjj<(b).nsubscript; jjj++) \ + { \ + if(jjj) \ + { \ + SHOW_PARSE_FIELD(" ", (b).subscripts[jjj].field) \ + } \ + else \ + { \ + SHOW_PARSE_FIELD("", (b).subscripts[jjj].field) \ + } \ + } \ + fprintf(stderr,")"); \ + } \ + show_parse_sol = false; \ + } while(0); + +#define SHOW_PARSE_LABEL(a, b) \ + do \ + { \ + fprintf(stderr, "%s", a); \ + if( !b ) \ + { \ + fprintf(stderr, "label " #b " is NULL"); \ + } \ + else \ + { \ + fprintf(stderr, " %p:%s (%s)", b, b->name, b->type_str()); \ + } \ + show_parse_sol = false; \ + } while(0); + +#define TRACE1 if(bTRACE1) +#define TRACE1_HEADER do \ + { \ + if(!cursor_at_sol){gg_fprintf(trace_handle , 0, "\n");} \ + gg_assign(trace_indent, \ + gg_fprintf( trace_handle , \ + 2, \ + ">>>>>>( %d )(%s) ", \ + build_int_cst_type(INT, CURRENT_LINE_NUMBER), \ + gg_string_literal(__func__))); \ + }while(0); + +#define TRACE1_INDENT do{ \ + if(!cursor_at_sol){gg_fprintf(trace_handle , 0, "\n");} \ + tree counter = gg_define_int(); \ + gg_assign(counter, integer_zero_node); \ + WHILE(counter, lt_op, trace_indent) \ + gg_fprintf(trace_handle , 0, " "); \ + gg_increment(counter); \ + WEND \ + }while(0); + +#define TRACE1_END do{gg_fprintf(trace_handle, 0, "\n");cursor_at_sol=true;}while(0); + +#define TRACE1_TEXT(a) do{cursor_at_sol=false;gg_fprintf(trace_handle, 1, "%s", gg_string_literal(a));}while(0); +#define TRACE1_TEXT_ABC(a,b,c) do{TRACE1_TEXT(a);TRACE1_TEXT(b);TRACE1_TEXT(c)}while(0); + +#define TRACE1_FIELD_VALUE(a, field, b) \ + do \ + { \ + cursor_at_sol=false; \ + if ( field->type == FldConditional ) \ + { \ + gg_fprintf(trace_handle, 1, "%s \"", gg_string_literal(a)); \ + parser_display_internal_field(trace_handle, field, false); \ + gg_fprintf(trace_handle, 1, "\" %s", gg_string_literal(b)); \ + } \ + else \ + { \ + IF( member(field->var_decl_node, "data"), eq_op, gg_cast(UCHAR_P, null_pointer_node) ) \ + { \ + gg_fprintf(trace_handle, 1, "%s ", gg_string_literal(a)); \ + gg_fprintf(trace_handle, 0, "NULL"); \ + gg_fprintf(trace_handle, 1, " %s", gg_string_literal(b)); \ + } \ + ELSE \ + { \ + if( field->type == FldGroup \ + || field->type == FldAlphanumeric \ + || field->type == FldAlphaEdited \ + || field->type == FldLiteralA ) \ + { \ + gg_fprintf(trace_handle, 1, "%s \"", gg_string_literal(a)); \ + parser_display_internal_field(trace_handle, field, false); \ + gg_fprintf(trace_handle, 1, "\" %s", gg_string_literal(b)); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 1, "%s [", gg_string_literal(a)); \ + parser_display_internal_field(trace_handle, field, false); \ + gg_fprintf(trace_handle, 1, "] %s", gg_string_literal(b)); \ + } \ + } \ + ENDIF \ + } \ + }while(0); + +#define TRACE1_REFER_VALUE(a, refer, b) \ + do \ + { \ + if( refer.field ) \ + { \ + cursor_at_sol=false; \ + IF( member(refer.field->var_decl_node, "data"), eq_op, gg_cast(UCHAR_P, null_pointer_node) ) \ + { \ + gg_fprintf(trace_handle, 1, "%s ", gg_string_literal(a)); \ + gg_fprintf(trace_handle, 0, "NULL"); \ + gg_fprintf(trace_handle, 1, " %s", gg_string_literal(b)); \ + } \ + ELSE \ + { \ + if( refer.field->type == FldGroup \ + || refer.field->type == FldAlphanumeric \ + || refer.field->type == FldAlphaEdited \ + || refer.field->type == FldLiteralA ) \ + { \ + gg_fprintf(trace_handle, 1, "%s \"", gg_string_literal(a)); \ + parser_display_internal(trace_handle, refer, false); \ + gg_fprintf(trace_handle, 1, "\" %s", gg_string_literal(b)); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 1, "%s [", gg_string_literal(a)); \ + parser_display_internal(trace_handle, refer, false); \ + gg_fprintf(trace_handle, 1, "] %s", gg_string_literal(b)); \ + } \ + } \ + ENDIF \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 0, "refer.field is NULL"); \ + } \ + }while(0); + +#define TRACE1_FIELD_INFO(pre, b) \ + do{ \ + cursor_at_sol=false; \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal(pre)); \ + if( !b ) \ + { \ + gg_fprintf(trace_handle, 0, "field " #b " is NULL"); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal(b->name)); \ + gg_fprintf(trace_handle, 1, " (%s", gg_string_literal(cbl_field_type_str((b)->type))); \ + if( b->type != FldLiteralN && b->type != FldConditional ) \ + { \ + cbl_field_t* B(b); \ + if( !b->var_decl_node ) \ + { \ + gg_fprintf(trace_handle, 0, #b "->var_decl_node is NULL", NULL_TREE); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 1, " attr 0x%lx", member(B, "attr" )); \ + gg_fprintf(trace_handle, 1, " c:o:d:r %ld", member(B, "capacity")); \ + gg_fprintf(trace_handle, 1, ":%ld", member(B, "offset" )); \ + gg_fprintf(trace_handle, 1, ":%d", gg_cast(INT, (member(B, "digits" )))); \ + gg_fprintf(trace_handle, 1, ":%d", gg_cast(INT, (member(B, "rdigits" )))); \ + } \ + } \ + else if( b->type == FldLiteralN ) \ + { \ + gg_fprintf(trace_handle, 1, " attr 0x%lx", build_int_cst_type(SIZE_T, b->attr)); \ + gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, b->data.capacity)); \ + gg_fprintf(trace_handle, 1, ":%ld", build_int_cst_type(SIZE_T, b->offset)); \ + gg_fprintf(trace_handle, 1, ":%d", build_int_cst_type(INT, b->data.digits)); \ + gg_fprintf(trace_handle, 1, ":%d", build_int_cst_type(INT, b->data.rdigits)); \ + } \ + gg_fprintf(trace_handle, 0, ")"); \ + } \ + }while(0); + +#define TRACE1_REFER_INFO(pre, b) \ + do{ \ + cursor_at_sol=false; \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal(pre)); \ + if( !(b).field ) \ + { \ + gg_fprintf(trace_handle, 0, #b ".field is NULL"); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal( (b).field->name ? (b).field->name:"")); \ + if( b.nsubscript ) \ + { \ + gg_fprintf(trace_handle, 0, "("); \ + for(unsigned int i=0; i<b.nsubscript; i++) \ + { \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal( b.subscripts[i].field->name ? b.subscripts[i].field->name : "" )); \ + if( i<b.nsubscript-1 ) \ + { \ + gg_fprintf(trace_handle, 0, " "); \ + } \ + } \ + if( b.refmod.from || b.refmod.len ) \ + { \ + gg_fprintf(trace_handle, 0, "("); \ + if( b.refmod.from ) \ + { \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal( b.refmod.from->name() ? b.refmod.from->name() : "" )); \ + } \ + gg_fprintf(trace_handle, 0, ":"); \ + if( b.refmod.len ) \ + { \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal( b.refmod.len->name() ? b.refmod.len->name() : "" )); \ + } \ + gg_fprintf(trace_handle, 0, "("); \ + } \ + gg_fprintf(trace_handle, 0, ")"); \ + } \ + gg_fprintf(trace_handle, 1, " (%s", gg_string_literal(cbl_field_type_str((b).field->type))); \ + if( (b).field->type != FldLiteralN && (b).field->type != FldConditional ) \ + { \ + if( !(b).field->var_decl_node ) \ + { \ + gg_fprintf(trace_handle, 0, #b ".field->var_decl_node is NULL", NULL_TREE); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 1, " attr 0x%lx", member(b.field, "attr" )); \ + gg_fprintf(trace_handle, 1, " c:o:d:r %ld", member(b.field, "capacity")); \ + gg_fprintf(trace_handle, 1, ":%ld", member(b.field, "offset" )); \ + gg_fprintf(trace_handle, 1, ":%d", gg_cast(INT, (member(b.field, "digits" )))); \ + gg_fprintf(trace_handle, 1, ":%d)", gg_cast(INT, (member(b.field, "rdigits" )))); \ + } \ + } \ + else if( (b).field->type == FldLiteralN ) \ + { \ + gg_fprintf(trace_handle, 1, " attr 0x%lx", build_int_cst_type(SIZE_T, (b).field->attr)); \ + gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, (b).field->data.capacity)); \ + gg_fprintf(trace_handle, 1, ":%ld", build_int_cst_type(SIZE_T, (b).field->offset)); \ + gg_fprintf(trace_handle, 1, ":%d", build_int_cst_type(INT, (b).field->data.digits)); \ + gg_fprintf(trace_handle, 1, ":%d)", build_int_cst_type(INT, (b).field->data.rdigits)); \ + } \ + } \ + }while(0); + +#define TRACE1_FIELD(a, b, c) \ + do{ \ + TRACE1_FIELD_INFO(a, b) \ + TRACE1_FIELD_VALUE("", b, c) \ + }while(0); + +#define TRACE1_REFER(a, b, c) \ + do{ \ + TRACE1_REFER_INFO(a, b) \ + TRACE1_REFER_VALUE("", b, c) \ + }while(0); + +#define TRACE1_LABEL(a, b, c) \ + do{ \ + cursor_at_sol=false; \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal(a)); \ + if( !b ) \ + { \ + gg_fprintf(trace_handle, 0, "label " #b " is NULL"); \ + } \ + else \ + { \ + gg_fprintf(trace_handle, 2, \ + "%s (%s)", \ + gg_string_literal(b->name), \ + gg_string_literal(b->type_str()), \ + NULL_TREE); \ + } \ + gg_fprintf(trace_handle, 1, "%s", gg_string_literal(c)); \ + } while(0); + +// Use CHECK_FIELD when a should be non-null, and a->var_decl_node also should +// by non-null: +#define CHECK_FIELD(a) \ + do{ \ + if(!a) \ + { \ + yywarn("%s(): parameter " #a " is NULL", __func__); \ + gcc_unreachable(); \ + } \ + if( !a->var_decl_node && a->type != FldConditional && a->type != FldLiteralA) \ + { \ + yywarn("%s() parameter " #a " is variable %s<%s> with NULL var_decl_node", \ + __func__, \ + a->name, \ + cbl_field_type_str(a->type) ); \ + gcc_unreachable(); \ + } \ + }while(0); + +#define CHECK_LABEL(a) \ + do{ \ + if(!a) \ + { \ + yywarn("%s(): parameter " #a " is NULL", __func__); \ + gcc_unreachable(); \ + } \ + }while(0); + +#ifdef INCORPORATE_ANALYZER +// The analyzer requires a C++17 compiler because of the inline static variable +class ANALYZE + { + private: + const char *func; + int level; + inline static int analyze_level=1; + public: + ANALYZE(const char *func_) : func(func_) + { + level = 0; + if( getenv("Analyze") ) + { + level = analyze_level++; + char ach[128]; + snprintf(ach, sizeof(ach), "# %s analyze_enter %d", func, level); + if( !mode_syntax_only() ) + { + gg_insert_into_assembler(ach); + } + } + } + ~ANALYZE() + { + ExitMessage(); + } + void ExitMessage() + { + if( getenv("Analyze") ) + { + char ach[128]; + snprintf(ach, sizeof(ach), "# %s analyze_exit %d", func, level); + if( !mode_syntax_only() ) + { + gg_insert_into_assembler(ach); + } + } + } + void Message(const char *msg) + { + if( getenv("Analyze") ) + { + char ach[128]; + snprintf(ach, sizeof(ach), "# %s %s %d", func, msg, level); + if( !mode_syntax_only() ) + { + gg_insert_into_assembler(ach); + } + } + } + }; +#else +class ANALYZE + { + public: + ANALYZE(const char *) + { + } + ~ANALYZE() + { + ExitMessage(); + } + void ExitMessage() + { + } + void Message(const char *) + { + } + }; +#endif + +#define Analyze() ANALYZE Analyzer(__func__); + +#pragma GCC diagnostic pop + +#endif diff --git a/gcc/cobol/structs.cc b/gcc/cobol/structs.cc new file mode 100644 index 0000000..bf98d1f --- /dev/null +++ b/gcc/cobol/structs.cc @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + /* This module exists in support of genapi.c + + It creates the declarations for structures that are implemented in the + the libgcobol run-time library. These are type_decls; the analog in the + C world would be that these are typedefs: + + typedef struct XXX_ + { + .... + } XXX; + + These functions don't, on their own, allocate any storage. That gets done + when the type_decl is handed to the build_decl routine, which creates + a var_decl. And that gets added to the GENERIC tree when the var_decl + is turned into a decl_expr by build1() and then the decl_expr is added + to the current statement list. + + Your best bet is to simply emulate the code here to create the type_decl + for each structure, and then just use gg_declare_variable() to create the + storage when you need it. + + Learning from the code in genapi.c is your best bet. + + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "gengen.h" + +tree +var_decl_node_p_of( cbl_field_t *var ) + { + if( var->var_decl_node ) + { + return gg_get_address_of(var->var_decl_node); + } + else + { + return null_pointer_node; + } + } + +// These routines return references, rather than values. So, in cases +// like MOVE TABLE(a) TO TABLE (b), you need to gg_assign the returned +// value elsewhere, rather than use them directly, because the second +// refer_qualification calculation will overwrite the first. + +tree +member(tree var, const char *member_name) + { + return gg_struct_field_ref(var, member_name); + } + +tree +member(cbl_field_t *var, const char *member_name) + { + return gg_struct_field_ref(var->var_decl_node, member_name); + } + +tree +member(cbl_file_t *var, const char *member_name) + { + return gg_struct_field_ref(var->var_decl_node, member_name); + } + +void +member(tree var, const char *member_name, int value) + { + gg_assign( member(var, member_name), + build_int_cst_type(INT, value) ); + } + +void +member(tree var, const char *member_name, tree value) + { + gg_assign( member(var, member_name), + value ); + } + +void +member(cbl_field_t *var, const char *member_name, tree value) + { + gg_assign( member(var->var_decl_node, member_name), + value ); + } + +tree +member2(tree var, const char *member_name, const char *submember) + { + tree level1 = member(var, member_name); + return member(level1, submember ); + } + +void +member2(tree var, const char *member_name, const char *submember, int value) + { + tree level1 = member(var, member_name); + tree level2 = member(level1, submember ); + gg_assign(level2, build_int_cst_type(INT, value) ); + } + +void +member2(tree var, const char *member_name, const char *submember, tree value) + { + tree level1 = member(var, member_name); + tree level2 = member(level1, submember ); + gg_assign(level2, value); + } + +void +member3(tree var, const char *mem, const char *sub2, const char *sub3, tree value) + { + tree level1 = member(var, mem); + tree level2 = member(level1, sub2 ); + tree level3 = member(level2, sub3 ); + gg_assign(level3, value); + } + +tree cblc_field_type_node; +tree cblc_field_p_type_node; +tree cblc_field_pp_type_node; +tree cblc_file_type_node; +tree cblc_file_p_type_node; +tree cblc_goto_type_node; +tree cblc_int128_type_node; + +// The following functions return type_decl nodes for the various structures + +static tree +create_cblc_field_t() + { + /* + typedef struct cblc_field_t + { + unsigned char *data; // The runtime data. There is no null terminator + size_t capacity; // The size of "data" + size_t allocated; // The number of bytes available for capacity + size_t offset; // Offset from our ancestor + char *name; // The null-terminated name of this variable + char *picture; // The null-terminated picture string. + char *initial; // The null_terminated initial value + struct cblc_field_t *parent;// This field's immediate parent field + size_t occurs_lower; // non-zero for a table + size_t occurs_upper; // non-zero for a table + size_t attr; // See cbl_field_attr_t + signed char type; // A one-byte copy of cbl_field_type_t + signed char level; // This variable's level in the naming heirarchy + signed char digits; // Digits specified in PIC string; e.g. 5 for 99v999 + signed char rdigits; // Digits to the right of the decimal point. 3 for 99v999 + } cblc_field_t; + */ + tree retval = NULL_TREE; + retval = gg_get_filelevel_struct_type_decl( "cblc_field_t", + 16, + UCHAR_P, "data", + SIZE_T, "capacity", + SIZE_T, "allocated", + SIZE_T, "offset", + CHAR_P, "name", + CHAR_P, "picture", + CHAR_P, "initial", + CHAR_P, "parent", + SIZE_T, "occurs_lower", + SIZE_T, "occurs_upper", + SIZE_T, "attr", + SCHAR, "type", + SCHAR, "level", + SCHAR, "digits", + SCHAR, "rdigits", + INT, "dummy"); // Needed to make it an even number of 32-bit ints + retval = TREE_TYPE(retval); + + return retval; + } + +static tree +create_cblc_file_t() + { + // When doing FILE I/O, you need the cblc_file_t structure + + /* +typedef struct cblc_file_t + { + char *name; // This is the name of the structure; might be the name of an environment variable + char *filename; // The name of the file to be opened + FILE *file_pointer; // The FILE *pointer + cblc_field_t *default_record; // The record_area + size_t record_area_min; // The size of the smallest 01 record in the FD + size_t record_area_max; // The size of the largest 01 record in the FD + cblc_field_t **keys; // For relative and indexed files. The first is the primary key. Null-terminated. + int *key_numbers; // One per key -- each key has a number. This table is key_number + 1 + int *uniques; // One per key + cblc_field_t *password; // + cblc_field_t *status; // This must exist, and is the cbl_field_t version of io_status + cblc_field_t *user_status; // This might exist, and is another copy See 2014 standard, section 9.1.12 + cblc_field_t *vsam_status; // + cblc_field_t *record_length; // + supplemental_t *supplemental; // + void *implementation; // reserved for any implementation + size_t reserve; // From I-O section RESERVE clause + long prior_read_location; // Location of immediately preceding successful read + cbl_file_org_t org; // from ORGANIZATION clause + cbl_file_access_t access; // from ACCESS MODE clause + int mode_char; // 'r', 'w', '+', or 'a' from FILE OPEN statement + int errnum; // most recent errno; can't reuse "errno" as the name + file_status_t io_status; // See 2014 standard, section 9.1.12 + int padding; // Actually a char + int delimiter; // ends a record; defaults to '\n'. + int flags; // cblc_file_flags_t + int recent_char; // This is the most recent char sent to the file + int recent_key; + cblc_file_prior_op_t prior_op; + int dummy // We need an even number of INT + } cblc_file_t; + */ + + tree retval = NULL_TREE; + retval = gg_get_filelevel_struct_type_decl( "cblc_file_t", + 30, + CHAR_P, "name", + CHAR_P, "filename", + FILE_P, "file_pointer", + cblc_field_p_type_node, "default_record", + SIZE_T, "record_area_min", + SIZE_T, "record_area_max", + build_pointer_type(cblc_field_p_type_node), "keys", + build_pointer_type(INT),"key_numbers", + build_pointer_type(INT),"uniques", + cblc_field_p_type_node, "password", + cblc_field_p_type_node, "status", + cblc_field_p_type_node, "user_status", + cblc_field_p_type_node, "vsam_status", + cblc_field_p_type_node, "record_length", + VOID_P, "supplemental", + VOID_P, "implementation", + SIZE_T, "reserve", + LONG, "prior_read_location", + INT, "org", + INT, "access", + INT, "mode_char", + INT, "errnum", + INT, "io_status", + INT, "padding", + INT, "delimiter", + INT, "flags", + INT, "recent_char", + INT, "recent_key", + INT, "prior_op", + INT, "dummy"); + retval = TREE_TYPE(retval); + return retval; + } + +static tree +create_cblc_int128_t() + { + /* + // GCC-13 can't initialize __int64 variables, which is something we need to + // be able to do. So, I created this union. The array can be initialized, + // and thus we do an end run around the problem. Annoying, but not fatally + // so. + + typedef union cblc_int128_t + { + unsigned char array16[16]; + __uint128 uval128; + __int128 sval128; + } cblc_int128_t; + */ + tree retval = NULL_TREE; + tree array_type = build_array_type_nelts(UCHAR, 16); + retval = gg_get_filelevel_union_type_decl( + "cblc_int128_t", + 3, + array_type, "array16" , + UINT128, "uval128" , + INT128, "sval128" ); + retval = TREE_TYPE(retval); + return retval; + } + +void +create_our_type_nodes() + { + static bool just_once = true; + if( just_once ) + { + just_once = false; + cblc_field_type_node = create_cblc_field_t(); + cblc_field_p_type_node = build_pointer_type(cblc_field_type_node); + cblc_field_pp_type_node = build_pointer_type(cblc_field_p_type_node); + cblc_file_type_node = create_cblc_file_t(); + cblc_file_p_type_node = build_pointer_type(cblc_file_type_node); + cblc_int128_type_node = create_cblc_int128_t(); + } + } + diff --git a/gcc/cobol/structs.h b/gcc/cobol/structs.h new file mode 100644 index 0000000..618d8f0 --- /dev/null +++ b/gcc/cobol/structs.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef STRUCTS_H__ +#define STRUCTS_H__ + +extern tree var_decl_node_p_of( cbl_field_t *var ); + +// Simple fetch +extern tree member(tree var, const char *member_name); +extern tree member(cbl_field_t *var, const char *member_name); +extern tree member(cbl_refer_t refer, const char *member_name); + +extern tree member(cbl_file_t *var, const char *member_name); +extern tree member2(tree var, const char *member_name, const char *submember); + +// assignment +extern void member(tree var, const char *member_name, int value); +extern void member(tree var, const char *member_name, tree value); +extern void member(cbl_field_t *var, const char *member_name, tree value); + +extern void member2(tree var, const char *member_name, const char *submember, int value); +extern void member2(tree var, const char *member_name, const char *submember, tree value); +extern void member3(tree var, const char *mem, const char *sub1, const char *sub2, tree value); + +extern GTY(()) tree cblc_field_type_node; +extern GTY(()) tree cblc_field_p_type_node; +extern GTY(()) tree cblc_field_pp_type_node; +extern GTY(()) tree cblc_file_type_node; +extern GTY(()) tree cblc_file_p_type_node; +extern GTY(()) tree cblc_goto_type_node; +extern GTY(()) tree cblc_int128_type_node; + +extern void create_our_type_nodes(); + +#endif diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc new file mode 100644 index 0000000..a4e87c8 --- /dev/null +++ b/gcc/cobol/symbols.cc @@ -0,0 +1,4881 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fstream> // Before cobol-system because it uses poisoned functions +#include "cobol-system.h" +#include <search.h> +#include <iconv.h> +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" + +#pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +bool +lexio_dialect_mf() { return dialect_mf(); } + +class symbol_pair_t +{ + const symbol_elem_t *first, *last; +public: + symbol_pair_t( const symbol_elem_t * first, const symbol_elem_t * end = NULL ) + : first(first), last(end) + {} + + // used only by std::find to locate a pointer between first and last + bool operator==( const symbol_pair_t& that ) const { + return this->first <= that.first && that.first < this->last; + } + + size_t index( const symbol_elem_t *psym ) const { + assert( first <= psym && psym < last ); + return psym - first; + } +}; + +static std::map<size_t, YYLTYPE> field_locs; + +void +symbol_field_location( size_t ifield, const YYLTYPE& loc ) { + gcc_assert(field_at(ifield)); + field_locs[ifield] = loc; +} +YYLTYPE +symbol_field_location( size_t ifield ) { + auto p = field_locs.find(ifield); + gcc_assert(p != field_locs.end()); + return p->second; +} + +static struct symbol_table_t { + int fd; + size_t capacity, nelem; + size_t first_program, procedures; + struct { + size_t file_status, linage_counter, return_code, + exception_condition, very_true, very_false; + } registers; + + struct symbol_elem_t *elems; + + std::map<elem_key_t, size_t> specials; + std::map<elem_key_t, std::list<size_t>> labels; + + std::vector<symbol_pair_t> mappings; + + /* + * To compute an offset into the symbol table from an element + * pointer, first search the mappings to determine which one it + * belongs to. + */ + size_t index( const symbol_elem_t * psym ) const { + assert(psym); + auto pend = mappings.end(); + auto p = std::find(mappings.begin(), pend, symbol_pair_t(psym)); + assert( p != pend ); // pysm does not point to a symbol in the symbol table. + return p->index(psym); + } + + void save() { mappings.push_back( symbol_pair_t( elems, elems + capacity ) ); } + + size_t size() const { return capacity * sizeof(elems[0]); } + + void labelmap_add( const symbol_elem_t *e ) { + const char *name = cbl_label_of(e)->name; + labels[ elem_key_t(e->program, name) ].push_back( symbol_index(e) ); + } +} symbols { .fd = -1 }; + +static symbol_table_t& +symbol_table_extend() { + static FILE *mapped; + + if( symbols.nelem == 0 ) { // first time: create file & set initial capacity + assert(mapped == NULL && symbols.fd == -1); + + if( (mapped = tmpfile()) == NULL ) { + cbl_err( "could not create temporary file for symbol table"); + } + + symbols.fd = fileno(mapped); + assert(symbols.fd > 0); + + symbols.capacity = 64; + } else { + if( 0 != msync(symbols.elems, symbols.size(), MS_SYNC | MS_INVALIDATE) ) { + cbl_err( "%s:%d: could not synchronize symbol table with mapped file", + __func__, __LINE__ ); + } + } + + symbols.capacity *= 2; + off_t len = symbols.size(); + + if( 0 != ftruncate(symbols.fd, len) ) { + cbl_err( "%s:%d:could not extend symbol table to %zu elements", + __func__, __LINE__, symbols.capacity); + } + + /* + * We never unmap a disused symbol table, to avoid referencing + * invalid pointers. The table itself contains no pointers; it uses + * table indexes. But the parser API uses pointers, and sometimes + * the table needs to be extended before the code generator is done + * with them. + * + * By extending the file and mapping it anew, the old mapping + * remains valid, and the new mapping extends it in a different part + * of the virtual address space. Page 0 of the old map, for example, + * occupies the same physical RAM as before, but is shared between + * two mappings. + */ + + void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, symbols.fd, 0); + + if( MAP_FAILED == mem ) { + cbl_err( "%s:%d: could not extend symbol table", __func__, __LINE__); + } + symbols.elems = static_cast<struct symbol_elem_t*>(mem); + + symbols.save(); // add new mapping to list of mappings + + return symbols; +} + +static struct symbol_elem_t * +symbol_at_impl( size_t index, bool internal = true ) { + assert( index <= symbols.nelem ); + if( !internal ) assert( index < symbols.nelem ); + symbol_elem_t *e = symbols.elems + index; + + if( index == symbols.nelem ) return e; + + if( e->type == SymField && cbl_field_of(e)->type == FldForward ) { + return symbol_field(e->program, + cbl_field_of(e)->parent, cbl_field_of(e)->name); + } + return e; +} + +struct symbol_elem_t * +symbol_at( size_t index ) { + return symbol_at_impl(index, false); +} + +static char decimal_point = '.'; + +size_t file_status_register() { return symbols.registers.file_status; } +size_t return_code_register() { return symbols.registers.return_code; } +size_t very_true_register() { return symbols.registers.very_true; } +size_t very_false_register() { return symbols.registers.very_false; } +size_t ec_register() { return symbols.registers.exception_condition; } + +cbl_refer_t * +cbl_refer_t::empty() { + static cbl_refer_t empty; + return ∅ +} + +cbl_field_t * +cbl_span_t::from_field() { assert(from); return from->field; } +cbl_field_t * +cbl_span_t::len_field() { assert(len); return len->field; } + +cbl_ffi_arg_t:: +cbl_ffi_arg_t( cbl_refer_t* refer, cbl_ffi_arg_attr_t attr ) + : optional(false) + , crv(by_reference_e) + , attr(attr) + , refer(refer? *refer : cbl_refer_t()) +{ + if( refer && refer != refer->empty() ) delete refer; +} + +cbl_ffi_arg_t:: +cbl_ffi_arg_t( cbl_ffi_crv_t crv, + cbl_refer_t* refer, cbl_ffi_arg_attr_t attr ) + : optional(false) + , crv(crv) + , attr(attr) + , refer(refer? *refer : cbl_refer_t()) +{ + if( refer && refer != refer->empty() ) delete refer; +} + +#define ERROR_FIELD(F, ...) \ + do{ \ + auto loc = symbol_field_location(field_index(F)); \ + error_msg(loc, __VA_ARGS__); \ + } while(0) + + +cbl_field_t * +symbol_valid_udf_args( size_t function, std::list<cbl_refer_t> args ) { + auto L = cbl_label_of(symbol_at(function)); + if( ! L->returning ) { + dbgmsg("logic error: %s does not define RETURNING", L->name); + return NULL; + } + auto e = std::find_if( symbol_at(function), symbols_end(), + []( auto symbol ) { + if( symbol.type == SymDataSection ) { + auto section(symbol.elem.section); + return section.type == linkage_sect_e; + } + return false; + } ); + for( auto arg : args ) { + size_t iarg(1); + e++; // skip over linkage_sect_e, which appears after the function + if( e->type != SymField ) { + ERROR_FIELD(arg.field, + "FUNCTION %s has no defined parameter matching arg %zu, '%s'", + L->name, iarg, arg.field->name ); + return NULL; + } + + auto tgt = cbl_field_of(e); + + if( ! valid_move(tgt, arg.field) ) { + ERROR_FIELD(tgt, "FUNCTION %s arg %zu, '%s' cannot be passed to %s, type %s", + L->name, iarg, arg.field->pretty_name(), + tgt->pretty_name(), 3 + cbl_field_type_str(tgt->type) ); + return NULL; + } + } + return cbl_field_of(symbol_at(L->returning)); +} + +static const struct cbl_occurs_t nonarray = cbl_occurs_t(); + +static const struct cbl_field_t empty_float = { + 0, FldFloat, FldInvalid, + intermediate_e, + 0, 0, 0, nonarray, 0, "", + 0, cbl_field_t::linkage_t(), + {16, 16, 32, 0, NULL, NULL, {NULL}, {NULL}}, NULL }; + +static const struct cbl_field_t empty_comp5 = { + 0, FldNumericBin5, FldInvalid, + signable_e | intermediate_e, + 0, 0, 0, nonarray, 0, "", + 0, cbl_field_t::linkage_t(), + {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL, NULL, {NULL}, {NULL}}, NULL }; + +#if 0 +# define CONSTANT_E constant_e +#else +# define CONSTANT_E intermediate_e +#endif + +static struct cbl_field_t empty_literal = { + 0, FldInvalid, FldInvalid, CONSTANT_E, + 0, 0, 0, nonarray, 0, "", + 0, cbl_field_t::linkage_t(), + {0,0,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }; + +static const struct cbl_field_t empty_conditional = { + 0, FldConditional, FldInvalid, intermediate_e, + 0, 0, 0, nonarray, 0, "", + 0, cbl_field_t::linkage_t(), + {0,0,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }; + + +/** + * Debug register record + 01 DEBUG-ITEM. + 02 DEBUG-LINE PIC X(6). + 02 FILLER PIC X VALUE SPACE. + 02 DEBUG-NAME PIC X(30). + 02 FILLER PIC X VALUE SPACE. + 02 DEBUG-SUB-1 PIC S9999 SIGN IS LEADING SEPARATE CHARACTER. + 02 FILLER PIC X VALUE SPACE. + 02 DEBUG-SUB-2 PIC S9999 SIGN IS LEADING SEPARATE CHARACTER. + 02 FILLER PIC X VALUE SPACE. + 02 DEBUG-SUB-3 PIC S9999 SIGN IS LEADING SEPARATE CHARACTER. + 02 FILLER PIC X VALUE SPACE. + 02 DEBUG-CONTENTS PIC X(76). + **/ + +static cbl_field_t debug_registers[] = { + { 0, FldGroup, FldInvalid, global_e, 0,0,1, nonarray, 0, + "DEBUG-ITEM", 0, {}, {132,132,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, global_e, 0,0,2, nonarray, 0, + "DEBUG-LINE", 0, {}, {6,6,0,0, " ", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, + "FILLER", 0, {}, {1,1,0,0, " ", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, global_e, 0,0,2, nonarray, 0, + "DEBUG-NAME", 0, {}, {30,30,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, + "FILLER", 0, {}, {1,1,0,0, " ", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0, + "DEBUG-SUB-1", 0, {}, {5,5,3,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, + "FILLER", 0, {}, {1,1,0,0, " ", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0, + "DEBUG-SUB-2", 0, {}, {5,5,3,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, + "FILLER", 0, {}, {1,1,0,0, " ", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldNumericDisplay, FldInvalid, signable_e | global_e | leading_e | separate_e, 0,0,2, nonarray, 0, + "DEBUG-SUB-3", 0, {}, {5,5,3,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, 0, 0,0,2, nonarray, 0, + "FILLER", 0, {}, {1,1,0,0, " ", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldAlphanumeric, FldInvalid, signable_e | global_e, 0,0,2, nonarray, 0, + "DEBUG-CONTENTS", 0, {}, {76,76,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }, +}; + +class group_size_t { + size_t size; + public: + group_size_t() : size(0) {} + group_size_t& operator+( const cbl_field_t& field ) { + size += field.data.capacity; + return *this; + } + size_t capacity() const { return size; } +}; + +enum { constq = constant_e | quoted_e }; + +static cbl_field_t special_registers[] = { + { 0, FldNumericDisplay, FldInvalid, 0, 0, 0, 0, nonarray, 0, "_FILE_STATUS", + 0, {}, {2,2,2,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "UPSI-0", + 0, {}, {2,2,4,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "RETURN-CODE", + 0, {}, {2,2,4,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldNumericBin5, FldInvalid, 0, 0, 0, 0, nonarray, 0, "LINAGE-COUNTER", + 0, {}, {2,2,4,0, NULL, NULL, {NULL}, {NULL}}, NULL }, + { 0, FldLiteralA, FldInvalid, 0, 0, 0, 0, nonarray, 0, "_dev_stdin", + 0, {}, {0,0,0,0, "/dev/stdin", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_stdout", + 0, {}, {0,0,0,0, "/dev/stdout", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_stderr", + 0, {}, {0,0,0,0, "/dev/stderr", NULL, {NULL}, {NULL}}, NULL }, + { 0, FldLiteralA, FldInvalid, constq, 0, 0, 0, nonarray, 0, "_dev_null", + 0, {}, {0,0,0,0, "/dev/null", NULL, {NULL}, {NULL}}, NULL }, + +}; + +static symbol_elem_t +elementize( cbl_field_t& field ) { + symbol_elem_t elem = { .type = SymField, .elem = {.field = field} }; + return elem; +} + +size_t +field_index( const cbl_field_t *f ) { + assert(f); + return symbol_index(symbol_elem_of(f)); +} + +static inline bool +is_forward( const struct symbol_elem_t *e ) { + return cbl_field_of(e)->type == FldForward; +} +static inline bool +is_forward( const cbl_field_t *field ) { + return field->type == FldForward; +} + +static inline bool +has_parent( const struct symbol_elem_t *e ) { + return cbl_field_of(e)->parent > 0; +} + +/* + * A field is global if it's marked global, or if any of its parents are. + * Actually, only 01 level can be global, but this works. + */ +bool +is_global( const cbl_field_t * field ) { + do { + if( (field->attr & global_e) == global_e ) { + return true; + } + if( field->parent > 0 ) { + symbol_elem_t *e = symbol_at(field->parent); + if( SymField == e->type ) { + field = cbl_field_of(e); + continue; + } + } + break; + } while(true); + return false; +} + +static bool +special_pair_cmp( const cbl_special_name_t& key, + const cbl_special_name_t& elem ) { + const bool matched = key.id == elem.id || 0 == strcasecmp(key.name, elem.name); + + if( getenv(__func__) ) { + dbgmsg("%s:%d: key: id=%2d, %s", __func__, __LINE__, key.id, key.name); + dbgmsg("%s:%d: elem: id=%2d, %s => %s", __func__, __LINE__, + elem.id, elem.name, matched? "match" : "no match"); + } + + return matched; +} + +/* + * On insertion, a label may be a definition or a forward reference. + * On reference, a label may be qualified or not. If not, we don't + * know if it refers to a section or a paragraph. + * + * Declarations and references always use line == 0; only definitions + * have a line number. + * + * An unqualified reference is denoted LblNone. If not found, it is + * inserted as a declaration: LblNone, line 0. + * + * A qualified reference is denoted LblParagraph with a section, and + * with line = 0. A qualified reference updates an unqualified + * declaration; the declation is upgraded to LblParagraph with the + * section as its parent, but still with no line (because it's still + * undefined). + * + * Matching rules (assuming names match): + * Key Element New Effect + * type parent line type parent type + * None - None - unqualified ref matches decl + * None - Sect - unqualified ref matches section + * None - Para x unqualified ref matches any para + * Sect - None - Sect section definition updates decl + * Sect - Sect - section matches section + * Para S 0 None - Para S qualified ref updates decl + * Para x >0 None - Para paragraph definition updates decl + * Para S 0 Para S qualified ref matches decl or def + * Para x >0 Para x Para paragraph definition updates decl + * if elem.line == 0. + * + * All other combinations fail or are invalid by assertion. + */ +static bool label_cmp( const cbl_label_t& key, + const cbl_label_t& elem, bool names_matched = false ) { + if( ! names_matched ) { + if( 0 != strcasecmp(key.name, elem.name) ) return false; + } + + switch( key.type ) { + + case LblNone: + assert(0 == key.explicit_parent()); + assert(0 == key.line); + switch( elem.type ) { + case LblNone: + case LblSection: + assert(!elem.explicit_parent()); + return true; + break; + case LblParagraph: + return true; + break; + default: + break; + } + break; + + case LblSection: + assert(0 == key.explicit_parent()); + switch( elem.type ) { + case LblNone: + case LblSection: + assert(!elem.explicit_parent()); + return true; + break; + default: + break; + } + break; + + case LblParagraph: + switch( elem.type ) { + case LblNone: + if(elem.explicit_parent()) { + cbl_errx( "%s:%d: LblNone '%s' has parent #%zu", + __func__, __LINE__, elem.name, elem.parent ); + } + assert(!elem.explicit_parent()); + return true; + break; + case LblParagraph: + if( key.parent == elem.parent ) { // explicit or implicit + return key.line == 0 || elem.line == 0 || key.line == elem.line; + // negative key.line never matches (causing insertion) + } + break; + default: + break; + } + break; + default: + gcc_unreachable(); + } + return false; +} + +static int +symbol_elem_cmp( const void *K, const void *E ) +{ + const struct symbol_elem_t + *k=static_cast<const struct symbol_elem_t *>(K), + *e=static_cast<const struct symbol_elem_t *>(E); + + if( k->type != e->type ) return 1; + if( k->program != e->program && !is_program(*k)) return 1; + + switch( k->type ) { + case SymFilename: + return strcmp(k->elem.filename, e->elem.filename); + break; + case SymDataSection: + return k->elem.section.type == e->elem.section.type ? 0 : 1; + break; + case SymFunction: + return strcmp(k->elem.function.name, e->elem.function.name); + break; + case SymField: + if( has_parent(k) && cbl_field_of(k)->parent != cbl_field_of(e)->parent ) { + return 1; + } + // If the key has attributes, they must match. + if( (cbl_field_of(k)->attr & global_e) == global_e ) { + if( !is_global(cbl_field_of(e)) ) { + return 1; + } + } + // forwards match forwards only + if( is_forward(k) && !is_forward(e) ) return 1; + if( !is_forward(k) && is_forward(e) ) return 1; + break; + case SymLabel: + // A LblNone element (created by a forward reference) that lacks a parent + // matches on name only. It becomes a LblParagraph or LblSection. + // Remember: this test is for adding labels, not resolving references. + { + const cbl_label_t& key = *cbl_label_of(k); + const cbl_label_t& elem = *cbl_label_of(e); + + if( key.type != elem.type ) { + if( !(key.type == LblNone || elem.type == LblNone) ) return 1; + } + + switch(key.type) { + case LblProgram: // There are no forward program labels + if( key.parent > 0 && key.parent != elem.parent ) return 1; + assert(key.parent == elem.parent || key.parent == 0); + break; + case LblNone: case LblSection: case LblParagraph: + return label_cmp(key, elem)? 0 : 1; + break; + default: + if( key.parent != elem.parent ) { // allow zero parent of LblNone + if( !(elem.type == LblNone && elem.explicit_parent() == 0) ) return 1; + } + assert(key.parent == elem.parent || elem.type == LblNone); + } + + if( key.os_name && elem.os_name ) { + if( 0 == strcasecmp(key.os_name, elem.os_name) ) return 0; // success + } + return strcasecmp(key.name, elem.name); + } + break; + case SymSpecial: + return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1; + break; + case SymAlphabet: + return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name); + break; + case SymFile: + // If the key is global, so must be the found element. + if( (cbl_file_of(k)->attr & global_e) == global_e && + (cbl_file_of(e)->attr & global_e) != global_e ) { + return 1; + } + return strcasecmp(k->elem.file.name, e->elem.file.name); + break; + } + assert(k->type == SymField); + +#if 1 + // Used by symbol_literalA + // Literals have no name. They match on their constant initial value. + if( is_literal(cbl_field_of(k)) && is_literal(cbl_field_of(e)) ) { + return strcmp(cbl_field_of(k)->data.initial, cbl_field_of(e)->data.initial); + } +#endif + if( cbl_field_of(k)->has_attr(filler_e) ) { + return 1; // filler never matches + } + + return strcasecmp(cbl_field_of(k)->name, cbl_field_of(e)->name); +} + +cbl_label_ref_t:: +cbl_label_ref_t( size_t program, const cbl_label_t& context, int line, + const char name[], size_t isect ) + : qualified(isect != 0) + , context(context) + , line(line) + , handle(NULL) +{ + cbl_label_type_t type = isect? LblParagraph : LblNone; + struct cbl_label_t label = { type, isect, line }; + assert(strlen(name) < sizeof(label.name)); + strcpy(label.name, name); + + target = symbol_label_add(program, &label); + assert(target); +} + +struct cbl_label_t * +symbol_label( size_t program, cbl_label_type_t type, size_t section, + const char name[], + const char os_name[] ) +{ + static cbl_name_t lname; + std::transform(name, name + strlen(name) + 1, lname, tolower); + elem_key_t key( program, lname ); + auto p = symbols.labels.find(key); + if( p == symbols.labels.end()) return NULL; + + cbl_label_t protolabel = { .type = type, .parent = section, .os_name = os_name }; + assert(strlen(name) < sizeof protolabel.name); + strcpy(protolabel.name, name); + + const std::list<size_t>& syms(p->second); + auto psym = + std::find_if( syms.begin(), syms.end(), + [key=protolabel]( size_t isym ) { + const auto& elem = *cbl_label_of(symbol_at(isym)); + + switch(key.type) { + case LblProgram: // There are no forward program labels + if( key.parent > 0 && key.parent != elem.parent ) return false; + assert(key.parent == elem.parent || key.parent == 0); + break; + case LblNone: case LblSection: case LblParagraph: + return label_cmp(key, elem, true); + break; + default: + if( key.parent != elem.parent ) { // allow zero parent of LblNone + if( !(elem.type == LblNone && elem.explicit_parent() == 0) ) return false; + } + assert(key.parent == elem.parent || elem.type == LblNone); + break; + } + + if( key.os_name && elem.os_name ) { + if( 0 == strcasecmp(key.os_name, elem.os_name) ) return true; // success + } + return true; + } ); + if( psym == syms.end() ) return NULL; + return cbl_label_of(symbol_at(*psym)); +} + +size_t +symbol_label_id( const cbl_label_t *label ) { + auto e = symbol_elem_of(label); + size_t label_index = symbol_index(e); + assert( label_index < std::numeric_limits<uint32_t>::max() ); + return label_index; +} + +struct cbl_label_t * +symbol_program( size_t parent, const char name[] ) +{ + cbl_label_t label = {}; + label.type = LblProgram; + label.parent = parent; + assert(strlen(name) < sizeof label.name); + strcpy(label.name, name); + + struct symbol_elem_t key = { SymLabel, 0, { NULL } }, *e; + key.elem.label = label; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e? cbl_label_of(e) : NULL; +} + +extern int yydebug; + +static size_t +symbols_dump( size_t first, bool header ); + +struct symbol_elem_t * +symbol_function( size_t parent, const char name[] ) +{ + auto p = std::find_if( symbols_begin(), symbols_end(), + [parent, name]( const auto& elem ) { + if( elem.type == SymLabel ) { + auto L = cbl_label_of(&elem); + if( L->type == LblFunction ) { + return 0 == strcasecmp(L->name, name); + } + } + return false; + } ); + + if( yydebug && p == symbols_end() ) symbols_dump( symbols.first_program, true); + + return p == symbols_end()? NULL : p; + + cbl_label_t label = {}; + label.type = LblFunction; + label.parent = parent; + assert(strlen(name) < sizeof label.name); + strcpy(label.name, name); + + struct symbol_elem_t key = { SymLabel, 0, { NULL } }, *e; + key.elem.label = label; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e; +} + +struct symbol_elem_t * +symbol_special( size_t program, const char name[] ) +{ + elem_key_t key( program, name ); + auto p = symbols.specials.find(key); + if( p == symbols.specials.end() ) return NULL; + return symbol_at(p->second); +} + +struct symbol_elem_t * +symbol_alphabet( size_t program, const char name[] ) +{ + cbl_alphabet_t alphabet(YYLTYPE(), custom_encoding_e); + assert(strlen(name) < sizeof alphabet.name); + strcpy(alphabet.name, name); + + struct symbol_elem_t key = { SymAlphabet, program, { NULL } }, *e; + key.elem.alphabet = alphabet; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e; +} + +symbol_elem_t * +symbols_begin( size_t first ) +{ + return symbols.elems + first; +} + +symbol_elem_t * +symbols_end(void) +{ + return symbols.elems + symbols.nelem; +} + +cbl_field_t * +symbol_redefines( const struct cbl_field_t *field ) { + if( field->parent == 0 ) return NULL; + struct symbol_elem_t *e = symbol_at(field->parent); + if( e->type == SymField ) { + cbl_field_t *parent = cbl_field_of(e); + if( parent->level == field->level || field->level == 66) { + return parent; + } + return NULL; + } + return NULL; +} + +static cbl_field_t * +symbol_explicitly_redefines( const cbl_field_t *field ) { + auto f = symbol_redefines(field); + if( f && is_record_area(f) ) return NULL; + return f; +} + +static uint32_t +field_size( const struct cbl_field_t *field ) { + size_t n = field->occurs.ntimes(); + return field->data.capacity * (n > 0? n : 1); +} + +const char * +cbl_field_attr_str( cbl_field_attr_t attr ) { + switch(attr) { + case none_e: return "none"; + case figconst_1_e: return "figconst_1"; + case figconst_2_e: return "figconst_2"; + case figconst_4_e: return "figconst_4"; + case rjust_e: return "rjust"; + case ljust_e: return "ljust"; + case zeros_e: return "zeros"; + case signable_e: return "signable"; + case constant_e: return "constant"; + case function_e: return "function"; + case quoted_e: return "quoted"; + case filler_e: return "filler"; + case _spare_e: return "temporary"; + case intermediate_e: return "intermediate"; + case embiggened_e: return "embiggened"; + case all_alpha_e: return "all_alpha"; + case all_x_e: return "all_x"; + case all_ax_e: return "all_ax"; + case prog_ptr_e: return "prog_ptr"; + case scaled_e: return "scaled"; + case refmod_e: return "refmod"; + case based_e: return "based"; + case any_length_e: return "any_length"; + case global_e: return "global"; + case external_e: return "external"; + case blank_zero_e: return "blank_zero"; + case linkage_e: return "linkage"; + case local_e: return "local"; + case leading_e: return "leading"; + case separate_e: return "separate"; + case envar_e: return "envar"; + case dnu_1_e: return "dnu_1"; + case bool_encoded_e: return "bool"; + case hex_encoded_e: return "hex"; + case depends_on_e: return "depends_on"; + case initialized_e: return "initialized"; + case has_value_e: return "has_value"; + case ieeedec_e: return "ieeedec"; + case big_endian_e: return "big"; + case same_as_e: return "same_as"; + case record_key_e: return "record_key"; + case typedef_e: return "typedef"; + case strongdef_e: return "strongdef"; + } + return "???"; +} + +uint32_t +cbl_field_t::size() const { + return field_size(this); +} + +size_t +cbl_field_t::set_attr( cbl_field_attr_t attr ) { + if( attr == signable_e ) { + if( ! has_attr(attr) && this->var_decl_node != NULL ) { + parser_field_attr_set(this, attr); + } + } + return this->attr |= size_t(attr); +} + +size_t +cbl_field_t::clear_attr( cbl_field_attr_t attr ) { + if( attr == signable_e ) { + if( this->var_decl_node != nullptr && has_attr(attr) ) { + parser_field_attr_set(this, attr, false); + } + } + return this->attr &= ~size_t(attr); +} + +static uint32_t +field_memsize( const struct cbl_field_t *field ) { + uint32_t n = field->occurs.ntimes(); + n = field->data.capacity * (n > 0? n : 1); + return std::max(n, field->data.memsize); +} + +static inline bool +field_skippable( const struct cbl_field_t *field ) { + // skip forward references + if( field->type == FldForward ) { + return true; + } + + // typedef takes no space + if( field->is_typedef() ) { + return true; + } + + // skip 88s and 66s because they don't add to capacity + if( field->level == 66 || field->level == 88 ) { + return true; + } + + // skip switch values because they're just compile-time constants + if( field->type == FldSwitch ) { + return true; + } + + // skip INDEXED BY if its level is 0. + if( field->level == 0 && field->type == FldIndex ) { + return true; + } + return false; +} + +/* + * Start at a LEVEL01 field and walk through it until the next LEVEL01 + * or LEVEL77, if any. Update the offset of each subfield field + * based on the sizes of all the preceding items. + * + * A field whose parent is the same level is a REDEFINE. It does not + * use additional storage, and has an offset the same as its "parent". + */ +static struct symbol_elem_t * +update_block_offsets( struct symbol_elem_t *block) +{ + assert(block); + assert(block->type == SymField); + + uint32_t offset = cbl_field_of(block)->offset; + const uint32_t block_level = cbl_field_of(block)->level; + + if( getenv(__func__) ) { + cbl_field_t *field = cbl_field_of(block); + dbgmsg( "%s: offset is %3zu for %2u %-30s #%3zu P%zu", + __func__, field->offset, field->level, field->name, + symbol_index(block), field->parent ); + } + + struct symbol_elem_t *e = block; + for( ++e; e < symbols_end(); e++ ) { + if( e->type != SymField ) { + // Ignore non-fields + continue; + } + + cbl_field_t *field = cbl_field_of(e); + + if( field->level == 66 ) { + field->offset = parent_of(field)->offset; + continue; + } + + if( field_skippable(field) ) { + continue; + } + + if( field->level <= block_level || field->level == LEVEL77 ) { + break; // end of group + } + + if( symbol_redefines(field) ) { + field->offset = parent_of(field)->offset; + } else { + field->offset = offset; + offset += field_memsize(field); + } + + if( getenv(__func__) ) { + dbgmsg( "%s: offset is %3zu for %2u %-30s #%3zu P%zu", + __func__, field->offset, field->level, field->name, + symbol_index(e), field->parent ); + } + + if( field->type == FldGroup ) { + e = update_block_offsets(e) - 1; + } + } + return e; +} + +static inline bool +end_of_group( const cbl_field_t *group, const cbl_field_t *field ) { + // A group ends when we strike a level less than or equal to + // group_symbol->level, or when we hit a LEVEL77. + + // reject forward fields + if( is_forward(field) ) return false; + + // If field redefines group, we're not at the end. + if( group == symbol_redefines(field) ) return false; + + // An index that is part of a table is part of the group. + if( field->level == 0 && field->type == FldIndex ) return false; + + return + field->level <= group->level || + field->level == LEVEL77 || + field->level == 66; +} + +class eog_t { + const cbl_field_t * group; +public: + eog_t( const symbol_elem_t *e ) : group(cbl_field_of(e)) {} + + bool operator()( symbol_elem_t& e ) { + return e.type == SymField && end_of_group(group, cbl_field_of(&e)); + } +}; + +size_t +end_of_group( size_t igroup ) { + symbol_elem_t * group(symbol_at(igroup)); + + if( group->type == SymFile ) { + cbl_field_t * first_record = symbol_file_record(cbl_file_of(group)); + assert(first_record); + group = symbol_at(field_index(first_record)); + for( auto e = group + 1; e < symbols_end(); e++ ) { + auto isym = symbol_index(e); + if( e->program != group->program ) return isym; + if( e->type == SymLabel ) return isym; // end of data division + if( e->type == SymField ) { + auto f = cbl_field_of(e); + if( f->level == LEVEL77 || f->level == 66 ) return isym; + if( f->level == 1 && f->parent != igroup ) { + return isym; + } + } + } + return symbols.nelem; + } + + eog_t eog(symbol_at(igroup)); + symbol_elem_t *e = std::find_if( symbols_begin(++igroup), symbols_end(), eog ); + return e - symbols_begin(); +} + +size_t +symbol_field_capacity( const cbl_field_t *field ) { + class sym_field_size { + public: + sym_field_size() {} + static size_t capacity( size_t n, const symbol_elem_t& elem ) { + if( elem.type == SymField ) { + const cbl_field_t *f = cbl_field_of(&elem); + if( is_elementary(f->type) ) { + return n + ::field_size(f); + } + } + return n; + } + }; + size_t bog = field_index(const_cast<cbl_field_t*>(field)); + size_t eog = end_of_group(bog); + size_t size = std::accumulate( symbol_at(bog), symbol_at_impl(eog), + 0, sym_field_size::capacity ); + + if(true) dbgmsg("%s: %02u %s.data.capacity was computed as %zu", __func__, + field->level, field->name, size); + + return size; +} + +static bool +has_odo( const symbol_elem_t& e ) { + return e.type == SymField && cbl_field_of(&e)->occurs.depending_on > 0; +} + +// a debug version of symbol_find_odo +struct cbl_field_t * +symbol_find_odo_debug( cbl_field_t * field ) { + size_t bog = field_index(field), eog = end_of_group(bog); + dbgmsg("%s: %s is #%zu - #%zu of %zu, ends at %s", __func__, + field->name, bog, eog, symbols.nelem, + eog == symbols.nelem? "[end]" : cbl_field_of(symbol_at(eog))->name ); + + auto e = std::find_if( symbol_at(bog), symbol_at_impl(eog, true), has_odo ); + if( e != symbol_at_impl(eog, true) ) { + dbgmsg("%s: %s has ODO at #%zu (return '%s')", __func__, + field->name, symbol_index(e), + cbl_field_of(e)->name ); + } + return e == symbol_at_impl(eog, true)? NULL : cbl_field_of(e); +} + +// Return OCCURS DEPENDING ON table subordinate to field, if any. +struct cbl_field_t * +symbol_find_odo( cbl_field_t * field ) { + if( getenv(__func__) ) return symbol_find_odo_debug(field); + size_t bog = field_index(field), eog = end_of_group(bog); + auto e = std::find_if( symbol_at(bog), symbol_at_impl(eog, true), has_odo ); + return e == symbol_at_impl(eog, true)? NULL : cbl_field_of(e); +} + +static inline bool +is_index( const cbl_field_type_t type ) { return type == FldIndex; } + +static size_t +symbols_dump( size_t first, bool header ) { + size_t ninvalid = 0; + + if( !yydebug ) return 0; + + if( header ) { + fprintf(stderr, "Symbol Table has %zu elements\n", + symbols_end() - symbols_begin()); + } + + for( struct symbol_elem_t *e = symbols_begin(first); e < symbols_end(); e++ ) { + char *s; + + switch(e->type) { + case SymFilename: + s = xasprintf("%4zu %-18s %s", e->program, + "Filename", e->elem.filename); + break; + case SymDataSection: + s = xasprintf("%4zu %-18s line %d", e->program, + cbl_section_of(e)->name(), cbl_section_of(e)->line); + break; + case SymFunction: + s = xasprintf("%4zu %-15s %s", e->program, + "Function", e->elem.function.name); + break; + case SymField: { + auto field = cbl_field_of(e); + char *odo_str = NULL; + if( field->occurs.depending_on != 0 ) { + odo_str = xasprintf("odo %zu", field->occurs.depending_on ); + } + ninvalid += cbl_field_of(e)->type == FldInvalid? 1 : 0; + s = xasprintf("%4zu %-18s %s (%s)", e->program, + cbl_field_type_str(cbl_field_of(e)->type) + 3, + field_str(cbl_field_of(e)), + odo_str? odo_str : + cbl_field_type_str(cbl_field_of(e)->usage) + 3); + } + break; + case SymLabel: + s = xasprintf("%4zu %-18s %s", e->program, + "Labe1l", e->elem.label.str()); + if( LblProgram == cbl_label_of(e)->type ) { + const auto& L = *cbl_label_of(e); + if( L.os_name ) { + char *base = s; + s = xasprintf("%s as \"%s\")", base, L.os_name); + free(base); + } + } + break; + case SymSpecial: + s = xasprintf("%4zu %-18s id=%2d, %s", e->program, + "Special", e->elem.special.id, e->elem.special.name); + break; + case SymAlphabet: + s = xasprintf("%4zu %-18s encoding=%2d, '%s'", e->program, "Alphabet", + int(e->elem.alphabet.encoding), e->elem.alphabet.name); + break; + case SymFile: + s = xasprintf("%4zu %-18s %-20s", e->program, + "File", e->elem.file.name); + { + char same_as[26] = ""; + if( cbl_file_of(e)->same_record_as > 0 ) { + sprintf(same_as, "s%3zu", cbl_file_of(e)->same_record_as); + } + const char *type = file_org_str(e->elem.file.org); + char *part = s; + + s = xasprintf("%s %-4s %s %s %s{%zu-%zu} status=#%zu", + part, same_as, type, + e->elem.file.keys_str(), + cbl_file_of(e)->varies()? "varies " : "", + cbl_file_of(e)->varying_size.min, + cbl_file_of(e)->varying_size.max, + cbl_file_of(e)->user_status); + free(part); + } + break; + default: + dbgmsg("%s: cannot dump symbol type %d", __func__, e->type); + continue; + } + fprintf(stderr, "%4zu: %s\n", e - symbols_begin(), s); + free(s); + } + return ninvalid; +} + +static bool +grow_redefined_group( cbl_field_t *redefined, const cbl_field_t *field ) { + assert(redefined); + assert(field); + assert(redefined == symbol_redefines(field)); + + /* + * When this function is called, redefined elementary items are + * already resized, if eligible. + */ + if( redefined->type != FldGroup ) return false; + + /* + * 8) The storage area required for the subject of the entry + * shall not be larger than the storage area required for the + * data item referenced by data-name-2, unless the data item + * referenced by data- name-2 has been specified with level + * number 1 and without the EXTERNAL clause. + */ + if( 1 < redefined->level ) { + if( field_memsize(redefined) < field_memsize(field) ) { + ERROR_FIELD(field, "line %d: %s (size %u) larger than REDEFINES %s (size %u)", + field->line, + field->name, field_memsize(field), + redefined->name, field_memsize(redefined)); + return false; + } + } + + redefined->data.memsize = std::max(field_memsize(redefined), + field_memsize(field)); + + return true; +} + + +/* + * Input is a symbol-table element, always a field. + * For elementary fields, return the input. + * For groups, return the element after the last field in the group. + */ +static struct symbol_elem_t * + calculate_capacity( struct symbol_elem_t *e) { + // For each group, sum capacities of children. Exclude: + // FldClass, FldForward + // FldIndex with level 0 (really, any level 0) + // REDEFINES + + cbl_field_t *group = cbl_field_of(e); + + if( is_literal(group) ) return e; + if( is_index(group->type) ) return e; // 01 can be index type. + + if( is_elementary(group->type) ) { // "group" is in fact just a field + if( is_record_area(group) ) { + if( group->data.capacity == 0 ) { + const auto& file = *cbl_file_of(symbol_at(group->file)); + group->data.capacity = file.varying_size.max; + } + + // Find 01s for the file that is not a record area field. + for( auto p = symbols_begin(e->program) + 1; p < symbols_end(); ++p ) { + p = std::find_if( p, symbols_end(), + [group](const symbol_elem_t& elem) { + if( elem.type == SymField ) { + auto field = cbl_field_of(&elem); + return field != group && + field->file == group->file; + } + return false; + } ); + // If an 01 record exists for the FD/SD, use its capacity as the + // default_record capacity. + if( p != symbols_end() ) { + auto record = cbl_field_of(p); + assert(record->level == 1); + e = calculate_capacity(p); + auto record_size = std::max(record->data.memsize, + record->data.capacity); + group->data.capacity = std::max(group->data.capacity, record_size); + } + } + + // SAME AREA AS causes this record area to redefine another. + // Reach back to that symbol to set its capacity, if need be. + auto area = symbol_redefines(group); + if( area ) { + area->data.capacity = std::max(area->data.capacity, + group->data.capacity); + } + + return e; // no 01, return self + } + + cbl_field_t *redefined = symbol_redefines(group); + + if( redefined ) { + redefined->data.memsize = std::max(field_memsize(redefined), field_size(group)); + if( redefined->data.memsize == redefined->data.capacity ) { + redefined->data.memsize = 0; + } + } + return e; + } + + if(yydebug && group->type != FldGroup) { + dbgmsg("Field #%zu '%s' is not a group", symbol_index(e), group->name); + symbols_dump(symbols.first_program, true); + } + if( group->type == FldInvalid ) return e; + + assert(group->type == FldGroup); + + group->data.capacity = 0; + + std::list<cbl_field_t*> members; + + while( ++e < symbols_end() ) { + if( e->type != SymField ) continue; + cbl_field_t *field = cbl_field_of(e); + + if( field_skippable(field) ) continue; + + // Stop if field isn't a member of the group. + if( end_of_group(group, field) ) break; + + if( field->type == FldGroup ) { + e = calculate_capacity(e); + e--; // set e to last symbol processed (not next one, because ++e) + } + + members.push_back(field); + } + + // Print accumulating details for one group to debug log. + bool details = false; + if( yydebug ) { + const auto details_for = getenv("symbols_update"); + details = details_for && 0 == strcasecmp(details_for, group->name); + } + + // At end of group, members is a list of all immediate children, any + // of which might have been redefined and so acquired a memsize. + // Any element of members that redefines something redefines group. + uint32_t max_memsize = 0; + for( auto field : members ) { + cbl_field_t *redefined = symbol_redefines(field); + if( redefined ) { + if( group != redefined ) { + grow_redefined_group(redefined, field); + } + max_memsize = std::max(max_memsize, field_memsize(field)); + + field->data.memsize = 0; + + if( redefined->data.memsize == redefined->data.capacity ) { + redefined->data.memsize = 0; + } + continue; + } + group->data.capacity += field_size(field); + group->data.memsize += field_memsize(field); + + // If group has a parent that is a record area, expand it, too. + if( 0 < group->parent ) { + auto redefined = symbol_redefines(group); + if( redefined && is_record_area(redefined) ) { + if( redefined->data.capacity < group->data.memsize ) { + redefined->data.capacity = group->data.memsize; + } + } + } + + if( details ) { + dbgmsg("%s:%d: %s", __func__, __LINE__, field_str(field) ); + dbgmsg("%s:%d: %s", __func__, __LINE__, field_str(group) ); + } + } + + group->data.memsize = std::max(max_memsize, group->data.memsize); + if( group->data.memsize == group->data.capacity ) group->data.memsize = 0; + + if( 0 < group->data.memsize && group->data.memsize < group->data.capacity ) { + if( yydebug ) { + dbgmsg( "%s:%d: small capacity?\n\t%s", __func__, __LINE__, field_str(group) ); + } + group->data.memsize = group->data.capacity; + } + + if( group->data.capacity == 0 ) { + dbgmsg( "%s:%d: zero capacity?\n\t%s", __func__, __LINE__, field_str(group) ); + } + + switch( group->level ) { + case 1: case 77: + if( dialect_mf() && is_table(group) ) { + size_t elem_size = std::max(group->data.memsize, group->data.memsize); + group->data.memsize = elem_size * group->occurs.ntimes(); + } + } + return e; +} + +static void +verify_block( const struct symbol_elem_t *block, + const struct symbol_elem_t *eoblock ) +{ + for( const struct symbol_elem_t *e=block; e < eoblock; e++ ) { + if( e->type != SymField ) { + continue; + } + const struct cbl_field_t *field = cbl_field_of(e); + + if( getenv(__func__) ) { + if( e == block ) { + static const char ds[] = "--------------------------------"; + dbgmsg( "%17s %-3s %-3s %-18s %-3s %3s %-16s C/D/R = init\n" + "%.25s %-.3s %-.3s %-.18s %-.3s %.3s %-.16s %-.7s %-.16s", + "", "ndx", "off", "type", "par", "lvl", "name", + ds, ds, ds, ds, ds, ds, ds, ds, ds ); + } + dbgmsg( "%s:%d: %3zu %3zu %-18s %3zu %02d %-16s %2u/%u/%d = '%s'", + __func__, __LINE__, e - symbols.elems, field->offset, + cbl_field_type_str(field->type), + field->parent, field->level, field->name, + field->data.capacity, field->data.digits, field->data.rdigits, + field->data.initial? field->data.initial : "(none)" ); + } + } +} + +static symbol_type_t +parent_type( const cbl_field_t *f ) { + return f->parent == 0? (symbol_type_t)-1 : symbol_at(f->parent)->type; +} + +cbl_field_t * +parent_of( const cbl_field_t *f ) { + return SymField == parent_type(f) ? cbl_field_of(symbol_at(f->parent)) : NULL; +} + +const cbl_field_t * +occurs_in( const cbl_field_t *f ) { + while( (f = parent_of(f)) != NULL ) { + if( f->occurs.ntimes() > 0 ) break; + } + return f; +} + +bool +immediately_follows( const cbl_field_t *field ) { + auto esym = symbols_end(); + auto e = std::find_if( symbol_at(field_index(field)) + 1, esym, + []( auto& e ) { + if( e.type != SymField ) return false; + auto f = cbl_field_of(&e); + return f->level == 1; + } ); + return e == esym; +} + +bool +is_variable_length( const cbl_field_t *field ) { + bool odo = false; + std::find_if( symbol_at(field_index(field)) + 1, symbols_end(), + [&odo, field]( const auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + if( f->level <= field->level ) return true; + if( f->occurs.depending_on ) { + odo = true; + return true; + } + } + return false; + } ); + return odo; +} + +/* + * "None of the items within the range, including data-name-2 and + * data-name-3, if specified, shall be of class object, message-tag, + * or pointer, a strongly-typed group item, an item subordinate to a + * strongly- typed group item, a variable-length data item, or an + * occurs-depending table." +*/ +cbl_field_t * +rename_not_ok( cbl_field_t *first, cbl_field_t *last) { + symbol_elem_t + *beg = symbol_at(field_index(first)), + *end = symbol_at(field_index(last)); + auto e = std::find_if( beg, ++end, + []( auto& e ) { + if( e.type != SymField ) return false; + auto f = cbl_field_of(&e); + switch( f->type ) { + case FldPointer: + return true; + default: + break; + } + if( f->occurs.depending_on ) return true; + return false; + } ); + return e == end? NULL : cbl_field_of(e); +} + +cbl_file_t * +symbol_record_file( const cbl_field_t *f ) { + do { + if( is_record_area(f) ) return cbl_file_of(symbol_at(f->parent)); + if( f->file ) return cbl_file_of(symbol_at(f->file)); + } while( (f = parent_of(f)) != NULL ); + return NULL; +} + +size_t +dimensions( const cbl_field_t *f ) { + size_t n = is_table(f)? 1 : 0; + + if( f->type == FldIndex ) return 0; + + while( (f = parent_of(f)) != NULL ) { + if( is_table(f) ) n++; + } + + return n; +} + +const char * +cbl_figconst_str( cbl_figconst_t fig ) { + switch(fig) { + case normal_value_e: return "NORMAL CONSTANT"; + case low_value_e: return "LOW-VALUES"; + case zero_value_e: return "ZEROS"; + case space_value_e: return "SPACES"; + case quote_value_e: return "QUOTES"; + case null_value_e: return "NULLS"; + case high_value_e: return "HIGH-VALUES"; + } + return "NOT FIGURATIVE CONSTANT"; +} + +static const char * +value_or_figconst_name( const char *value ) { + auto fig = cbl_figconst_of(value); + return normal_value_e == fig? value : cbl_figconst_str(fig); +} + +const char * +cbl_field_t::attr_str( const std::vector<cbl_field_attr_t>& attrs ) const +{ + const char *sep = ""; + char *out = NULL; + + for( auto attr : attrs ) { + char *part = out; + if( has_attr(attr) ) { + int erc = asprintf(&out, "%s%s%s", + part? part : "", sep, cbl_field_attr_str(attr)); + if( -1 == erc ) return part; + free(part); + sep = ", "; + } + } + return out? out : "none"; +} + +char * +field_str( const cbl_field_t *field ) { + static char string[3*sizeof(cbl_name_t)]; + char *pend = string; + + char name[2*sizeof(cbl_name_t)] = ""; + if( true ) { + if( field->occurs.ntimes() == 0 ) { + snprintf(name, sizeof(name), "%s", field->name); + } else { + char updown[1 + field->occurs.nkey] = ""; + for( size_t i=0; i < field->occurs.nkey; i++ ) { + updown[i] = field->occurs.keys[i].ascending? 'A' : 'D'; + } + snprintf(name, sizeof(name), "%s[%zu]%s", + field->name, field->occurs.ntimes(), updown); + } + } + + pend += snprintf(pend, string + sizeof(string) - pend, + "%02d %-20s ", field->level, name); + + char offset[32] = ""; + if( field->level > 1 ) { + sprintf( offset, "off%3zu", field->offset ); + } + + char parredef = + parent_of(field) != NULL && parent_of(field)->level == field->level? 'r' : 'P'; + if( 'r' == parredef && field->level == 0 ) parredef = 'p'; + if( field->has_attr(typedef_e) ) parredef = 'T'; + + const char *data = field->data.initial? field->data.initial : NULL; + if( data ) { + auto fig = cbl_figconst_of(data); + if( normal_value_e != fig ) { + data = cbl_figconst_str(fig); + } else { + char *s; + auto n = asprintf(&s, "'%s'", data); + gcc_assert(n); + auto eodata = data + field->data.capacity; + if( eodata != std::find_if_not(data, eodata, fisprint) ) { + char *p = reinterpret_cast<char*>(xrealloc(s, n + 8 + 2 * field->data.capacity)); + if( is_elementary(field->type) && + field->type != FldPointer && p != NULL ) { + s = p; + p += n; + strcat( p, "(0x" ); + p += 3; + for( auto d=data; d < eodata; d++ ) { + p += sprintf(p, "%02x", *d); + } + strcat( p++, ")" ); + } + } + data = s; + } + } else { + data = "NULL"; + if( field->type == FldSwitch ) { + data = xasprintf("0x%02x", field->data.upsi_mask->value); + } + } + if( field->level == 88 ) { + const auto& dom = *field->data.domain; + data = xasprintf("%s%s %s - %s%s", + dom.first.all? "A" : "", + value_or_figconst_name(dom.first.name()) , + dom.first.is_numeric? "(num)" : "", + dom.last.all? "A" : "", + dom.last.name()? value_or_figconst_name(dom.last.name()) : ""); + } + + char storage_type = 0x20; + assert( (field->attr & (linkage_e | local_e)) < (linkage_e | local_e) ); + if( field->attr & linkage_e ) storage_type = 'L'; + if( field->attr & local_e ) storage_type = 'w'; // because 'l' hard to read + + static const std::vector<cbl_field_attr_t> attrs { + figconst_1_e, figconst_2_e, figconst_4_e, rjust_e, ljust_e, + zeros_e, signable_e, constant_e, function_e, quoted_e, filler_e, + intermediate_e, embiggened_e, all_alpha_e, all_x_e, + all_ax_e, prog_ptr_e, scaled_e, refmod_e, based_e, any_length_e, + /* global_e, external_e, */ blank_zero_e, /* linkage_e, local_e, */ leading_e, + separate_e, envar_e, dnu_1_e, bool_encoded_e, hex_encoded_e, + depends_on_e, /* initialized_e, */ has_value_e, ieeedec_e, big_endian_e, + same_as_e, record_key_e, typedef_e, strongdef_e, + }; + + pend += snprintf(pend, string + sizeof(string) - pend, + "%c%3zu %-6s %c%c%c %2u{%3u,%u,%d = %s} (%s), line %d", + parredef, field->parent, offset, + (field->attr & global_e)? 'G' : 0x20, + (field->attr & external_e)? 'E' : 0x20, + storage_type, + field->data.memsize, + field->data.capacity, field->data.digits, field->data.rdigits, + data, field->attr_str(attrs), field->line ); + return string; +} + +void +labels_dump() { + symbols_dump( symbols.procedures, true ); +} + +struct capacity_of { + uint32_t capacity; + + capacity_of() : capacity(0) {} + + capacity_of operator()( symbol_elem_t& elem ) { + if( elem.type == SymField ) { + cbl_field_t *f = cbl_field_of(&elem); + if( is_elementary(f->type) ) { + capacity += field_size(f); + } + } + return *this; + } +}; + +static void +extend_66_capacity( cbl_field_t *alias ) { + static_assert(sizeof(symbol_elem_t*) == sizeof(const char *)); + assert(alias->data.picture); + assert(alias->type == FldGroup); + symbol_elem_t *e = symbol_at(alias->parent); + symbol_elem_t *e2 = + reinterpret_cast<symbol_elem_t*>(const_cast<char*>(alias->data.picture)); + assert(e < e2); + alias->data.picture = NULL; + + capacity_of cap; + if( alias->type == FldGroup ) { + e2 = symbol_at_impl(end_of_group(symbol_index(e2))); + } else { + ++e2; + } + alias->data.capacity = std::for_each(e, e2, cap).capacity; + assert(alias->data.capacity > 0); +} + +bool +symbols_alphabet_set( size_t program, const char name[]) { + struct alpha { + void operator()( symbol_elem_t& elem ) const { + if( elem.type == SymAlphabet ) { + parser_alphabet( *cbl_alphabet_of(&elem) ); + } + } + }; + + // Define alphabets for codegen. + std::for_each(symbols_begin(), symbols_end(), alpha() ); + + // Set collation sequence before parser_symbol_add. + if( name ) { + symbol_elem_t *e = symbol_alphabet(program, name); + if( !e ) { + return false; + } + parser_alphabet_use(*cbl_alphabet_of(e)); + } + return true; +} + +static std::ostream& +operator<<( std::ostream& os, const cbl_occurs_bounds_t& bound ) { + return os << bound.lower << ',' << bound.upper; +} + +static std::ostream& +operator<<( std::ostream& os, const cbl_field_data_t& field ) { + return os << field.memsize << ',' + << field.capacity << ',' + << field.digits << ',' + << field.rdigits << ',' + << (field.picture? field.picture : ""); +} + +static std::ostream& +operator<<( std::ostream& os, const cbl_field_t& field ) { + return os << field.parent + << ',' << field.level + << ',' << field.name + << ',' << field.offset + << ',' << cbl_field_type_str(field.type) + << ',' << "0x" << std::hex << field.attr << std::dec + // occurs + << ',' << field.occurs.depending_on + << ',' << field.occurs.bounds + << ',' << field.line + << ',' << field.data; +} + +static void +write_field_csv( size_t isym, const cbl_field_t *field ) { + static std::ofstream os( getenv("GCOBOL_DATA") ); + assert(os.is_open()); + + if( symbols.first_program < isym) { + os << isym << "," << *field << std::endl; + } +} + +static std::map<size_t, std::set<size_t>> same_record_areas; +size_t parse_error_count(); + +/* + * This function produces a zero-filled level number, so 1 becomes "01". It's + * needed because the diagnostic format string doesn't support zero-filled + * integer conversion or width. + */ +const char * +cbl_field_t::level_str( uint32_t level ) { + char *str = xasprintf( "%02u", level ); + return str; +} + +size_t +symbols_update( size_t first, bool parsed_ok ) { + struct symbol_elem_t *p, *pend; + std::list<cbl_field_t*> shared_record_areas; + + if( getenv(__func__) ) { + fprintf(stderr, "Initial"); + symbols_dump(std::max(first, symbols.first_program), true); + } + + for( p = symbols_begin(first); p < symbols_end(); p++ ) { + + if( p->type == SymAlphabet ) continue; // Alphabets already processed. + if( p->type == SymFile ) continue; // Do fields before files. + if( p->type != SymField ) continue; + + cbl_field_t *field = cbl_field_of(p); + if( field->our_index == 0 ) field->our_index = symbol_index(p); + if( field->type == FldForward ) continue; + if( field->type == FldSwitch ) continue; + if( is_literal(field) && field->var_decl_node != NULL ) continue; + + switch(field->level) { + case 0: + if( field->is_key_name() ) { + update_symbol_map2(p); + continue; + } + break; + case 1: + pend = calculate_capacity(p); + if( dialect_mf() && is_table(field) ) { + cbl_field_t *field = cbl_field_of(p); + if( field->data.memsize < field->size() ) { + field->data.memsize = field->size(); + } + } + update_block_offsets(p); + verify_block(p, pend); + break; + case 66: + assert(field->parent > 0); + assert(symbol_at(field->parent)->type == SymField); + if( field->type == FldGroup && field->data.picture ) { + extend_66_capacity(field); + } else { + auto data = parent_of(field)->data; + data.memsize = 0; + field->data = data; + } + break; + // no special processing for other levels + } + + if( getenv("GCOBOL_DATA") ) { + write_field_csv( p - symbols_begin(), field ); + } + + // Update ODO field in situ. + if( is_table(field) ) { + size_t& odo = field->occurs.depending_on; + if( odo != 0 ) { + auto odo_field = cbl_field_of(symbol_at(odo)); // get not-FldForward if exists + if( is_forward(odo_field) ) { + ERROR_FIELD(field, "table %s (line %d) DEPENDS ON %s, which is not defined", + field->name, field->line, odo_field->name); + } else { + // set odo to found field + odo = field_index(odo_field); + } + } + } + + bool size_invalid = field->data.memsize > 0 && symbol_redefines(field); + if( size_invalid ) { // redefine of record area is ok + auto redefined = symbol_redefines(field); + size_invalid = ! is_record_area(redefined); + } + if( !field->is_valid() || size_invalid ) + { + size_t isym = p - symbols_begin(); + symbols_dump(symbols.first_program, true); + if( symbol_at(field->parent)->type == SymFile ) { + assert(field->parent == field_index(field) + 1); + auto e = std::find_if( symbols_begin(field->parent), symbols_end(), + [program = p->program, ifile = field->parent] + ( const auto& elem ) { + if( elem.program == program ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + return f->parent == ifile; + } + } + return false; + } ); + if( e == symbols_end() ) { + // no field redefines the file's default record + auto file = cbl_file_of(symbol_at(field->parent)); + ERROR_FIELD(field, "line %d: %s lacks a file description", + file->line, file->name); + return 0; + } + } + // Better to report an error than to fail mysteriously with "0 errors". + if( yydebug || parse_error_count() == 0 ) { + if( field->type == FldInvalid ) { + ERROR_FIELD(field, "line %d: %s %s requires PICTURE", + field->line, field->level_str(), field->name); + + } else { + dbgmsg("%s: error: data item %s #%zu '%s' capacity %u rejected", + __func__, + 3 + cbl_field_type_str(field->type), + isym, field->name, field->data.capacity); + } + } + return 0; + } + + if(! (field->data.memsize == 0 || field_size(field) <= field->data.memsize) ) { + dbgmsg( "%s:%d: #%zu: invalid: %s", __func__, __LINE__, + symbol_index(p), field_str(cbl_field_of(p)) ); + } + assert(field->data.memsize == 0 || field_size(field) <= field_memsize(field)); + assert( !(field->data.memsize > 0 && symbol_explicitly_redefines(field)) ); + } + + if( getenv(__func__) ) { + fprintf(stderr, "Pre"); + symbols_dump(std::max(first, symbols.first_program), true); + } + + // A shared record area has no 01 child because that child redefines its parent. + for( auto sharer : shared_record_areas ) { + auto redefined = cbl_field_of(symbol_at(sharer->parent)); + sharer->data.capacity = redefined->data.capacity; + } + + for( p = symbols_begin(first); p < symbols_end(); p++ ) { + if( p->type != SymField ) continue; + cbl_field_t *field = cbl_field_of(p); + if( field->type == FldForward ) continue; + if( field->type == FldSwitch ) continue; + if( field->level == 0 && field->is_key_name() ) continue; + if( is_literal(field) && field->var_decl_node != NULL ) continue; + + if( field->is_typedef() ) { + auto isym = end_of_group( symbol_index(p) ); + p = symbol_at(--isym); + continue; + } + + // Verify REDEFINing field has no ODO components + auto parent = symbol_redefines(field); + if( parent && !is_record_area(parent) && is_variable_length(field) ) { + ERROR_FIELD(field, "line %d: REDEFINES field %s cannot be variable length", + field->line, field->name); + return 0; + } + + if( field->type == FldInvalid ) { + dbgmsg("%s:%d: %s", __func__, __LINE__, field_str(field)); + ERROR_FIELD(field, "line %d: %s %s requires PICTURE", + field->line, field->level_str(), field->name); + continue; + } + + assert( ! field->is_typedef() ); + + if( parsed_ok ) parser_symbol_add(field); + } + + finalize_symbol_map2(); + if( yydebug ) dump_symbol_map2(); + + build_symbol_map(); + + int ninvalid = 0; + for( p = symbols_begin(first); p < symbols_end(); p++ ) { + if( p->type == SymFile ) { // now do the files + auto& file = *cbl_file_of(p); + if( !file.varying_size.explicitly ) { + auto sizes = symbol_file_record_sizes( &file ); + file.varying_size = sizes; + } + file.deforward(); + if( ! file.validate() ) { + ninvalid++; + continue; + } + if( parsed_ok ) parser_file_add(&file); + } + } + + symbols_dump(symbols.first_program, true); + + symbols.procedures = p - symbols_begin(); + + return ninvalid > 0? 0 : symbols.procedures; +} + +size_t +symbol_index() { + assert( symbols.first_program <= symbols.nelem ); + return symbols.nelem - symbols.first_program; +} + +size_t +symbol_index( const struct symbol_elem_t *e ) { + assert(e); + size_t isym = symbols.index(e); + assert( isym < symbols.nelem ); + return isym; +} + +// Match on name (implied: of forward declaration). +static int +defined_fwd_cmp( const void *K, const void *E ) { + const struct symbol_elem_t + *k=static_cast<const struct symbol_elem_t *>(K), + *e=static_cast<const struct symbol_elem_t *>(E); + + if( k->type != SymField ) { + cbl_errx( "%s: key must be field", __func__); + } + if( k->type != e->type ) return 1; + if( k->program != e->program ) return 1; + + // Matches if names match, and both are fields in the same program. + // A forward declaration doesn't have parent because only its name is mentioned. + return strcasecmp(cbl_field_of(k)->name, cbl_field_of(e)->name); +} + +/* + * Given a symbol index that may be forward reference, return the + * "resolved" field, if extant, else the forward field. Forward + * references remain in the symbol table and their index may appear in, + * for example, cbl_file_t symbols. + */ +struct cbl_field_t * +symbol_field_forward( size_t index ) { + assert( index < symbols.nelem ); + symbol_elem_t *e = symbol_at(index); + if( (e->type != SymField) ) { + dbgmsg("%s: logic error: #%zu is %s", __func__, index, symbol_type_str(e->type)); + } + assert(e->type == SymField); + + if( cbl_field_of(e)->type == FldForward ) { + + symbol_elem_t *start = symbols_begin(++index); + size_t nelem = symbols_end() - start; + + struct symbol_elem_t *kid = + static_cast<struct symbol_elem_t *>(lfind( e, start, + &nelem, sizeof(*e), + defined_fwd_cmp ) ); + if( kid ) { + return cbl_field_of(kid); + } + } + return cbl_field_of(e); +} + +struct symbol_elem_t * +symbol_parent( const struct symbol_elem_t *e ) { + assert(e); + assert(e->type == SymField); + assert(cbl_field_of(e)->type != FldInvalid); + + if( cbl_field_of(e)->parent == 0 ) { + return NULL; + } + + symbol_elem_t *p = symbols.elems + cbl_field_of(e)->parent; + + assert( symbols.elems < p && p < symbols.elems + symbols.nelem ); + + return p; +} + +static bool +had_picture( const cbl_field_t *field ) { + if( is_elementary(field->type) ) { + switch(field->type) { + case FldAlphanumeric: + // VALUE string for alphanumeric might mean no PICTURE. + return field->data.initial == NULL; + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + return true; + case FldPointer: + case FldPacked: + case FldNumericBinary: + case FldNumericBin5: + case FldFloat: + break; + default: + break; + } + } + return false; +} + +void +name_queue_t::dump( const char tag[] ) const { + if( ! (yydebug ) ) return; + int i=0; + for( const auto& namelocs : this->c ) { + static char line[256]; + char *p = line; + const char *sep = ""; + for( auto nameloc : namelocs ) { + p += snprintf( p, line + sizeof(line) - p, "%s%s", sep, nameloc.name ); + sep = "::"; + } + dbgmsg("name_queue: %s: %2d: %s", tag, ++i, line); + } + if( empty() ) { + dbgmsg("name_queue: %s: is empty", tag); + } + } + +#if 0 +/* + * When adding a symbol, set the parent as an offset into the symbol table. + */ +static symbol_elem_t * +symbol_in_file( symbol_elem_t *e ) { + + auto beg = std::reverse_iterator<symbol_elem_t *>(e); + auto end = std::reverse_iterator<symbol_elem_t *>(symbols_begin()); + auto p = std::find_if( beg, end, + []( const symbol_elem_t& elem ) { + return elem.type == SymFilename; + } ); + + return p != end? &*p : NULL; +} +#endif + +static struct cbl_field_t * +symbol_field_parent_set( struct cbl_field_t *field ) +{ + if( field->level == 01 ) return NULL; + if( field->level == 77 ) return NULL; + if( field->level == 78 ) return NULL; + + struct symbol_elem_t *e = symbols.elems + symbols.nelem - 1; + struct symbol_elem_t *first = symbols.elems + symbols.first_program; + + for( ; field->parent == 0 && e >= first; e-- ) { + if( ! (e->type == SymField && cbl_field_of(e)->level > 0) ) { + continue; // level 0 fields are not user-declared symbols + } + + cbl_field_t *prior = cbl_field_of(e); + + if( prior->level == 77 || prior->level == 78 ) { + switch(field->level) { + case 66: case 88: + break; + default: + return NULL; // 77/78 cannot be a parent + } + } + + if( prior->level == field->level ) { + auto redefined = symbol_redefines(prior); + if( redefined ) prior = redefined; + field->parent = prior->parent; + return cbl_field_of(symbol_at(field->parent)); + } + + if( prior->level < field->level ) { + if( prior->has_attr(same_as_e) ) { + ERROR_FIELD(prior, "%s created with SAME AS or TYPE TO, cannot have new member %s", + prior->name, field->name); + return NULL; + } + field->parent = e - symbols.elems; + if( 1 < field->level && field->level < 50 ) { + if( had_picture(prior) ) { + ERROR_FIELD(prior, "group %s cannot have PICTURE clause", prior->name); + return NULL; + } + prior->type = FldGroup; + field->attr |= numeric_group_attrs(prior); + } + // verify level 88 domain value + if( is_numeric(prior) && field->level == 88 ) { + // domain array terminated by an element with a NULL name (value) + auto edom = field->data.domain; + while( edom->first.name() ) edom++; + + bool all_numeric = + std::all_of( field->data.domain, edom, + []( const cbl_domain_t& domain ) { + switch( cbl_figconst_of(domain.first.name()) ) { + case normal_value_e: + // parser ensures first.is_numeric == last.is_numeric + return domain.first.is_numeric && + domain.last.is_numeric; + case zero_value_e: + return true; + default: + break; + } + return false; + } ); + if( ! all_numeric ) { + auto loc = symbol_field_location(0); + error_msg(loc, "%s %s invalid VALUE for numeric type %s", + field->level_str(), field->name, prior->name); + } + } + return prior; + } + } + return NULL; +} + +class parent_elem_set +{ +private: + size_t parent_index; +public: + parent_elem_set( size_t parent_index ) + : parent_index(parent_index) + {} + void operator()( struct symbol_elem_t& e ) { + // cannot use cbl_field_of, because symbols.elems not yet ready + assert(e.type == SymField); + e.elem.field.parent = this->parent_index; + } +}; + +static symbol_elem_t +add_token( symbol_elem_t sym ) { + assert(sym.type == SymSpecial); + sym.elem.special.token = keyword_tok(sym.elem.special.name); + return sym; +} + +/* + * When adding registers, be sure to add a complementary cblc_field_t + * in libgcobol/constants.cc. + */ +void +symbol_table_init(void) { + assert(symbols.fd == -1); + assert(symbols.nelem == 0); + + symbol_table_t table = symbol_table_extend(); + + // Insert known contants at the top of an empty table. + // Constants are signified by their attribute + // Be warned that ZEROS plays for both sides. It is defined here as + // quoted, but in context it can be the value zero at run-time. Yes, it + // is an annoyance. + static char zeroes_for_null_pointer[8] = {0,0,0,0,0,0,0,0}; + + // These should match the definitions in libgcobol/constants.cc + static cbl_field_t constants[] = { + { 0, FldAlphanumeric, FldInvalid, space_value_e | constq, 0, 0, 0, nonarray, 0, + "SPACE", 0, {}, {1,1,0,0, " \0\xFF", NULL, { NULL }, { NULL } }, NULL }, + { 0, FldAlphanumeric, FldInvalid, space_value_e | constq , 0, 0, 0, nonarray, 0, + "SPACES", 0, {}, {1,1,0,0, " \0\xFF", NULL, { NULL }, { NULL } }, NULL }, + { 0, FldAlphanumeric, FldInvalid, low_value_e | constq, 0, 0, 0, nonarray, 0, + "LOW_VALUES", 0, {}, {1,1,0,0, "L\0\xFF", NULL, { NULL }, { NULL } }, NULL }, + { 0, FldAlphanumeric, FldInvalid, zero_value_e | constq, 0, 0, 0, nonarray, 0, + "ZEROS", 0, {}, {1,1,0,0, "0", NULL, { NULL }, { NULL } }, NULL }, + { 0, FldAlphanumeric, FldInvalid, high_value_e | constq, 0, 0, 0, nonarray, 0, + "HIGH_VALUES", 0, {}, {1,1,0,0, "H\0\xFF", NULL, { NULL }, { NULL } }, NULL }, + // IBM standard: QUOTE is a double-quote unless APOST compiler option + { 0, FldAlphanumeric, FldInvalid, quote_value_e | constq , 0, 0, 0, nonarray, 0, + "QUOTES", 0, {}, {1,1,0,0, "\"\0\xFF", NULL, { NULL }, { NULL } }, NULL }, + { 0, FldPointer, FldPointer, constq , 0, 0, 0, nonarray, 0, + "NULLS", 0, {}, {8,8,0,0, zeroes_for_null_pointer, NULL, { NULL }, { NULL } }, NULL }, + // IBM defines TALLY + // 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO. + { 0, FldNumericBin5, FldInvalid, signable_e, 0, 0, 0, nonarray, 0, + "_TALLY", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL, NULL, {NULL}, {NULL}}, NULL }, + // 01 ARGI is the current index into the argv array + { 0, FldNumericBin5, FldInvalid, signable_e, 0, 0, 0, nonarray, 0, + "_ARGI", 0, {}, {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL, NULL, {NULL}, {NULL}}, NULL }, + + // These last two don't require actual storage; they get BOOL var_decl_node + // in parser_symbol_add() + { 0, FldConditional, FldInvalid, constant_e , 0, 0, 0, nonarray, 0, + "_VERY_TRUE", 0, {}, {1,1,0,0, "", NULL, { NULL }, { NULL } }, NULL }, + { 0, FldConditional, FldInvalid, constant_e , 0, 0, 0, nonarray, 0, + "_VERY_FALSE", 0, {}, {1,1,0,0, "", NULL, { NULL }, { NULL } }, NULL }, + }; + for( struct cbl_field_t *f = constants; + f < constants + COUNT_OF(constants); f++ ) { + f->our_index = table.nelem; + struct symbol_elem_t e = { SymField, 0, { .field = *f } }; + table.elems[table.nelem++] = e; + } + + static symbol_elem_t environs[] = { + { SymSpecial, 0, {.special = {0, SYSIN_e, "SYSIN", 0, "/dev/stdin"}} }, + { SymSpecial, 0, {.special = {0, SYSIPT_e, "SYSIPT", 0, "/dev/stdout"}} }, + { SymSpecial, 0, {.special = {0, SYSOUT_e, "SYSOUT", 0, "/dev/stdout"}} }, + { SymSpecial, 0, {.special = {0, SYSLIST_e, "SYSLIST", 0, "/dev/stdout"}} }, + { SymSpecial, 0, {.special = {0, SYSLST_e, "SYSLST", 0, "/dev/stdout"}} }, + { SymSpecial, 0, {.special = {0, SYSPUNCH_e, "SYSPUNCH", 0, "/dev/stderr"}} }, + { SymSpecial, 0, {.special = {0, SYSPCH_e, "SYSPCH", 0, "/dev/stderr"}} }, + { SymSpecial, 0, {.special = {0, CONSOLE_e, "CONSOLE", 0, "/dev/stdout"}} }, + { SymSpecial, 0, {.special = {0, C01_e, "C01", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C02_e, "C02", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C03_e, "C03", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C04_e, "C04", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C05_e, "C05", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C06_e, "C06", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C07_e, "C07", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C08_e, "C08", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C09_e, "C09", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C10_e, "C10", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C11_e, "C11", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, C12_e, "C12", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, CSP_e, "CSP", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, S01_e, "S01", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, S02_e, "S02", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, S03_e, "S03", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, S04_e, "S04", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, S05_e, "S05", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, AFP_5A_e, "AFP-5A", 0, "/dev/null"}} }, + { SymSpecial, 0, {.special = {0, STDIN_e, "STDIN", 0, "/dev/stdin"}} }, + { SymSpecial, 0, {.special = {0, STDOUT_e, "STDOUT", 0, "/dev/stdout"}} }, + { SymSpecial, 0, {.special = {0, STDERR_e, "STDERR", 0, "/dev/stderr"}} }, + { SymSpecial, 0, {.special = {0, SYSERR_e, "SYSERR", 0, "/dev/stderr"}} }, + }; + + struct symbol_elem_t *p = table.elems + table.nelem; + std::transform(environs, environs + COUNT_OF(environs), p, add_token); + + table.nelem += COUNT_OF(environs); + + assert(table.nelem < table.capacity); + + // debug registers + assert(table.nelem + COUNT_OF(debug_registers) < table.capacity); + + group_size_t group_size = + std::accumulate(debug_registers, + debug_registers + COUNT_OF(debug_registers), group_size_t()); + debug_registers[0].data.memsize = + debug_registers[0].data.capacity = group_size.capacity(); + + auto debug_start = p = table.elems + table.nelem; + p = std::transform(debug_registers, + debug_registers + COUNT_OF(debug_registers), p, elementize); + table.nelem = p - table.elems; + assert(table.nelem < table.capacity); + std::for_each(debug_start+1, p, parent_elem_set(debug_start - table.elems)); + + // special registers + assert(table.nelem + COUNT_OF(special_registers) < table.capacity); + + p = table.elems + table.nelem; + p = std::transform(special_registers, + special_registers + COUNT_OF(special_registers), + p, elementize); + table.nelem = p - table.elems; + assert(table.nelem < table.capacity); + + // Initialize symbol table. + symbols = table; + + for( auto e = symbols.elems; e < symbols.elems + symbols.nelem; e++ ) { + if( e->type == SymField ) { + update_symbol_map2(e); + } + } + + symbols.first_program = symbols.nelem; + + symbols.registers.linage_counter = symbol_index(symbol_field(0,0, + "LINAGE-COUNTER")); + symbols.registers.file_status = symbol_index(symbol_field(0,0, "_FILE_STATUS")); + symbols.registers.return_code = symbol_index(symbol_field(0,0, "RETURN-CODE")); + symbols.registers.very_true = symbol_index(symbol_field(0,0, "_VERY_TRUE")); + symbols.registers.very_false = symbol_index(symbol_field(0,0, "_VERY_FALSE")); + + if( getenv(__func__) ) symbols_dump(0, true); +} + +/* + * Add a symbol to the symbol table. + */ +static struct symbol_elem_t * +symbol_add( struct symbol_elem_t *elem ) +{ + assert(symbols.capacity > 0); // initialized + + if( symbols.nelem == symbols.capacity ) { + symbol_table_extend(); + }; + + assert(symbols.nelem < symbols.capacity); // not at capacity + + if( elem->type == SymField ) { + // Place the [soon-to-be] index of this field into the field + cbl_field_of(elem)->our_index = symbols.nelem; + } + + struct symbol_elem_t *p = + static_cast<struct symbol_elem_t *>(lsearch( elem, symbols.elems, + &symbols.nelem, sizeof(*elem), + symbol_elem_cmp ) ); + assert(symbols.nelem > 1); + + if( is_program(*p) ) { + assert(p->program == 0 || p->elem.label.os_name != NULL); + p->program = p - symbols.elems; + } + + if( p->program == 0 ) { + p->program = p[-1].program; + } + + return p; +} + +static symbol_elem_t * +symbol_append( const symbol_elem_t& elem ) { + if( symbols.nelem == symbols.capacity ) { + symbol_table_extend(); + }; + + auto e = symbols.elems + symbols.nelem++; + *e = elem; + return e; +} + +cbl_label_t * +cbl_perform_tgt_t::finally( size_t program ) { + assert(0 < ito); + static const char fini[] = "_fini"; + cbl_label_t proto = *to(); + auto p = proto.name + strlen(proto.name); + auto n = snprintf(p, proto.name + sizeof(proto.name) - p, "%s", fini); + assert(n < int(sizeof(fini))); + symbol_elem_t elem = { + .type = SymLabel, + .program = program, + .elem = { .label = proto } }, *e; + e = symbol_add(&elem); + ifrom = symbol_index(e); + return cbl_label_of(e); +} + +struct symbol_elem_t * +symbol_file_add( size_t program, cbl_file_t *file ) { + auto e = std::find_if( symbols_begin(program), symbols_end(), + [file]( const auto& elem ) { + if( elem.type == SymFile ) { + auto f = cbl_file_of(&elem); + return 0 == strcasecmp(f->name, file->name); + } + return false; + } ); + if( e != symbols_end() ) { // duplicate SELECT filenames not allowed + auto f = cbl_file_of(e); + file->line = f->line; // use called structure to capture prior line + return NULL; + } + + struct symbol_elem_t sym = { SymFile, program, {NULL} }; + sym.elem.file = *file; + + e = symbol_add(&sym); + + const auto& f = *cbl_file_of(e); + if( f.same_record_as > 0 ) { // add to list of files sharing one record area + same_record_areas[f.same_record_as].insert(symbol_index(e)); + } + + return e; +} + +struct symbol_elem_t * +symbol_alphabet_add( size_t program, struct cbl_alphabet_t *alphabet ) { + struct symbol_elem_t sym = { SymAlphabet, program, {.alphabet = *alphabet} }; + return symbol_add(&sym); +} + +size_t +numeric_group_attrs( const cbl_field_t *field ) { + static const size_t inherit = signable_e | leading_e | separate_e | big_endian_e; + static_assert(sizeof(cbl_field_t::type) < sizeof(inherit), "need bigger type"); + assert(field); + if( field->type == FldNumericDisplay || field->type == FldGroup ) { + if( field->parent > 0 && symbol_at(field->parent)->type == SymField ) { + cbl_field_t *parent = parent_of(field); + assert(parent); + return inherit & parent->attr; + } + } + return 0; +} + +/* + * "The essential characteristics of a type, which is identified by + * its type- name, are the relative positions and lengths of the + * elementary items defined in the type declaration, and the ALIGNED, + * BLANK WHEN ZERO, DYNAMIC LENGTH, JUSTIFIED, PICTURE, SIGN, + * SYNCHRONIZED, and USAGE clauses specified for each of these + * elementary items" + */ +struct symbol_elem_t * +symbol_typedef_add( size_t program, struct cbl_field_t *field ) { + assert(field); + assert(field->is_typedef()); + + if( field->is_strongdef() && field->level != 1 ) { + ERROR_FIELD(field, "%s %s STRONG TYPEDEF must be level 01", + field->level_str(), field->name); + return NULL; + } + + // Might have just been added to the symbol table. + auto e = symbols_end() - 1; + assert( symbols_begin() < e ); + if( e->type == SymField ) { + auto f = cbl_field_of(e); + if( f == field ) return e; + } + + symbol_elem_t elem = { SymField, program, { .field = *field } }; + + e = symbol_add( &elem ); + + return e; +} + +typedef std::map <std::string, size_t > namemap_t; +static std::map <size_t, namemap_t > numeric_constants; + +/* + * Add a Cobol variable/literal to the symbol table. + * + * Each time the filename changes, a "filename" symbol is added to the + * symbol table. We find what file a symbol was defined in by + * searching back from the symbol for a filename entry. + * + * Fields may be function pointers too, from dlopen(3). + * + * Most symbols are Cobol variables of type cbl_field_t. Duplicate + * names are allowed; they just can't be referenced. + * + * The passed parameter contains two pointers; the initial value and + * the picture. Except for inherited types, these pointers are NOT + * changed. Make them point where you want them to point. + * + * Literals have an initial pointer only; the picture NULL. + * + * Returns a pointer to the added symbol, always. + */ +struct symbol_elem_t * +symbol_field_add( size_t program, struct cbl_field_t *field ) +{ + field->our_index = symbols.nelem; + cbl_field_t *parent = symbol_field_parent_set( field ); + if( parent && parent->type == FldGroup) { + // Inherit effects of parent's USAGE, as though it appeared 1st in the + // member's definition. + static const size_t inherit = global_e | external_e | local_e | linkage_e; + field->attr = inherit & parent->attr; + field->attr |= numeric_group_attrs(parent); + field->usage = parent->usage; + // BINARY-LONG, for example, sets capacity. + if( is_numeric(parent->usage) && parent->data.capacity > 0 ) { + field->type = parent->usage; + field->data = parent->data; + field->data.value = 0.0; + field->data.initial = NULL; + } + } + + char *s; + if( (s = getenv(__func__)) != NULL ) { + if( s[0] == 'D' ) { + for( struct symbol_elem_t *e = symbols_begin(); e < symbols_end(); e++ ) { + fprintf(stderr, "%zu: %s ", e - symbols.elems, symbol_type_str(e->type)); + if( e->type == SymField ) { + fprintf(stderr, "%s = %s", + cbl_field_of(e)->name, cbl_field_of(e)->data.initial); + } + fprintf(stderr, "\n"); + } + } + + dbgmsg( "%s:%d: %3zu %-18s %02d %-16s %u/%u/%d = '%s'", __func__, __LINE__, + field->offset, + cbl_field_type_str(field->type), field->level, field->name, + field->data.capacity, field->data.digits, field->data.rdigits, + field->data.initial? field->data.initial : "(none)" ); + } + + if( is_forward(field) ) { + auto *e = symbol_field( program, field->parent, field->name ); + if( e ) { + field = cbl_field_of(e); + if( is_constant(field) && field->type == FldNumericBin5 ) { + cbl_name_t lname; + std::transform( field->name, field->name + strlen(field->name) + 1, + lname, tolower ); + numeric_constants[program][lname] = symbol_index(e); + } + return e; + } + } + + if( strlen(field->name) == 6 && 0 == strcasecmp("FILLER", field->name) ) { + field->attr |= filler_e; + } + if( field->name[0] == '\0' ) { + field->attr |= filler_e; + } + + struct symbol_elem_t key = { .type = SymField, .program = program, NULL }; + key.elem.field = *field; + + // Literals must have an initial value; + assert( !is_literal(field) || field->data.initial ); + + /* + * Field names need not be unique. They exist in the symbol table + * (and in memory) regardless, but only unique names may be referenced. + * We don't use symbol_add, because it looks up the symbol by name. + */ + + // ensure the table has room + if( symbols.nelem == symbols.capacity ) { + symbol_table_extend(); + }; + + assert(symbols.nelem < symbols.capacity); // not at capacity + + // append the symbol + struct symbol_elem_t *e = symbols_end(); + *e = key; + symbols.nelem++; + + field = cbl_field_of(e); + if( is_constant(field) && field->type == FldNumericBin5 ) { + cbl_name_t lname; + std::transform( field->name, field->name + strlen(field->name) + 1, + lname, tolower ); + numeric_constants[program][lname] = symbol_index(e); + } + + update_symbol_map2( e ); + return e; +} + +/* + * TYPEDEF is relevant only in Data Division. + */ +struct symbol_elem_t * +symbol_typedef( size_t program, const char name[] ) +{ + auto beg = std::reverse_iterator<symbol_elem_t *>(symbols_end()); + auto end = std::reverse_iterator<symbol_elem_t *>(symbols_begin(program)); + + auto p = std::find_if( beg, end, + [name]( const symbol_elem_t& sym ) { + if( sym.type == SymField ) { + auto f = cbl_field_of(&sym); + if( f->has_attr(typedef_e) ) { + return 0 == strcasecmp(name, f->name); + } + } + return false; + } ); + + return p != end? &*p : NULL; +} + +/* + * Search backwards during symbol-table construction for nearest name. + */ +symbol_elem_t * +symbol_field( size_t program, size_t parent, const char name[] ) +{ + class match_field { + size_t program, parent; + const char *name; + public: + match_field( size_t program, size_t parent, const char name[] ) + : program(program) + , parent(parent) + , name(name) + {} + bool operator()( const symbol_elem_t& sym ) const { + if( sym.type != SymField ) return false; + if( sym.program != program ) return false; + + const auto& field = *cbl_field_of(&sym); + + if( parent > 0 && parent != field.parent ) return false; + if( field.is_typedef() ) return false; + + return 0 == strcasecmp(name, field.name); + } + }; + + auto beg = std::reverse_iterator<symbol_elem_t *>(symbols_end()); + auto end = std::reverse_iterator<symbol_elem_t *>(symbols_begin(program)); + auto p = std::find_if( beg, end, match_field(program, parent, name) ); + + return p != end? &*p : NULL; +} + +symbol_elem_t * +symbol_register( const char name[] ) +{ + auto p = std::find_if(symbols_begin(), symbol_at(symbols.first_program), + [len = strlen(name), name]( auto e ) { + if( e.type == SymField ) { + if( strlen(cbl_field_of(&e)->name) == len ) { + return 0 == strcasecmp(cbl_field_of(&e)->name, name); + } + } + return false; + } ); + + return p; +} + +// Find current 01 record during Level 66 construction. +const symbol_elem_t * +symbol_field_current_record() { + assert(symbols.nelem > 0); + size_t program = symbols_end()[-1].program; + auto beg = std::reverse_iterator<symbol_elem_t *>(symbols_end()); + auto end = std::reverse_iterator<symbol_elem_t *>(symbols_begin(program)); + auto p = std::find_if( beg, end, + []( const auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + return f->level == 1; + } + return false; + } ); + return p != end? &*p : NULL; +} + + +struct symbol_elem_t * +symbol_field_forward_add( size_t program, size_t parent, + const char name[], int line ) +{ + auto e = symbol_field(program, parent, name); + if( e ) return e; + + struct cbl_field_t field = { 0, + FldForward, FldInvalid, 0, parent, 0, 0, + nonarray, line, "", + 0, cbl_field_t::linkage_t(), + {0,0,0,0, " ", NULL, {NULL}, {NULL}}, NULL }; + if( sizeof(field.name) < strlen(name) ) { + dbgmsg("%s:%d: logic error: name %s too long", __func__, __LINE__, name); + return NULL; + } + strcpy( field.name, name); + return symbol_field_add( program, &field ); +} + +struct symbol_elem_t * +symbol_literalA( size_t program, const char name[] ) +{ + cbl_field_t field = {}; + field.type = FldLiteralA; + field.data.initial = name; + field.attr = constq; + + struct symbol_elem_t key = { SymField, program, { .field = field } }; + + symbol_elem_t *start = symbols_begin(key.program), *e; + size_t nelem = symbols_end() - start; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, start, + &nelem, sizeof(key), + symbol_elem_cmp ) ); + return e; +} + +struct symbol_elem_t * +symbol_file( size_t program, const char name[] ) { + size_t nelem = symbols.nelem; + struct symbol_elem_t key = { SymFile, program, {NULL} }, *e = &key; + + assert(strlen(name) < sizeof(key.elem.file.name)); + strcpy(key.elem.file.name, name); + + do { + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &nelem, sizeof(*e), + symbol_elem_cmp ) ); + if( e ) break; + key.program = cbl_label_of(symbol_at(key.program))->parent; + if( key.program == 0 ) break; // no file without a program + } while( !e ); + + if( e ) { + assert(e->type == SymFile); + return e; + } + + // perhaps a record name? + for( e = symbol_field(program, 0, name); e != NULL; e = symbol_parent(e) ) { + if( e->type == SymFile ) { + return e; + } + if( e->type != SymField ) { + dbgmsg("%s:%d: '%s' is not a file and has parent of type %s", + __func__, __LINE__, name, symbol_type_str(e->type)); + return NULL; + } + if( symbol_index(e) == 0 ) { + dbgmsg("%s:%d: '%s' is not a file and has no parent", + __func__, __LINE__, name); + return NULL; + } + } + + assert(!e); + return e; +} + +struct symbol_elem_t * +symbol_field_alias( struct symbol_elem_t *e, const char name[] ) +{ + cbl_field_t alias = *cbl_field_of(e); + cbl_field_data_t data = { .memsize = alias.data.memsize, + .capacity = alias.data.capacity }; + alias.data = data; + alias.data.memsize = 0; + + assert(strlen(name) < sizeof(alias.name)); + strcpy(alias.name, name); + + alias.level = 66; + alias.parent = symbol_index(e); + alias.var_decl_node = NULL; + + return symbol_field_add(e->program, &alias); +} + +struct symbol_elem_t * +symbol_field_alias2( struct symbol_elem_t *e, struct symbol_elem_t *e2, + const char name[] ) +{ + assert(cbl_field_of(e)->data.picture == NULL); + e = symbol_field_alias(e, name); + + cbl_field_t& alias = *cbl_field_of(e); + alias.type = FldGroup; + + // store THRU symbol in data.picture, capacity computed by extend_66_capacity + alias.data.picture = reinterpret_cast<char*>(e2); + + return e; +} + +static bool +target_in_src( const cbl_field_t *tgt, const cbl_field_t *src ) { + size_t isrc = field_index(src); + while( tgt->parent > 0 ) { + if( tgt->parent == isrc ) return true; + auto e = symbol_at(tgt->parent); + if( e->type != SymField ) break; + tgt = cbl_field_of(e); + } + return false; +} + +class elem_group_t { + const symbol_elem_t *bog, *eog; +public: + elem_group_t( const symbol_elem_t *bog, const symbol_elem_t *eog ) + : bog(bog), eog(eog) {} + const symbol_elem_t *begin() const { return bog; } + const symbol_elem_t *end() const { return eog; } +}; + +static size_t +seek_parent( const symbol_elem_t *e, size_t level ) { + size_t program = e->program; + const cbl_field_t *field = cbl_field_of(e); + while( program == e->program && level <= field->level ) { + if( e->type != SymField ) break; + auto f = cbl_field_of(e); + if( f->parent == 0 ) break; + e = symbol_at(f->parent); + } + return symbol_index(e); +} + +/* + * For SAME AS definition, copy the field metadata and update the parent. + * For a group, create new fields and copy members recursively. + * Precondition: both fields exist in the symbol table. + * Postcondition: return final element copied. + * + * "The condition-name entries for a particular conditional variable + * shall immediately follow the entry describing the item...." + */ +struct symbol_elem_t * +symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ) { + if( target_in_src(tgt, src) ) { + ERROR_FIELD(tgt, "%s %s may not reference itself as part of %s %s", + tgt->level_str(), tgt->name, src->level_str(), src->name); + return NULL; + } + if( tgt->level == 77 && src->type == FldGroup ) { + ERROR_FIELD(tgt, "%s %s TYPE TO %s must be an elementary item", + tgt->level_str(), tgt->name, src->name); + return NULL; + } + auto last_elem = symbol_at(field_index(tgt)); + tgt->same_as(*src, src->is_typedef()); + + size_t isrc = field_index(src); + + symbol_elem_t *bog = symbol_at(isrc); + symbol_elem_t *eog = symbol_at_impl(end_of_group(isrc), true); + + if( src->type != FldGroup ) { + // For scalar, check for Level 88, which if extant must follow immediately. + eog = std::find_if( bog + 1, + symbols_end(), + []( const auto& elem ) { + if( elem.type == SymField ) { + auto f = cbl_field_of(&elem); + return f->level != 88; + } + return true; + } ); + } + + cbl_field_t dup = { .parent = field_index(tgt), .line = tgt->line }; + + elem_group_t group(++bog, eog); + + for( const auto& elem : group ) { + const cbl_field_t *that(cbl_field_of(&elem)); + if( is_forward(that) ) { + auto e = symbol_field(current_program_index(), 0, that->name); + that = cbl_field_of(e); // must exist + } + memcpy(dup.name, that->name, sizeof(dup.name)); + dup.occurs = that->occurs; + dup.level = that->level; + switch( dup.level ) { + case 0: + assert(that->type == FldIndex); + case 88: + break; + default: + dup.level += tgt->level; + break; + } + dup.parent = seek_parent(last_elem, dup.level); + dup.same_as( *that, src->is_typedef() ); + + last_elem = symbol_field_add( last_elem->program, &dup ); + } + + return last_elem; +} + +static bool first_among_equals( const cbl_file_t *a, const cbl_file_t *b ) { + return symbol_index(symbol_elem_of(a)) < symbol_index(symbol_elem_of(b)); +} + +size_t +symbol_file_same_record_area( std::list<cbl_file_t*>& files ) { + auto first = std::min_element(files.begin(), files.end(), first_among_equals); + const auto ifirst_file = symbol_index(symbol_elem_of(*first)); + + for( auto file : files ) { + if( *first == file ) { + assert(symbol_index(symbol_elem_of(file)) == ifirst_file ); + file->same_record_as = 0; + continue; + } + auto& redefines = cbl_field_of(symbol_at(file->default_record))->parent; + redefines = (*first)->default_record; + file->same_record_as = ifirst_file; + } + return ifirst_file; +} + +static symbol_elem_t * +next_program( symbol_elem_t *elem ) { + size_t start = elem? symbol_index(elem) : 0; + symbol_elem_t * e = + std::find_if( symbols_begin(start), symbols_end(), is_program ); + if( e == symbols_end() ) { + return NULL; + } + return e; +} + +bool +is_cobol_name( const char name[] ) { + for( symbol_elem_t *e = next_program(NULL); + e != NULL; e = next_program(++e) ) { + if( strcmp(name, cbl_label_of(e)->name) == 0 ) return true; + if( symbol_field(symbol_index(e), 0, name) ) return true; + if( symbol_label(symbol_index(e), LblNone, 0, name) ) return true; + } + return false; +} + +const char * +is_numeric_constant( const char name[] ) { + cbl_name_t lname; + auto program = current_program_index(); + std::transform( name, + name + std::min(sizeof(lname), strlen(name) + 1), + lname, tolower ); + auto p = numeric_constants[program].find(lname); + if( p != numeric_constants[program].end() ) { + size_t isym = p->second; + return cbl_field_of(symbol_at(isym))->data.initial; + } + return NULL; +} + +// get default record layout for a file +struct cbl_field_t * +symbol_file_record( struct cbl_file_t *file ) { + return cbl_field_of(symbol_at(file->default_record)); +} + +class is_section { + cbl_section_type_t section_type; + public: + is_section( cbl_section_type_t sect ) : section_type(sect) {} + bool operator()( symbol_elem_t& e ) const { + return e.type == SymDataSection && cbl_section_of(&e)->type == section_type; + } +}; + + +static bool fd_record_size_cmp( const symbol_elem_t& a, const symbol_elem_t& b ) { + return cbl_field_of(&a)->data.capacity < cbl_field_of(&b)->data.capacity; +} + +/* + * Find largest and smallest record defined for a file. The rule is: + * cbl_file_t::varies() returns true if the record size varies, + * whether explicit or implied. In all cases if the record size + * varies, min < else, min max == max. + * + * Input: Output: + * ------------------------------------------ ------------------ + * VARIES FROM TO 1st-FD-size 2nd-FD-size varies() min max + * VARIES x y true x y + * VARIES x y any any true x y + * VARIES x true x -1 + * VARIES y any any true 0 y + * VARIES x 120 150 true x 150 + * VARIES 120 150 true 0 150 + * VARIES 150 true 0 150 + * 120 150 true 120 150 + * 150 false 150 150 + * + * ISO 13.4.4.2 says "When no record description entries are specified: + * a) a RECORD clause shall be specified in the file description entry" + * + * If VARIES TO Y is explicit, FROM 0 is implicit, notwithstanding any + * record description(s). + */ +cbl_file_t::varying_t +symbol_file_record_sizes( struct cbl_file_t *file ) { + if( file->varies() ) { + return file->varying_size; + } + + // Compute implicit records sizes from FD 01 records + assert( ! file->varying_size.explicitly ); + + auto file_element = symbol_elem_of(file); + auto pend = std::find_if( file_element, symbols_end(), + is_section(working_sect_e) ); + std::list<symbol_elem_t> records; + std::copy_if( file_element, pend, back_inserter(records), + [ifile = symbol_index(file_element)](const symbol_elem_t& elem) { + if( elem.type == SymField ) { + return ifile == cbl_field_of(&elem)->file; + } + return false; + } ); + if( records.empty() ) return file->varying_size; + + auto p = std::minmax_element(records.begin(), records.end(), + fd_record_size_cmp); + + // Make a copy, update the sizes, and return it. + cbl_file_t::varying_t output = file->varying_size; + + output.min = cbl_field_of(&*p.first)->data.capacity; + output.max = cbl_field_of(&*p.second)->data.capacity; + + if( yydebug && getenv(__func__) ) { + dbgmsg("%s: %s: min '%s' %zu, max '%s' %zu", __func__, file->name, + cbl_field_of(&*p.first)->name, output.min, + cbl_field_of(&*p.second)->name, output.max); + } + + assert(output.min > 0 && "min record size is 0"); + assert(output.min <= output.max); + + return output; +} + +/* + * Find a symbol's type based solely on its name. + * + * The lexer uses this function to determine if the referenced name is + * special in some way. To be correct, the symbol table (or at least + * the lookup mechanism) must reflect what the current namespace is. + * If a symbol is ambiguous -- if a name could be a level 01 and part + * of a group, say -- only the first match is returned. This may lead + * the parser astray, which is too bad. + * + * As of 30 Oct 2021, there are 22 instances where introducing just a + * plain NAME in the parser where otherwise NAME X Y is needed would + * create shift-reduce conflicts. This function allows the lexer to + * returns a spealized name, which the parser distinguishes from a + * generic name. The S/R conflicts could in theory be resolved with + * precedence, but it's not obvious to the author that's the best + * choice, or the least effort. + * + * The risk seems small. The distinction here is by field type, not + * value. If there are two fields FOO, one a level 88 and another a + * variable, it's not clear if that can be resolved by the lexer, even + * with the parser's help. The bet is that won't matter because + * it won't happen. + */ +enum cbl_field_type_t +symbol_field_type( size_t program, const char name[] ) { + struct symbol_elem_t *e = symbol_field( program, 0, name ); + + return e && e->type == SymField? cbl_field_of(e)->type: FldInvalid; +} + +struct cbl_field_t * +constant_of( size_t isym ) +{ + assert(isym < symbols.nelem); + struct cbl_field_t *field = cbl_field_of(symbols.elems + isym); + assert((field->attr & constant_e) == constant_e); + return field; +} + +bool +cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) { + if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) { + alphabet[ch] = high_value; + last_index = ch; + return true; + } + auto taken = alphabet[ch]; + error_msg(loc, "ALPHABET %s, character '%c' (X'%x') " + "in position %d already defined at position %d", + name, + ISPRINT(ch)? ch : '?', ch, + high_value, taken ); + if( yydebug ) dump(); + return false; +} + +void +cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) { + if( ch < 256 ) { + alphabet[ch] = alphabet[last_index]; + if( ch == high_index ) high_index--; + return; + } // else it's a figurative constant ... + + ch &= 0xFFFF; // High bit indicated symbol-table entry; mask off high word. + assert( ch < 256 ); + auto field = cbl_field_of(symbol_at(ch)); + auto attr = field->attr; + assert(attr & constant_e); + + // last_index is already set; use it as the "last value before ALSO" + if( attr & low_value_e ) { + alphabet[0] = alphabet[last_index]; + return; + } + if( attr & high_value_e ) { + alphabet[high_index--] = alphabet[last_index]; + return; + } + if( attr & (space_value_e|quote_value_e) ) { + ch = field->data.initial[0]; + alphabet[ch] = alphabet[last_index]; + return; + } + if( attr & (zero_value_e) ) { + alphabet[0] = alphabet[last_index]; + error_msg(loc, "ALSO value '%s' is unknown", field->name); + return; + } + error_msg(loc, "ALSO value %zu is unknown", ch); +} + +using std::deque; +static deque<cbl_field_t*> stack; + +static cbl_field_t * +new_temporary_impl( enum cbl_field_type_t type ) +{ + extern int yylineno; + static int nstack, nliteral; + static const struct cbl_field_t empty_alpha = { + 0, FldAlphanumeric, FldInvalid, + intermediate_e, 0, 0, 0, nonarray, 0, "", + 0, cbl_field_t::linkage_t(), + {0,0,0,0, NULL, NULL, {NULL}, {NULL}}, NULL }; + struct cbl_field_t *f = new cbl_field_t; + f->type = type; + + switch(type) { + case FldGroup: + case FldAlphanumeric: + *f = empty_alpha; + break; + case FldInvalid: + case FldClass: + case FldForward: + case FldIndex: + case FldSwitch: + case FldDisplay: + case FldPointer: + case FldBlob: + break; + case FldConditional: + *f = empty_conditional; + break; + case FldLiteralA: + case FldLiteralN: + *f = empty_literal; + f->type = type; + break; + case FldNumericBin5: + case FldNumericBinary: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldPacked: + *f = empty_comp5; + break; + case FldFloat: + *f = empty_float; + break; + } + + f->line = yylineno; + if( is_literal(type) ) { + snprintf(f->name, sizeof(f->name), "_literal%d",++nliteral); + } else { + snprintf(f->name, sizeof(f->name), "_stack%d",++nstack); + + if( getenv("symbol_temporaries_free") ) { + dbgmsg("%s: %s, %s", __func__, f->name, 3 + cbl_field_type_str(f->type)); + } + } + + return f; +} + +cbl_field_t * +new_temporary_decl() { + auto field = new_temporary_impl(FldAlphanumeric); + strcpy(field->name, "DECLARATIVES"); + return field; +} + +static inline cbl_field_t * +parser_symbol_add2( cbl_field_t *field ) { + parser_symbol_add(field); + return field; +} + +static cbl_field_t * +new_literal_add( const char initial[], uint32_t len, enum cbl_field_attr_t attr ) { + static char empty[2] = "\0"; + cbl_field_t *field = NULL; + if( !(attr & quoted_e) ) + { + field = new_temporary_impl(FldLiteralN); + field->attr |= attr; + field->data.valify(initial); + } + else + { + field = new_temporary_impl(FldLiteralA); + field->attr |= attr; + field->data.initial = len > 0? initial : empty; + field->data.capacity = len; + + if( ! field->internalize() ) + { + ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial); + } + } + + static size_t literal_count = 1; + sprintf(field->name, + "%s%c_%zd", + "_literal", + field->type == FldLiteralA ? 'a' : 'n', + literal_count++); + + return parser_symbol_add2(field); +} + +static temporaries_t temporaries; + +cbl_field_t * +temporaries_t::literal( const char value[], uint32_t len, cbl_field_attr_t attr ) { + auto key = literal_an(value, quoted_e == (attr & quoted_e)); + + if( 0 == (attr & hex_encoded_e) ) { + auto p = literals.find(key); + if( p != literals.end() ) { + cbl_field_t *field = p->second; + return field; + } + } + return literals[key] = new_literal_add(value, len, attr); +} + +cbl_field_t * +new_literal( uint32_t len, const char initial[], enum cbl_field_attr_t attr ) { + return temporaries.literal(initial, len, attr); +} + +void +temporaries_t::dump() const { + extern int yylineno; + char *output = xasprintf("%4d: %zu Literals", yylineno, literals.size()); + + for( const auto& elem : used ) { + if( ! elem.second.empty() ) { + char *so_far = output; + output = xasprintf("%s, %zu %s", + so_far, + elem.second.size(), + 3 + cbl_field_type_str(elem.first)); + free(so_far); + } + } + dbgmsg("status: %s", output); + free(output); +} + +temporaries_t::~temporaries_t() { + if( getenv( "symbol_temporaries_free" ) ) { + dbgmsg("%s: %zu literals", __func__, literals.size()); + for( const auto& elem : literals ) { + const literal_an& key(elem.first); + fprintf(stderr, "%c '%s'\n", key.is_quoted? 'Q' : ' ', key.value.c_str()); + } + dump(); + } +} + +cbl_field_t * +temporaries_t::add( cbl_field_t *field ) { + auto p = used[field->type].insert(field); + bool yn(p.second); + assert(yn); + return *p.first; +}; + +cbl_field_t * +temporaries_t::reuse( cbl_field_type_t type ) { +//// DUBNER is defeating reuse as part of investigating problems with recursion + return NULL; +//// + + auto& fields = freed[type]; + cbl_field_t *field; + + if( fields.empty() ) { + return NULL; + } else { + auto p = fields.begin(); + field = *p; + fields.erase(p); + } + + return add(field); +} + +cbl_field_t * +temporaries_t::acquire( cbl_field_type_t type ) { + cbl_field_t *field = reuse(type); + + if( !field ) { + field = new_temporary_impl(type); + add(field); + } + return parser_symbol_add2(field); // notify of reuse +} + +void +symbol_temporaries_free() { + if( getenv(__func__) ) temporaries.dump(); + for( auto& elem : temporaries.used ) { + const cbl_field_type_t& type(elem.first); + temporaries_t::fieldset_t& used(elem.second); + + auto freed = std::inserter(temporaries.freed[type], + temporaries.freed[type].begin()); + std::transform( used.begin(), used.end(), freed, + []( auto field ) { + switch( field->type ) { + case FldConditional: + field->attr &= intermediate_e; + break; + case FldNumericBin5: + field->set_attr(signable_e); + break; + default: + break; + } + return field; + } ); + used.clear(); + } +} + +cbl_field_t * +new_alphanumeric( size_t capacity ) { + cbl_field_t * field = new_temporary_impl(FldAlphanumeric); + field->data.capacity = capacity; + temporaries.add(field); + return parser_symbol_add2(field); +} + +cbl_field_t * +new_temporary( enum cbl_field_type_t type, const char *initial ) { + if( ! initial ) { + assert( ! is_literal(type) ); // Literal type must have literal value. + return temporaries.acquire(type); + } + if( is_literal(type) ) { + auto field = temporaries.literal(initial, + type == FldLiteralA? quoted_e : none_e); + return field; + } + cbl_field_t *field = new_temporary_impl(type); + field->data.capacity = strlen(field->data.initial = initial); + temporaries.add(field); + parser_symbol_add(field); + + return field; +} + +#if needed +cbl_field_t * +keep_temporary( cbl_field_type_t type ) { + auto field = new_temporary(type); + bool ok = temporaries.keep(field); + assert(ok); + return field; +} +#endif + +cbl_field_t * +new_temporary_like( cbl_field_t skel ) { + auto field = temporaries.reuse(skel.type); + if( ! field ) { + field = new_temporary_impl(skel.type); + temporaries.add(field); + } + memcpy(skel.name, field->name, sizeof(field->name)); + skel.var_decl_node = field->var_decl_node; + *field = skel; + + return parser_symbol_add2(field); +} + +cbl_field_t * +new_temporary_clone( const cbl_field_t *orig) { + cbl_field_type_t type = is_literal(orig)? FldAlphanumeric : orig->type; + auto field = temporaries.reuse(type); + if( ! field ) { + field = new_temporary_impl(type); + temporaries.add(field); + } + field->data = orig->data; + if( field->type == FldNumericBin5 ) field->type = orig->type; + field->attr = intermediate_e; + + return parser_symbol_add2(field); +} + +bool +cbl_field_t::is_ascii() const { + return std::all_of( data.initial, + data.initial + data.capacity, + isascii ); +} + +/* + * Convert an input source-code string literal (or VALUE) to internal encoding. + * + * Input encoding initially defaults to UTF-8, regardless of locale(7), + * for two reasons: + * 1) The source code might not match the locale + * 2) The assumption is easily disproved with most input. That is, + * input values above 0x7F will rarely look like UFT-8 unless + * they actually are UTF-8. + * + * If conversion from UTF-8 fails, the compiler's locale is examined + * next. If it is C, it is ignored, else it is tried. If that fails, + * the input is assumed to be encoded as CP1252. + * + * This is a global static sticky setting, meaning that during + * compilation, if it moves off the default, it adjusts only once, and + * never reverts. + */ +static const char standard_internal[] = "CP1252//"; +extern os_locale_t os_locale; + +static const char * +guess_encoding() { + static const char *fromcode; + + if( ! fromcode ) { + return fromcode = os_locale.assumed; + } + + if( fromcode == os_locale.assumed ) { + fromcode = os_locale.codeset; + if( 0 != strcmp(fromcode, "C") ) { // anything but that + return fromcode; + } + } + + return standard_internal; +} + +const char * +cbl_field_t::internalize() { + static const char *tocode = standard_internal; + static const char *fromcode = guess_encoding(); + static iconv_t cd = iconv_open(tocode, fromcode); + static const size_t noconv = size_t(-1); + + // Sat Mar 16 11:45:08 2024: require temporary environment for testing + if( getenv( "INTERNALIZE_NO") ) return data.initial; + + bool using_assumed = fromcode == os_locale.assumed; + + if( fromcode == tocode || has_attr(hex_encoded_e) ) { + return data.initial; + } + + if( is_ascii() ) return data.initial; + assert(data.capacity > 0); + + char output[data.capacity + 2], *out = output; + char *in = const_cast<char*>(data.initial); + size_t n, inbytesleft = data.capacity, outbytesleft = sizeof(output); + if( !is_literal(this) && inbytesleft < strlen(data.initial) ) { + inbytesleft = strlen(data.initial); + } + + assert(fromcode != tocode); + + while( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) { + if( !using_assumed ) break; // change only once + fromcode = guess_encoding(); + cd = iconv_open(tocode, fromcode); + dbgmsg("%s: trying input encoding %s", __func__, fromcode); + if( fromcode == tocode ) break; + } + + if( n == noconv ) { + if( !using_assumed ) { + yywarn("failed to decode '%s' as %s", data.initial, fromcode); + return NULL; + } + return data.initial; + } + + if( 0 < inbytesleft ) { + // data.capacity + inbytesleft is not correct if the remaining portion has + // multibyte characters. But the fact reamins that the VALUE is too big. + ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u", + cbl_field_t::level_str(level), name, data.initial, + data.capacity + inbytesleft, data.capacity ); + } + + // Replace data.initial only if iconv output differs. + if( 0 != memcmp(data.initial, output, out - output) ) { + assert(out <= output + data.capacity); + + if( getenv(__func__) ) { + const char *eoi = data.initial + data.capacity, *p; + char nullitude[64] = "no null"; + if( (p = std::find(data.initial, eoi, '\0')) != eoi ) { + sprintf(nullitude, "NUL @ %zu", p - data.initial); + } + dbgmsg("%s:%d: before: %-15s %-20s: '%.*s'{%u}, %s", __func__, __LINE__, + 3 + cbl_field_type_str(type), name, + data.capacity, data.initial, data.capacity, nullitude); + } + dbgmsg("%s: converted '%.*s' to %s", + __func__, data.capacity, data.initial, tocode); + + int len = int(out - output); + char *mem = static_cast<char*>( xcalloc(1, sizeof(output)) ); + + // Set the new memory to all blanks, tacking a '!' on the end. + memset(mem, 0x20, sizeof(output) - 1); + mem[ sizeof(output) - 2] = '!'; + + if( is_literal(this) ) { + data.capacity = len; // trailing '!' will be overwritten + } + + memcpy(mem, output, len); // copy only as much as iconv converted + + free(const_cast<char*>(data.initial)); + data.initial = mem; + + if( getenv(__func__) ) { + const char *eoi = data.initial + data.capacity, *p; + char nullitude[64] = "no null"; + if( (p = std::find(data.initial, eoi, '\0')) != eoi ) { + sprintf(nullitude, "NUL @ %zu", p - data.initial); + } + dbgmsg("%s:%d: after: %-15s %-20s: '%.*s'{%u}, %s", __func__, __LINE__, + "", name, + data.capacity, data.initial, data.capacity, nullitude); + } + + } + + return data.initial; +} + +const char * +cbl_label_t::str() const { + char *buf; + switch(type) { + case LblParagraph: + buf = xasprintf("%-12s %s OF '%s', line %d", type_str() + 3, name, + parent? cbl_label_of(symbol_at(parent))->name : "", line); + break; + case LblProgram: + if( parent == 0 ) { + buf = xasprintf("%-12s %s top level [%s], line %d", + type_str() + 3, name, mangled_name, line); + } else { + buf = xasprintf("%-12s %s OF #%zu '%s' [%s], line %d", + type_str() + 3, name, parent, + cbl_label_of(symbol_at(parent))->name, + mangled_name, line); + } + break; + default: + buf = xasprintf("%-12s %s, line %d", type_str() + 3, name, line); + } + return buf; +} + +size_t +cbl_label_t::explicit_parent() const { + switch(type) { + case LblParagraph: case LblSection: case LblNone: + if( parent != 0 ) { + // implicit parents don't count + symbol_elem_t *p = symbol_at(parent); + if( p->type == SymLabel && cbl_label_of(p)->name[0] == '_' ) { + return 0; + } + } + break; + default: + break; + } + return parent; +} + +cbl_prog_hier_t::cbl_prog_hier_t() { + nlabel = std::count_if( symbols_begin(), symbols_end(), is_program ); + assert(nlabel >0); + labels = new cbl_prog_hier_t::program_label_t[nlabel]; + + std::copy_if( symbols_begin(), symbols_end(), + labels, is_program ); +} + +/* + * Map of program to its callable COMMON programs. + */ +static std::map<size_t, symbolset_t> common_callables; + +symbolset_t +symbol_program_programs() { + symbolset_t programs; + + for( const auto& elem : common_callables ) { + if( elem.first == 0 ) continue; + assert(symbol_at(elem.first)->type == SymLabel); + assert(is_program(*symbol_at(elem.first))); // might be a function + programs.insert(elem.first); + } + return programs; +} + +static void +common_callables_update( const size_t iprog ) { + // Add this directly contained COMMON program to the parent's set. + auto prog = cbl_label_of(symbol_at(iprog)); + if( prog->type != LblProgram ) return; + if( prog->common ) { + common_callables[prog->parent].insert(iprog); + } + + // Add all ancestors' COMMON programs to the iprog siblings and uncles. + std::list<size_t> dnr; // do not recurse + + while( prog->parent > 0 ) { + if( !prog->recursive ) dnr.push_back(symbol_index(symbol_elem_of(prog))); + auto c = common_callables[prog->parent]; + common_callables[iprog].insert(c.begin(), c.end()); + prog = cbl_label_of(symbol_at(prog->parent)); + } + // Top-level programs (parent == 0) cannot be COMMON, but are public + // symbols. They can be called from anywhere, except from a + // (directly or indirectly) contained program, unless marked + // RECURSIVE. + assert(prog->parent == 0); + auto itop = symbol_index(symbol_elem_of(prog)); + common_callables[0].insert(itop); + if( prog->recursive ) { + common_callables[iprog].insert(itop); + } + + for( size_t isym : dnr ) { + common_callables[iprog].erase(isym); + } +} + +/* + * Unlike fields, there is no LblForward. Instead, a forward + * reference to a procedure -- section or paragraph name -- begins + * life as LblNone. When it is actually defined, the lookup function + * updates the LblNone entry and defines its type, parent, and line + * number. + */ +cbl_label_t * +symbol_label_add( size_t program, cbl_label_t *input ) +{ + if( getenv(__func__) ) { + const cbl_label_t *L = input; + dbgmsg( "%s:%d: %-5s #%3zu %-9s '%s' of '%s' at line %d", __func__, __LINE__, + "input", + size_t(0), + L->type_str()+3, + L->name, + L->parent? cbl_label_of(symbol_at(L->parent))->name : "", + L->line ); + } + + cbl_label_t *label = symbol_label(program, input->type, + input->parent, input->name); + + if( label && label->type == LblNone ) { + const char *verb = "set"; + label->type = input->type; + label->parent = input->parent; + label->line = input->line; + + if( getenv(__func__) ) { + const cbl_label_t *L = label; + dbgmsg( "%s:%d: %-5s #%3zu %-9s '%s' of '%s' at line %d", + __func__, __LINE__, + verb, + symbol_elem_of(L) - symbols_begin(), + L->type_str()+3, + L->name, + L->parent? cbl_label_of(symbol_at(L->parent))->name : "", + L->line ); + } + return label; + } + + // Set the program's mangled name, dehyphenated and uniqified by parent index. + if( input->type == LblProgram ) { + char *psz = cobol_name_mangler(input->name); + input->mangled_name = xasprintf("%s.%zu", psz, input->parent); + free(psz); + } + + struct symbol_elem_t + elem = { SymLabel, program, { .label = *input } }, *e = &elem; + + assert(0 <= e->elem.label.line); + e->elem.label.line = -e->elem.label.line; // force insertion + + if( (e = symbol_add(&elem)) == NULL ) { + cbl_errx("%s:%d: could not add '%s'", __func__, __LINE__, label->name); + } + + common_callables_update( symbol_index(e) ); + + // restore munged line number unless symbol_add returned an existing label + if( e->elem.label.line < 0 ) e->elem.label.line = -e->elem.label.line; + + if( getenv(__func__) ) { + const cbl_label_t *L = cbl_label_of(e); + dbgmsg( "%s:%d: added #%3zu %-9s '%s' of '%s' at line %d", __func__, __LINE__, + e - symbols_begin(), + L->type_str()+3, + L->name, + L->parent? cbl_label_of(symbol_at(L->parent))->name : "", + L->line ); + } + symbols.labelmap_add(e); + return cbl_label_of(e); +} + +/* + * Under ISO (and not IBM) Declaratives are followed by a Section name. When + * the first statement is parsed, verify, if Declaratives were used, that it + * was preceeded by a Section name. + */ +bool +symbol_label_section_exists( size_t program ) { + auto pblob = std::find_if( symbols_begin(program), symbols_end(), + []( const auto& sym ) { + if( sym.type == SymField ) { + auto& f( sym.elem.field ); + return f.type == FldBlob; + } + return false; + } ); + if( pblob == symbols_end() ) return true; // Section name not required + + bool has_section = std::any_of( ++pblob, symbols_end(), + []( const auto& sym ) { + if( sym.type == SymLabel ) { + auto& L(sym.elem.label); + if( L.type == LblSection ) { + if( L.name[0] != '_' ) { // not implicit + return true; // Section name exists + } + } + } + return false; + } ); + if( yydebug && ! has_section ) { + symbols_dump(program, true); + } + // Return true if no Declaratives, because the (non-)requirement is met. + // Return false if Declaratives exist, because no Section name was found. + return has_section; +} + +cbl_label_t * +symbol_program_add( size_t program, cbl_label_t *input ) +{ + symbol_elem_t + elem = { SymLabel, program, { .label = *input } }, *e; + + assert( is_program(elem) ); + + // Set the program's mangled name, dehyphenated and uniqified by parent index. + char *psz = cobol_name_mangler(input->name); + elem.elem.label.mangled_name = xasprintf("%s.%zu", psz, input->parent); + free(psz); + + e = std::find_if( symbols_begin(program), symbols_end(), + [program, name = input->name]( const auto& elem ) { + if( elem.type == SymLabel ) { + if( program == elem.program ) { + auto L = cbl_label_of(&elem); + if( 0 == strcasecmp(name, L->name) ) return true; + } + } + return false; + } ); + if( e != symbols_end() ) return NULL; + + e = symbol_append(elem); + + common_callables_update( symbol_index(e) ); + + return cbl_label_of(e); +} + +#if 1 +struct cbl_special_name_t * +symbol_special( special_name_t id ) { + cbl_special_name_t special = { .id = id }; + struct symbol_elem_t key = { SymSpecial, 0, + { .special = special } }, *e; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e? cbl_special_name_of(e) : NULL; +} +#endif + +struct symbol_elem_t * +symbol_special_add( size_t program, struct cbl_special_name_t *special ) +{ + // Ensure this special name isn't already defined for this program. + struct symbol_elem_t *e = symbol_special(program, special->name); + + if( e ) { + cbl_special_name_t *s = cbl_special_name_of(e); + if( getenv(__func__) ) { + dbgmsg("%s:%d matches %s %d (%s)", __func__, __LINE__, + special->name, int(s->id), s->name); + } + return e; + } + assert(e == NULL); + + struct symbol_elem_t elem = { SymSpecial, program, { .special = *special } }; + + if( (e = symbol_add(&elem)) == NULL ) { + cbl_errx( "%s:%d: could not add '%s'", __func__, __LINE__, special->name); + } + + if( getenv(__func__) ) { + dbgmsg( "%s:%d: added special '%s'", __func__, __LINE__, + e->elem.special.name); + } + + elem_key_t key(program, cbl_special_name_of(e)->name); + symbols.specials[key] = symbol_index(e); + + return e; +} + +struct cbl_section_t * +symbol_section( size_t program, struct cbl_section_t *section ) { + struct symbol_elem_t key = { SymDataSection, program, + { .section = *section } }, *e; + + e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems, + &symbols.nelem, sizeof(key), + symbol_elem_cmp ) ); + return e? cbl_section_of(e) : NULL; +} + + +struct symbol_elem_t * +symbol_section_add( size_t program, struct cbl_section_t *section ) +{ + if( symbol_section(program, section) ) { + return NULL; // error, exists + } + + struct symbol_elem_t *e, elem = { SymDataSection, + program, { .section = *section } }; + + if( (e = symbol_add(&elem)) == NULL ) { + cbl_errx( "%s:%d: could not add '%s'", __func__, __LINE__, section->name()); + } + + return e; +} + +static int +currency_char_in_string(const char *picture) { + // This can take an unexpanded string + int retval = 0; + while(*picture) { + if( symbol_currency(*picture) ){ + retval = *picture; + break; + } + picture += 1; + } + return retval; +} + +static +int l_and_r(const char *expanded_picture, int ch) { + const char *l = strchr(expanded_picture, ch); + const char *r = strrchr(expanded_picture, ch); + return r > l ? ch : 0; +} + +static int +floating_char_in_string(const char *expanded_picture) { + int ch = '+'; + if( l_and_r(expanded_picture, ch) ) { + return ch; + } + ch = '-'; + if( l_and_r(expanded_picture, ch) ) { + return ch; + } + ch = currency_char_in_string(expanded_picture); + if( ch && l_and_r(expanded_picture, ch) ) { + return ch; + } + return 0; +} + +char * +expand_picture(const char *picture) + { + assert(strlen(picture) < PICTURE_MAX); // guaranteed by picset() in scanner + size_t retval_length = PICTURE_MAX; + char *retval = (char *)xmalloc(retval_length); + size_t index = 0; + + int ch; + int prior_ch = '\0'; + const char *p = picture; + + long repeat; + + int currency_symbol = currency_char_in_string(picture); + + while( (ch = (*p++ & 0xFF) ) ) + { + if( ch == '(' ) + { + // Pick up the number after the left parenthesis + char *endchar; + repeat = strtol(p, &endchar, 10); + + // We subtract one because we know that the character just before + // the parenthesis was already placed in dest + repeat -= 1; + + // Update p to the character after the right parenthesis + p = endchar + 1; + + if( index + repeat >= retval_length ) + { + retval_length <<= 1; + retval = (char *)xrealloc(retval, retval_length); + } + + while(repeat--) + { + retval[index++] = prior_ch; + } + } + else + { + if( index >= retval_length ) + { + retval_length <<= 1; + retval = (char *)xrealloc(retval, retval_length); + } + retval[index++] = ch; + } + prior_ch = ch; + } + if( index >= retval_length ) + { + retval_length <<= 1; + retval = (char *)xrealloc(retval, retval_length); + } + retval[index++] = '\0'; + + size_t dest_length = strlen(retval); + + // We have to take into account the possibility that the currency symbol + // mapping might be to a string of more than one character: + + if( currency_symbol ) + { + size_t sign_length = strlen(symbol_currency(currency_symbol)) - 1; + if( sign_length ) + { + char *pcurrency = strchr(retval, currency_symbol); + assert(pcurrency); + memmove( pcurrency + sign_length, + pcurrency, + dest_length+1 - (pcurrency-retval)); + for(size_t i=0; i<sign_length; i++) + { + pcurrency[i] = 'B'; + } + dest_length += sign_length; + } + } + + return retval; + } + +int +length_of_picture(const char *picture) +{ + // Calculate the length of a PICTURE string with the parenthetical + // abbreviations expanded: +9(10).9(4)CR, as an example, returns 18 + int retval = 0; + char ch; + char prior_char = 0; // Calm the compiler down + const char *p = picture; + const char *currency_sign = NULL; + int currency_char = currency_char_in_string(picture); + + if( currency_char ) + { + currency_sign = symbol_currency(currency_char); + } + + while( (ch = *p++) ) { + if( ch == '(' ) { + // Pick up the number that starts after the left parenthesis + char *endchar; + int increment = strtol(p, &endchar, 10); + if( prior_char != 'P' ) { + retval += increment-1 ; + } + p = endchar + 1; + } + else { + prior_char = TOUPPER(ch); + if( prior_char != 'P' ) { + // P-scaling characters don't count in the capacity: + retval += 1; + } + } + + } + // We need to adjust for the length of a currency sign, because it might + // have more than one character. We've already accounted for one of its + // characters, so.... + if( currency_sign ) { + retval += strlen(currency_sign) - 1; + } + return retval; +} + +int +digits_of_picture(const char *runlength_picture, bool return_rdigits) + { + // This is a strangely busy routine. The capacity is calculated elsewhere, + // by the length_of_picture() routine. This routine calculates the + // total number of digits (which are the total number of digit positions) + // and the number of rdigits (digit positions to the right of any decimal + // point.) + // + // It also takes into account the possibility of the number being P-scaled. + // The scaled_e attribute also gets set separately. For a numeric-edited + // scaled_e value, a positive value of rdigits means the number is less than + // 1.000000 and has an extra rdigits's count of '0' between the decimal + // point and the rest of the number + // + // A negative value of rdigits means that the number has no decimal places, + // is zero or greater, and has an extra scaling factor of 10^(-rdigits) + + int retval; + char *picture = expand_picture(runlength_picture); + int digits = 0; + int rdigits = 0; + int pcount = 0; + unsigned char ch; + const char *p = picture; + const char *rightmost_p = NULL; + const char *rightmost_d = NULL; + const char *decimal_position = NULL; + const char *first_float = NULL; + + unsigned char floating_character = floating_char_in_string(picture); + + while( (ch = *p++) ) + { + if( ch == decimal_point || ch == 'v' || ch == 'V') + { + // This is an actual or virtual decimal point + // There should only be one of these in the picture string + decimal_position = p-1; + } + else if( ch == floating_character ) + { + // All but the first floating character acts like a digit + // position. We'll adjust the counts at the end + digits += 1; + if( decimal_position ) + { + // Having encountered a decimal point means this is an + // rdigit: + rdigits += 1; + } + if( !first_float ) + { + first_float = p-1; + } + continue; + } + else + { + switch(ch) + { + case '9' : + case 'z' : + case 'Z' : + case '*' : + // These are positions that can hold a digit + rightmost_d = p-1; + digits += 1; + if( decimal_position ) + { + // Having encountered a decimal point means this is an + // rdigit: + rdigits += 1; + } + break; + + case 'P': + case 'p': + rightmost_p = p-1; + pcount += 1; + break; + } + } + } + + // We have looped through all the characters + + if( floating_character ) + { + // Account for the fact that ++ turns into +<digit>, but only one digit + digits -= 1; + + if( decimal_position ) + { + if( first_float > decimal_position ) + { + // Because the first_float is to the right of the + // decimal point, rdigits has to be reduced by one: + rdigits -=1 ; + } + } + } + + if( pcount ) + { + // We encountered some P-scaling characters in the PICTURE string. + if( rightmost_p < rightmost_d ) + { + // This is a scaled variable of type PPP999 + rdigits = pcount; + } + else + { + // This is a scaled variable of type 999PPP + rdigits = -pcount; + } + } + + free(picture); + + if(return_rdigits) + { + retval = rdigits; + } + else + { + retval = digits; + } + + return retval; + } + + +int +rdigits_of_picture(const char *picture) { + return digits_of_picture(picture, true); +} + +bool +is_picture_scaled(const char *picture) { + bool retval = false; + if( strchr( picture, 'P') ) { + retval = true; + } + if( strchr( picture, 'p') ) { + retval = true; + } + return retval; +} + +/* + * Static call support. Return reachable programs. + * + * 8.4.5.2 Scope of program-names + * + * "The names assigned to programs that are contained directly or + * indirectly within the same outermost program shall be unique within + * that outermost program." + * + * At point of CALL, the target name might or might not be that of a + * contained or COMMON program. If no such program exists, the CALL + * is to an external reference. If exactly one such program exists, + * the CALL references that program. The returned map is used to + * enforce those rules, and to replace seemingly external calls with + * internal ones. + */ + +symbolset_t +symbol_program_callables( size_t program ) { + symbolset_t callables = common_callables[program]; + + auto self = cbl_label_of(symbol_at(program)); + auto start_with = 0 < self->parent? self->parent : program; + + // Build a list of programs reachable by the current program. + for( auto e = symbols_begin(++start_with); e < symbols_end(); e++ ) { + if( e->type != SymLabel ) continue; + if( e->elem.label.type != LblProgram ) continue; + + auto prog = cbl_label_of(e); + if( program == symbol_index(e) && !prog->recursive ) continue; + + if( (self->parent == prog->parent && prog->common) || + (prog->parent == program) ) + { + callables.insert(symbol_index(e)); + } + } + + return callables; +} + + +const cbl_label_t * +symbol_program_local( const char tgt_name[] ) { + symbolset_t callables = symbol_program_callables(current_program_index()); + + for( auto callable : callables ) { + auto called = cbl_label_of(symbol_at(callable)); + if( 0 == strcasecmp(called->name, tgt_name) ) return called; + } + return NULL; +} + +/* + * FILE SECTION support + */ + +/* + * SPECIAL-NAMES support + */ +std::map<char, const char *> currencies; + +bool +symbol_currency_add( const char symbol[], const char sign[] ) { + // In service of CURRENCY sign PICTURE SYMBOL symbol + // The single-character 'symbol' is replaced with multi-char 'sign' + // by the NumericEdited processing. + if( !symbol ) { + symbol = xasprintf("%c", *sign); + } + currencies[*symbol] = sign; + return true; +} + +const char * +symbol_currency( char sign ) { + // We need a default of '$' + if( currencies.size() == 0 ) { + currencies['$'] = "$"; + } + auto result = currencies.find(sign); + return result == currencies.end()? NULL : result->second; +} + +char symbol_decimal_point_set( char ch ) { return decimal_point = ch; } +char symbol_decimal_point() { return decimal_point; } +bool decimal_is_comma() { return decimal_point == ','; } + +/* + * OCCURS support + */ + +/* + * A cbl_occurs_key_t is part of a field definition, and comprises + * size_t symbol indexes. A cbl_key_t is a list of field pointers, + * and can be created ad hoc to describe a sort. We can construct a + * cbl_key_t from cbl_occurs_key_t. + */ +cbl_key_t:: +cbl_key_t( const cbl_occurs_key_t& that ) + : ascending(that.ascending) +{ + if( that.field_list.nfield == 0 ) { + *this = cbl_key_t(); + return; + } + + nfield = that.field_list.nfield; + fields = static_cast<cbl_field_t**>( xcalloc(nfield, + sizeof(*fields)) ); + for( size_t i=0; i < that.field_list.nfield; i++ ) { + fields[i] = cbl_field_of(symbol_at(that.field_list.fields[i])); + } +} + +void +cbl_occurs_t::key_alloc( bool ascending ) { + auto nbytes = sizeof(keys[0]) * (nkey + 1); + cbl_occurs_key_t key = { ascending, cbl_field_list_t() }; + + keys = static_cast<cbl_occurs_key_t *>(xrealloc(keys, nbytes)); + keys[nkey++] = key; +} + +void +cbl_occurs_t::field_add( cbl_field_list_t& field_list, cbl_field_t *field ) { + cbl_field_list_t list = field_list; + size_t ifield = field_index(field); + auto nbytes = sizeof(list.fields[0]) * (list.nfield + 1); + + list.fields = static_cast<size_t*>(xrealloc(list.fields, nbytes)); + list.fields[list.nfield++] = ifield; + field_list = list; +} + +void +cbl_occurs_t::key_field_add( cbl_field_t *field ) { + assert(nkey > 0); + cbl_occurs_key_t& key = keys[nkey-1]; + field_add(key.field_list, field); +} + +void +cbl_occurs_t::index_add( cbl_field_t *field ) { + field_add(indexes, field); +} + +class is_field_at { + cbl_field_t *field; + public: + is_field_at( cbl_field_t *field ) : field(field) {} + bool operator()( size_t isym ) const { + return field == field_at(isym); + } +}; + +cbl_occurs_key_t * +cbl_occurs_t::key_of( cbl_field_t *field ) { + for( auto key = keys; key < keys + nkey; key++ ) { + size_t *fields = key->field_list.fields; + size_t *efield = key->field_list.fields + key->field_list.nfield; + auto f = std::find_if( fields, efield, is_field_at(field) ); + if( f < efield ) { + return key; + } + } + return NULL; +} + +bool +cbl_occurs_t::subscript_ok( const cbl_field_t *subscript ) const { + if( !is_literal(subscript) ) { + return true; // Cannot check non-literals, so, OK. + } + // It must be a number. + if( subscript->type != FldLiteralN ) return false; + + auto sub = subscript->data.value; + + if( sub < 1 || sub != size_t(sub) ) { + return false; // zero/fraction invalid + } + if( bounds.fixed_size() ) { + return sub <= bounds.upper; + } + return bounds.lower <= sub && sub <= bounds.upper; +} + +cbl_file_key_t:: +cbl_file_key_t( cbl_name_t name, + const std::list<cbl_field_t *>& fields, + bool is_unique ) + : unique(is_unique) + , leftmost(0) +{ + assert(name); + memcpy(this->name, name, sizeof(this->name)); + nfield = fields.size(); + assert(nfield > 0); + this->fields = new size_t[nfield]; + std::transform( fields.begin(), fields.end(), this->fields, field_index ); +} + +size_t cbl_file_key_t:: +offset() const { + return cbl_field_of(symbol_at(fields[0]))->offset; +} + +/* + * A multi-field key has a name. A single-field key has no name. + */ +bool cbl_file_key_t:: +operator==( const cbl_field_t *key_field ) { + this->leftmost = 0; + + // match multi-field key by name + if( 0 == strcasecmp(this->name, key_field->name) ) return true; + + // A literal key_field is a "magic" literal indicating a key name + // (that didn't match, above). + if( is_literal(key_field) ) return false; + + // match single-field key by its symbol index + size_t ifield = field_index(key_field); + if( nfield == 1 && fields[0] == ifield ) return true; + + // A literal key_field is a "magic" literal indicating a key name + // (that didn't match, above). + if( is_literal(key_field) ) return false; + + // Match if the field has the same offset as the key, and belongs to + // an 01 record for the same FD. + if( this->offset() == key_field->offset ) { + auto this_file( symbol_record_file(cbl_field_of(symbol_at(fields[0]))) ); + auto that_file( symbol_record_file(key_field) ); + if( this_file && that_file && + symbol_index(symbol_elem_of(this_file)) == + symbol_index(symbol_elem_of(that_file)) ) { + this->leftmost = ifield; + return true; + } + } + + return false; +} + +uint32_t cbl_file_key_t:: +key_field_size( uint32_t sum, size_t ifield ) { + return sum + field_size( cbl_field_of(symbol_at(ifield)) ); +} + +// Return size of named field in key or, if NULL, whole key +uint32_t cbl_file_key_t:: +size() { + if( leftmost != 0 ) { + return cbl_field_of(symbol_at(leftmost))->data.capacity; + } + return std::accumulate(fields, fields + nfield, 0, key_field_size); +} + + +/* + * Produce list of qualifier names for any key field. + */ +static std::list<const char *> +symbol_forward_names( size_t ifield ) { + std::list<const char *> output; + + for( auto sym = symbols_begin(ifield); sym && sym->type == SymField; ) { + const cbl_field_t *field = cbl_field_of(sym); + if( !(field->type == FldForward) ) { + dbgmsg("%s:%d: logic error, not FldForward: #%zu %s", + __func__, __LINE__, symbol_index(sym), field_str(field)); + } + assert(field->type == FldForward); + + output.push_front( field->name ); + + if( 0 == field->parent) break; + sym = symbols_begin(field->parent); + } + + return output; +} + +static size_t +symbol_forward_to( size_t fwd ) { + std::list<const char *> names = symbol_forward_names(fwd); + size_t program = symbols_begin(fwd)->program; + + std::pair <symbol_elem_t *, bool> elem = symbol_find( program, names ); + + if( !elem.second ) { + const auto& field = *cbl_field_of(symbols_begin(fwd)); + if( yydebug ) + dbgmsg("%s:%d: no symbol found for #%zu %s %s", __func__, __LINE__, + fwd, cbl_field_type_str(field.type), field.name); + return fwd; + } + + return symbol_index(elem.first); +} + +/* + * For each FldForward, resolve to a field that is part of an FD + * record for the file. + */ +void +cbl_file_key_t::deforward( size_t ifile ) { + const auto file = cbl_file_of(symbol_at(ifile)); + std::transform( fields, fields + nfield, fields, + [ifile, file]( size_t fwd ) { + static std::map<size_t, int> keys; + auto ifield = symbol_forward_to(fwd); + const auto field = cbl_field_of(symbol_at(ifield)); + + if( is_forward(field) && yydebug ) { + dbgmsg("%s:%d: key %d: #%zu %s of %s is %s", "deforward", __LINE__, + keys[ifile]++, ifield, field->name, file->name, + cbl_field_type_str(field->type) + 3); + } + + auto parent = symbol_record_file(field); + + if( ifield == fwd ) { + ERROR_FIELD(field, "line %d: %s of %s " + "is not defined", + file->line, field->name, file->name); + return ifield; + } + + // relative files have numeric keys that are not part of the record + if( file->org == file_relative_e ) { + if( parent != NULL ) { + ERROR_FIELD(field, "line %d: RELATIVE file %s key %s " + "is defined in file description", + file->line, file->name, field->name); + return ifield; + } + if( field->occurs.ntimes() ) { + ERROR_FIELD(field, "line %d: RELATIVE file %s key %s " + "cannot have OCCURS clause", + file->line, file->name, field->name); + return ifield; + } + if( ! (is_numeric(field) && 0 == field->data.rdigits) ) { + ERROR_FIELD(field, "line %d: RELATIVE file %s key %s " + "must be integer type", + file->line, file->name, field->name); + return ifield; + } + return ifield; + } + // looked-up field must have same file as parent + if( ! (parent != NULL && + symbol_index(symbol_elem_of(parent)) == ifile) ) { + ERROR_FIELD(field, "line %d: %s of %s " + "is not defined in file description", + file->line, field->name, file->name); + } + return ifield; + } ); +} + +char * +cbl_file_key_t::str() const { + char *output = static_cast<char*>( xcalloc(nfield, 8) ), *p = output; + assert(output); + const char *sep = ""; + + *p++ = '['; + for( auto f = fields; f < fields + nfield; f++) { + auto n = sprintf(p, "%s%zu", sep, *f); + p += n; + sep = ", "; + } + *p++ = ']'; + return output; +} + +/* + * After processing FILE SECTION, replace forward references with actual ones. + */ +void +cbl_file_t::deforward() { + if( user_status ) { + user_status = symbol_forward_to(user_status); + + auto field = cbl_field_of(symbol_at(user_status)); + if( is_forward(field) ) { + ERROR_FIELD(field, "%s of %s never defined in FD record", + field->name, this->name); + } + } + + for( auto p = keys; p < keys + nkey; p++ ) { + p->deforward( symbol_index(symbol_elem_of(this)) ); + } +} + +char * +cbl_file_t::keys_str() const { + char *ks[nkey]; + std::transform(keys, keys + nkey, ks, + []( const cbl_file_key_t& key ) { + return key.str(); + } ); + size_t n = 4 * nkey + std::accumulate(ks, ks + nkey, 0, + []( int n, const char *s ) { + return n + strlen(s); + } ); + char *output = static_cast<char*>( xcalloc(1, n) ), *p = output; + const char *sep = ""; + + *p++ = '['; + for( auto k : ks ) { + p = stpcpy(p, sep); + p = stpcpy(p, k); + sep = ", "; + free(k); + } + *p++ = ']'; + return output; +} + +/* + * _FILE_STATUS symbols + */ + +static struct file_status_field_t { + file_status_t status; +} file_status_fields[] = { + {FsSuccess}, + {FsDupRead}, + {FsRecordLength}, + {FsUnavail}, + {FsNotaTape}, + + {FsEofSeq}, + {FsEofRel}, + + {FsKeySeq}, + {FsDupWrite}, + {FsNotFound}, + {FsEofWrite}, + + {FsOsError}, + {FsBoundary}, + {FsNoFile}, + {FsNoAccess}, + {FsCloseLock}, + {FsWrongType}, + + {FsLogicErr}, + {FsIsOpen}, + {FsCloseNotOpen}, + {FsNoRead}, + {FsBoundWrite}, + {FsReadError}, + {FsReadNotOpen}, + {FsNoWrite}, + {FsNoDelete}, + + {FsWrongThread}, + {FsPassword}, + {FsLogicOther}, + {FsNoResource}, + {FsIncomplete}, + {FsNoDD}, + {FsVsamOK}, + {FsBadEnvVar}, +}; + +static int +cbl_file_status_cmp( const void *K, const void *E ) { + const struct file_status_field_t + *k=static_cast<const struct file_status_field_t *>(K), + *e=static_cast<const struct file_status_field_t *>(E); + return k->status == e->status? 0 : 1; +} + +static long +file_status_status_of( file_status_t status ) { + size_t n = COUNT_OF(file_status_fields); + file_status_field_t *fs, key = { .status = status }; + + fs = (file_status_field_t*)lfind( &key, file_status_fields, + &n, sizeof(*fs), cbl_file_status_cmp ); + + return fs? (long)fs->status : -1; +} + +cbl_field_t * +ast_file_status_between( file_status_t lower, file_status_t upper ) { + struct { cbl_field_t *lb, *ub, *both; } cond = { new_temporary(FldConditional), + new_temporary(FldConditional), + new_temporary(FldConditional) }; + + cbl_field_t *file_status = cbl_field_of(symbol_field(0, 0, "_FILE_STATUS")); + + long status_lower = file_status_status_of(lower); + long status_upper = file_status_status_of(upper); + assert(status_lower != -1); + assert(status_upper != -1); + + parser_relop_long( cond.lb, status_lower, le_op, file_status ); + parser_relop_long( cond.ub, status_upper, gt_op, file_status ); + + parser_logop( cond.both, cond.lb, and_op, cond.ub ); + + return cond.both; +} + +bool +is_register_field(cbl_field_t *field) + { + // TRUE when the field is an executable-level global variable of the type we + // are calling a "register", like RETURN-CODE or UPSI or the like: + return + ( field->parent == 0 + && field->level == 0 + && !(field->attr & intermediate_e) + && !(field->attr & filler_e) + && field->type != FldClass + && field->type != FldBlob + ); + } + +bool +has_value( cbl_field_type_t type ) { + // Indicates that the field type contains data that can be expressed as + // a numeric value + switch ( type ) { + case FldInvalid: + case FldGroup: + case FldAlphanumeric: + case FldNumericEdited: + case FldAlphaEdited: + case FldLiteralA: + case FldClass: + case FldConditional: + case FldForward: + case FldSwitch: + case FldDisplay: + case FldBlob: + return false; + case FldIndex: + case FldPointer: + case FldNumericDisplay: + case FldNumericBinary: + case FldFloat: + case FldPacked: + case FldNumericBin5: + case FldLiteralN: + return true; + } + dbgmsg( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type ); + return false; +} diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h new file mode 100644 index 0000000..18944b0 --- /dev/null +++ b/gcc/cobol/symbols.h @@ -0,0 +1,2210 @@ + /* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef _SYMBOLS_H_ +#pragma message __FILE__ " included twice" +#else +#define _SYMBOLS_H_ + +#include <assert.h> +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <algorithm> +#include <list> +#include <map> +#include <set> +#include <stack> +#include <string> +#include <variant> +#include <vector> + +#define PICTURE_MAX 64 + +// Define a tree type as void pointer outside the generator code. +#ifndef HOWEVER_GCC_DEFINES_TREE +typedef void *tree; +#endif + +#if ! (__HAVE_FLOAT128 && __GLIBC_USE (IEC_60559_TYPES_EXT)) +static_assert( sizeof(output) == sizeof(long double), "long doubles?" ); + +static inline _Float128 +strtof128 (const char *__restrict __nptr, char **__restrict __endptr) { + return strtold(nptr, endptr); +} + +static inline int +strfromf128 (char *restrict string, size_t size, + const char *restrict format, _Float128 value) { + return strfroml(str, n, format, fp); +} +#endif + +extern const char *numed_message; + +enum cbl_dialect_t { + dialect_gcc_e = 0x00, + dialect_ibm_e = 0x01, + dialect_mf_e = 0x02, + dialect_gnu_e = 0x04, +}; + +extern cbl_dialect_t cbl_dialect; +void cobol_dialect_set( cbl_dialect_t dialect ); +cbl_dialect_t dialect_is(); + +static inline bool dialect_gcc() { + return dialect_gcc_e == cbl_dialect; +} + +static inline bool dialect_ibm() { + return dialect_ibm_e == (cbl_dialect & dialect_ibm_e); +} +static inline bool dialect_mf() { + return dialect_mf_e == (cbl_dialect & dialect_mf_e ); +} + +enum cbl_gcobol_feature_t { + feature_gcc_e = 0x00, + feature_internal_ebcdic_e = 0x01, + feature_embiggen_e = 0x02, // widen numeric that redefine POINTER +}; + +extern size_t cbl_gcobol_features; +bool cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature, bool on = true ); + +static inline bool gcobol_feature_internal_ebcdic() { + return feature_internal_ebcdic_e == + (cbl_gcobol_features & feature_internal_ebcdic_e); +} +static inline bool gcobol_feature_embiggen() { + return feature_embiggen_e == + (cbl_gcobol_features & feature_embiggen_e); +} + +enum cbl_division_t { + identification_div_e, + environment_div_e, + data_div_e, + procedure_div_e, +}; + +void mode_syntax_only( cbl_division_t division ); +bool mode_syntax_only(); + +static inline bool +is_numeric( cbl_field_type_t type ) { + switch ( type ) { + case FldInvalid: + case FldGroup: + case FldAlphanumeric: + case FldNumericEdited: + case FldAlphaEdited: + case FldLiteralA: + case FldClass: + case FldConditional: + case FldForward: + case FldSwitch: + case FldDisplay: + case FldPointer: // not numeric because not computable, only settable + case FldBlob: + return false; + // These types are computable or, in the case of FldIndex, may be + // arbitrarily set and incremented. + case FldNumericDisplay: + case FldNumericBinary: + case FldFloat: + case FldPacked: + case FldNumericBin5: + case FldLiteralN: + case FldIndex: + return true; + } + yywarn( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type ); + return false; +} + +struct os_locale_t { + char assumed[16]; + char *codeset; +}; + +const char * cbl_field_attr_str( cbl_field_attr_t attr ); + +cbl_field_attr_t literal_attr( const char prefix[] ); + +static inline bool +is_working_storage(uint32_t attr) { + return 0 == (attr & (linkage_e | local_e)); +} + +enum cbl_figconst_t cbl_figconst_of( const char *value ); +const char * cbl_figconst_str( cbl_figconst_t fig ); + +const char * consistent_encoding_check( const YYLTYPE& loc, const char input[] ); + +class cbl_domain_elem_t { + uint32_t length; + const char *value; + public: + bool is_numeric, all; + + cbl_domain_elem_t() + : length(0), value(NULL), is_numeric(false), all(false) + {} + cbl_domain_elem_t( const YYLTYPE& loc, + bool all, + uint32_t length, + const char *value, + bool is_numeric = false ) + : length(length), value(value), is_numeric(is_numeric), all(all) + { + if( value && ! is_numeric ) { + auto s = consistent_encoding_check(loc, value); + if( s ) value = s; + } + } + const char *name() const { return value; } + uint32_t size() const { return is_numeric ? strlen(value) : length; } +}; + +struct cbl_domain_t { + cbl_domain_elem_t first, last; + cbl_domain_t() : first(), last(first) + {} + cbl_domain_t( const YYLTYPE& loc, + bool all, + uint32_t length, + const char * value, + bool is_numeric = false ) + : first(loc, all, length, value, is_numeric), last(first) + {} + cbl_domain_t( const cbl_domain_elem_t& a, const cbl_domain_elem_t& z ) + : first(a) + , last(z) + { + assert(a.is_numeric == z.is_numeric); + } +}; + +typedef const char * (*time_now_f)( void ); + +const char * date2_is_now(void); +const char * day2_is_now(void); +const char * date4_is_now(void); +const char * day4_is_now(void); +const char * time_is_now(void); + +struct cbl_upsi_mask_t { + bool on_off; + uint32_t value; +cbl_upsi_mask_t( bool on_off, uint32_t value ) : on_off(on_off), value(value) {} +}; + +char symbol_decimal_point_set( char ch ); +char symbol_decimal_point(); +bool decimal_is_comma(); + +enum symbol_type_t { + SymFilename, + SymFunction, + SymField, + SymLabel, // section, paragraph, or label + SymSpecial, + SymAlphabet, + SymFile, + SymDataSection, +}; + +struct cbl_field_data_t { + uint32_t memsize; // nonzero if larger subsequent redefining field + uint32_t capacity, // allocated space + digits; // magnitude: total digits (or characters) + int32_t rdigits; // digits to the right + const char *initial, *picture; + + union { + // "Domain" is an array representing the VALUE of CLASS or 88 type. + const struct { cbl_domain_t *false_value; cbl_domain_t *domain; }; + const struct cbl_upsi_mask_t *upsi_mask; + _Float128 value; + }; + + union { // anonymous union allows for other function types later + time_now_f time_func; + }; + uint32_t upsi_mask_of() const { + assert(initial); + assert('0' <= initial[0] && initial[0] < '8'); + const uint32_t bitn = initial[0] - '0'; + return (1 << bitn); + } + + int32_t precision() const { return std::max(int32_t(0), rdigits); } + int32_t ldigits() const { return std::max(int(digits), int(digits - rdigits)); } + + cbl_field_data_t& valify() { + assert(initial); + const size_t len = strlen(initial); + char input[len + 1]; + std::copy(initial, initial + len + 1, input); // copy the NUL + if( decimal_is_comma() ) { + std::replace(input, input + sizeof(input), ',', '.'); + } + + char *pend = NULL; + value = strtof128( input, &pend ); + + if( pend != input + len ) { + dbgmsg("%s: error: could not interpret '%s' of '%s' as a number", + __func__, pend, initial); + } + return *this; + } + cbl_field_data_t& valify( const char *input ) { + assert(input); + initial = input; + capacity = strlen(initial); + return valify(); + } +}; + +static inline uint32_t +capacity_cast( size_t size ) { + uint32_t len = static_cast<uint32_t>(size); + assert(len == size); + return len; +} + +struct cbl_occurs_bounds_t { + // lower = upper = 0 for a non-table + // lower = upper = occurs for a fixed table + // lower and upper are the (inclusive) bounds for DEPENDING ON in a + // variable size table. lower can be zero. + size_t lower, upper; + + cbl_occurs_bounds_t(size_t lower=0, size_t upper=0) + : lower(lower), upper(upper) {} + size_t ntimes() const { + return upper; + } + bool fixed_size() const { return lower == upper; } +}; + +struct cbl_field_t; // A necessary forward reference + +struct cbl_field_list_t { + size_t nfield; + size_t *fields; + cbl_field_list_t() : nfield(0), fields(NULL) {} +}; + +struct cbl_occurs_key_t { + bool ascending; + cbl_field_list_t field_list; +}; + +struct cbl_occurs_t { + cbl_occurs_bounds_t bounds; + size_t depending_on; + size_t nkey; + cbl_occurs_key_t *keys; + cbl_field_list_t indexes; + + cbl_occurs_t() : depending_on(0), nkey(0), keys(NULL) {} + + size_t ntimes() const { return bounds.ntimes(); } + + void key_alloc( bool ascending ); + void key_field_add( cbl_field_t *field ); + void index_add( cbl_field_t *field ); + cbl_occurs_key_t * key_of( cbl_field_t *field ); + bool subscript_ok( const cbl_field_t *subscript ) const; + +protected: + void field_add( cbl_field_list_t& fields, cbl_field_t *field ); +}; + +/* + * Support for CALL and Linkage Section. + */ +enum cbl_ffi_arg_attr_t { none_of_e, address_of_e, length_of_e }; + +enum cbl_ffi_crv_t { + by_default_e, + by_reference_e = 'R', + by_content_e = 'C', + by_value_e = 'E' +}; + +static inline const char * +cbl_ffi_crv_str( cbl_ffi_crv_t crv ) { + switch (crv) { + case by_default_e: return "<default>"; + case by_reference_e: return "REFERENCE"; + case by_content_e: return "CONTENT"; + case by_value_e: return "VALUE"; + } + return "???"; +} + +typedef std::pair<size_t, size_t> cbl_bytespan_t; +struct cbl_subtable_t { + size_t offset, isym; +}; + +bool is_elementary( enum cbl_field_type_t type ); + +struct cbl_field_t { + size_t offset; + enum cbl_field_type_t type, usage; + size_t attr; + static_assert(sizeof(attr) == sizeof(cbl_field_attr_t), "wrong attr size"); + size_t parent; // symbols[] index of our parent + size_t our_index; // symbols[] index of this field, set in symbol_add() + uint32_t level; + struct cbl_occurs_t occurs; + int line; // Where it appears in the file. + cbl_name_t name; // Appears in the GIMPLE dump. + size_t file; // nonzero if field is 01 record for a file + struct linkage_t { + bool optional; + cbl_ffi_crv_t crv; // Using by C/R/V in Linkage + linkage_t() : optional(false), crv(by_default_e) {} + } linkage; + struct cbl_field_data_t data; + tree var_decl_node; // Reference to the pointer to the cblc_field_t structure + tree data_decl_node; // Reference to the run-time data of the COBOL variable + // // For linkage_e variables, data_decl_node is a pointer + // // to the data, rather than the actual data + tree literal_decl_node; // This is a FLOAT128 version of data.value + + void set_linkage( cbl_ffi_crv_t crv, bool optional ) { + linkage.optional = optional; + linkage.crv = crv; + assert(crv != by_content_e); + } + + inline bool is_typedef() const { + return has_attr(typedef_e); + } + inline bool is_strongdef() const { + return has_attr(strongdef_e); + } + + bool is_valid() const { + return data.capacity > 0 + || level == 88 + || level == 66 + || type == FldClass + || type == FldIndex + || type == FldLiteralA + || type == FldLiteralN; + } + + bool rename_level_ok() const { + switch( level ) { + case 0: + case 1: + case 66: + case 77: + case 88: + return false; + } + return true; + } + + bool reasonable_capacity() const { + return data.capacity <= MAX_FIXED_POINT_DIGITS; + } + + cbl_field_t& same_as( const cbl_field_t& that, bool is_typedef ) { + type = that.type; + attr |= (that.attr & external_e); + attr |= same_as_e; + + data = that.data; + + if( ! (is_typedef || that.type == FldClass) ) { + data.initial = NULL; + data.value = 0.0; + } + return *this; + } + + void report_invalid_initial_value(const YYLTYPE& loc) const; + + bool is_ascii() const; + bool is_integer() const { return is_numeric(type) && data.rdigits == 0; } + + bool is_binary_integer() const { + return type == FldNumericBinary || type == FldNumericBin5; + } + + void embiggen( size_t eight=8 ) { + assert(gcobol_feature_embiggen() && is_numeric(type) && size() == 4); + + type = FldNumericBin5; + attr |= embiggened_e; + data.capacity = eight; + data.digits = 0; + } + + bool has_attr( cbl_field_attr_t attr ) const { + return cbl_field_attr_t(this->attr & attr) == attr; + } + size_t set_attr( cbl_field_attr_t attr ); + size_t clear_attr( cbl_field_attr_t attr ); + const char * attr_str( const std::vector<cbl_field_attr_t>& attrs ) const; + + bool is_justifiable() const { + if( type == FldAlphanumeric ) return true; + if( type == FldInvalid ) return true; + return ! has_attr(rjust_e); + } + + bool has_subordinate( const cbl_field_t *that ) const; + + const char * internalize(); + bool value_set( _Float128 value ); + const char *value_str() const; + + bool is_key_name() const { return has_attr(record_key_e); } + + long scaled_capacity() const { + return data.digits? + long(data.digits) - data.rdigits + : + data.capacity; + } + uint32_t size() const; // table capacity or capacity + + const char * pretty_name() const { + if( name[0] == '_' && data.initial ) return data.initial; + return name; + } + static const char * level_str(uint32_t level ); + inline const char * level_str() const { + return level_str(level); + } +}; + +// Necessary forward referencea +struct cbl_label_t; +struct cbl_refer_t; + +struct cbl_span_t { + cbl_refer_t *from, *len; + + cbl_span_t( cbl_refer_t *from, cbl_refer_t *len = NULL ) + : from(from), len(len) {}; + bool is_active() const { return !( from == NULL && len == NULL ); } + + cbl_field_t *from_field(); + cbl_field_t *len_field(); +}; + + +struct cbl_refer_t { + YYLTYPE loc; + cbl_field_t *field; + cbl_label_t *prog_func; + bool all, addr_of; + uint32_t nsubscript; + cbl_refer_t *subscripts; // indices + cbl_span_t refmod; // substring bounds + + cbl_refer_t() + : field(NULL), prog_func(NULL) + , all(NULL), addr_of(false) + , nsubscript(0), subscripts(NULL), refmod(NULL) + {} + cbl_refer_t( cbl_field_t *field, bool all = false ) + : field(field), prog_func(NULL) + , all(all), addr_of(false) + , nsubscript(0), subscripts(NULL), refmod(NULL) + {} + cbl_refer_t( const YYLTYPE& loc, cbl_field_t *field, bool all = false ) + : loc(loc), field(field), prog_func(NULL) + , all(all), addr_of(false) + , nsubscript(0), subscripts(NULL), refmod(NULL) + {} + cbl_refer_t( cbl_field_t *field, cbl_span_t& refmod ) + : field(field), prog_func(NULL) + , all(false), addr_of(false) + , nsubscript(0), subscripts(NULL), refmod(refmod) + {} + cbl_refer_t( cbl_field_t *field, + size_t nsubscript, cbl_refer_t *subscripts, + cbl_span_t refmod = cbl_span_t(NULL) ) + : field(field), prog_func(NULL) + , all(false), addr_of(false) + , nsubscript(nsubscript) , subscripts( new cbl_refer_t[nsubscript] ) + , refmod(refmod) + { + std::copy(subscripts, subscripts + nsubscript, this->subscripts); + } + explicit cbl_refer_t( cbl_label_t *prog_func, bool addr_of = true ) + : field(NULL), prog_func(prog_func) + , all(false), addr_of(addr_of) + , nsubscript(0), subscripts(NULL), refmod(cbl_span_t(NULL)) + {} + + cbl_refer_t duplicate() const { + return cbl_refer_t( field, nsubscript, subscripts, refmod ); + } + + static cbl_refer_t *empty(); + + cbl_refer_t * name( const char name[] ) { + assert(name); + assert(strlen(name) < sizeof(field->name)); + strcpy(field->name, name); + return this; + } + + bool is_pointer() const { return addr_of || field->type == FldPointer; } + bool is_reference() const { return nsubscript > 0 || refmod.is_active(); } + bool is_table_reference() const { return nsubscript > 0; } + bool is_refmod_reference() const { return refmod.is_active(); } + + size_t subscripts_set( const std::list<cbl_refer_t>& subs ); + const char * str() const; + const char * deref_str() const; + const char * name() const; + cbl_field_t * cond() { + assert( ! is_reference() ); + assert(field); + if( FldConditional != field->type ) { + dbgmsg("cbl_refer_t::cond: " + "logic error: %s is not a condition expression", field->name); + } + assert( FldConditional == field->type); + return field; + } +}; + +struct elem_key_t { + size_t program; + const char * name; + elem_key_t( size_t program, const cbl_name_t name ) + : program(program) + , name(name) + {} + bool operator<( const elem_key_t& that ) const { + if( program == that.program ) { + return strcasecmp(name, that.name) < 0; + } + return program < that.program; + } + bool operator==( const elem_key_t& that ) const { + if( program == that.program ) { + return strcasecmp(name, that.name) == 0; + } + return false; + } +}; + +struct field_key_t { + size_t program; + const char * name; + field_key_t( size_t program, const cbl_field_t *field ) + : program(program) + , name(field->name) + {} + field_key_t( size_t program, const cbl_name_t name ) + : program(program) + , name(name) + {} + bool operator<( const field_key_t& that ) const { + if( program == that.program ) { + return strcasecmp(name, that.name) < 0; + } + return program < that.program; + } + bool operator==( const field_key_t& that ) const { + if( program == that.program ) { + return strcasecmp(name, that.name) == 0; + } + return false; + } +}; + +bool valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ); + +#define record_area_name_stem "_ra_" + +static inline bool +is_record_area( const cbl_field_t *field ) { + static const char stem[] = record_area_name_stem; + return 0 == memcmp(field->name, stem, sizeof(stem)-1); +} + +bool +is_register_field(cbl_field_t *field); + +static inline bool +is_constant( const cbl_field_t *field ) { + return field->has_attr(constant_e); +} + +const char * +is_numeric_constant( const char name[] ); + +cbl_field_t * +symbol_field_index_set( cbl_field_t *field ); + +bool +symbol_field_type_update( cbl_field_t *field, + cbl_field_type_t type, bool is_usage ); + +struct sort_key_t; + +struct cbl_key_t { + bool ascending; + size_t nfield; + cbl_field_t **fields; + + cbl_key_t() : ascending(false), nfield(0), fields(0) {} + cbl_key_t( size_t nfield, cbl_field_t **fields, bool ascending = true ) + : ascending(ascending), nfield(nfield), fields(fields) {} + cbl_key_t( const sort_key_t& src ); + explicit cbl_key_t( const cbl_occurs_key_t& that ); +}; + +enum cbl_label_type_t { + /* + * LblNone "matches" all types, because it exists for forward + * references. Labels are equal if the types match and the names + * match. + */ + LblNone, // top-level programs have no parent + LblProgram, + LblFunction, + LblSection, + LblParagraph, + LblLoop, + LblEvaluate, + LblSearch, + LblSort, + LblString, + LblArith, + LblCompute, +}; + +struct cbl_proc_addresses_t { + // This structure is used by 4; it very likely will never be + // referenced elsewhere + tree go_to; // gg_append_statement(go_to) generates "goto label" + tree label; // gg_append_statement(label) generates "label:" + tree addr; // addr can be used as the right-hand-side of a "pointer = addr" + tree decl; // This is the decl used to create the other three +}; + +struct cbl_proc_t { + struct cbl_label_t *label; + struct cbl_proc_addresses_t top; + struct cbl_proc_addresses_t exit; + struct cbl_proc_addresses_t bottom; + tree alter_location; // The altered value if this paragraph is the target of an ALTER +}; + +struct cbl_label_addresses_t { + // This structure is used by parser_label_label() and parser_label_goto() + // It reuses the cbl_label_t *proc pointer; the meaning is clear from context + tree go_to; // gg_append_statement(go_to) generates "goto label" + tree label; // gg_append_statement(label) generates "label:" +}; + +struct cbl_refer_t; + +static inline const char * +logop_str( enum logop_t logop ) { + switch ( logop ) { + case not_op: return "not"; + case and_op: return "and"; + case or_op: return "or"; + case xor_op: return "xor"; + case xnor_op: return "xnor"; + case true_op: return "true"; + case false_op: return "false"; + } + return "???"; +} + +static inline const char * +relop_str( enum relop_t relop ) { + switch ( relop ) { + case lt_op: + return "<"; + case le_op: + return "<="; + case eq_op: + return "=="; + case ne_op: + return "<>"; + case ge_op: + return ">="; + case gt_op: + return ">"; + } + return "???"; +} + +static inline const char * +setop_str( enum setop_t setop ) { + switch ( setop ) { + case is_op: + return "is_op"; + } + return "???"; +} + +struct cbl_substitute_t { + enum subst_fl_t { subst_all_e, subst_first_e = 'F', subst_last_e = 'L'}; + bool anycase; + subst_fl_t first_last; + cbl_refer_t orig, replacement; + + cbl_substitute_t( bool anycase = false, char first_last = 0, + cbl_refer_t *orig = NULL, cbl_refer_t *replacement = NULL ) + : anycase(anycase) + , first_last(subst_fl_t(first_last)) + , orig( orig? *orig : cbl_refer_t() ) + , replacement( replacement? *replacement : cbl_refer_t() ) + {} +}; + +static inline const char * +field_name( const cbl_field_t *f ) { return f? f->name : "(void)"; } + +static inline const char * +field_name(const cbl_refer_t *r) { return r? field_name(r->field) : "(Nil)"; } + +char * field_str( const cbl_field_t *field ); + +struct cbl_string_src_t { + cbl_refer_t delimited_by; // identifier-2: BY SIZE indicated by NULL field + size_t ninput; + cbl_refer_t *inputs; // identifier-1 + + cbl_string_src_t( const cbl_refer_t& delimited_by, + size_t ninput, cbl_refer_t *inputs ) + : delimited_by(delimited_by) + , ninput(ninput) + , inputs(inputs) + {} +}; + +struct cbl_num_result_t { + enum cbl_round_t rounded; + struct cbl_refer_t refer; + + static cbl_refer_t refer_of( const cbl_num_result_t& res ) { return res.refer; } +}; + +void parser_symbol_add( struct cbl_field_t *new_var ); +void parser_local_add( struct cbl_field_t *new_var ); + +struct cbl_ffi_arg_t { + bool optional; + cbl_ffi_crv_t crv; + cbl_ffi_arg_attr_t attr; + cbl_refer_t refer; // refer::field == NULL is OMITTED + + cbl_ffi_arg_t( cbl_refer_t* refer = NULL, + cbl_ffi_arg_attr_t attr = none_of_e ); + cbl_ffi_arg_t( cbl_ffi_crv_t crv, + cbl_refer_t* refer, + cbl_ffi_arg_attr_t attr = none_of_e ); + cbl_field_t *field() { return refer.field; } + void validate() const { + if( refer.is_reference() ) { + yyerror("%s is a reference", refer.field->name); + } + if( ! refer.field->has_attr(linkage_e) ) { + yyerror("%s not found in LINKAGE SECTION", refer.field->name); + } + switch( refer.field->level ) { + case 1: case 77: + break; + default: + yyerror("%s must be LEVEL 01 or 77", refer.field->name); + } + // Update Linkage Section data item. + refer.field->set_linkage(crv, optional); + } +protected: + bool by_value() const { + if( crv == by_reference_e ) return false; + return refer.field != NULL; + } +}; + +// In support of serial/linear search: +struct cbl_lsearch_addresses_t { + // This structure is used by linear_search + struct cbl_label_addresses_t at_exit; // The at_exit statements are at the top + struct cbl_label_addresses_t top; // Start of the loop of WHENS + struct cbl_label_addresses_t bottom; // The very bottom +}; + +struct cbl_lsearch_t { + cbl_lsearch_addresses_t addresses; + cbl_label_addresses_t jump_over; + tree limit; + tree counter; + struct cbl_field_t *index; + struct cbl_field_t *varying; + bool first_when; +}; + +// This structure is used for binary searches: + +struct cbl_bsearch_t { + cbl_label_addresses_t too_small; + cbl_label_addresses_t too_big; + cbl_label_addresses_t top; + cbl_label_addresses_t first_test; + cbl_label_addresses_t bottom; + tree left; // This is a long + tree right; // This is a long + tree middle; // This is our copy of the index, so we only need to write + // it and never read it. + tree compare_result; // This is an int, and avoids + struct cbl_field_t *index; + bool first_when; +}; + +struct cbl_unstring_t { + cbl_label_addresses_t over; + cbl_label_addresses_t into; + cbl_label_addresses_t bottom; +}; + +// Used by RETURN instruction in SORT with output-procedure +struct cbl_sortreturn_t { + cbl_label_addresses_t at_end; + cbl_label_addresses_t not_at_end; + cbl_label_addresses_t bottom; +}; + +struct cbl_call_exception_t { + cbl_label_addresses_t over; + cbl_label_addresses_t into; + cbl_label_addresses_t bottom; +}; + +struct cbl_arith_error_t { + cbl_label_addresses_t over; + cbl_label_addresses_t into; + cbl_label_addresses_t bottom; +}; + +struct cbl_compute_error_t { + // This is an int. The value is a cbl_compute_error_code_t + tree compute_error_code; +}; + +struct cbl_label_t { + enum cbl_label_type_t type; + size_t parent; + int line, used, lain; + bool common, initial, recursive; + size_t initial_section, returning; + cbl_name_t name; + const char *os_name, *mangled_name; + union + { + // For performs, paragraphs, and sections: + cbl_proc_t *proc; + + // For parser_label_label and parser_label_goto + cbl_label_addresses_t *goto_trees; + + // For linear/serial search + cbl_lsearch_t *lsearch; + + // For binary search + cbl_bsearch_t *bsearch; + + // For UNSTRING search + cbl_unstring_t *unstring; + + // for CALL [NOT] ON EXCEPTION + struct cbl_call_exception_t *call_exception; + + // for arithmetic [NOT] ON SIZE_ERROR + struct cbl_arith_error_t *arith_error; + + // for parser_op/parser_assign error tracking + struct cbl_compute_error_t *compute_error; + } structs; + + bool is_function() const { return type == LblFunction; } + + const char *type_str() const { + switch(type) { + case LblNone: return "LblNone"; + case LblProgram: return "LblProgram"; + case LblFunction: return "LblFunction"; + case LblSection: return "LblSection"; + case LblParagraph: return "LblParagraph"; + case LblLoop: return "LblLoop"; + case LblEvaluate: return "LblEvaluate"; + case LblSearch: return "LblSearch"; + case LblSort: return "LblSort"; + case LblString: return "LblString"; + case LblArith: return "LblArith"; + case LblCompute: return "LblCompute"; + } + gcc_unreachable(); + } + + size_t explicit_parent() const; + const char *str() const; +}; + +struct parser_tgt_t; + +class cbl_label_ref_t { + bool qualified; // caller mentioned paragraph & section + cbl_label_t *target; + const cbl_label_t& context; // section called from + int line; // point of reference + parser_tgt_t *handle; +public: + cbl_label_ref_t( size_t program, const cbl_label_t& context, int line, + const char name[], size_t isect = 0 ); + + cbl_label_t * target_of() { return target; } + + parser_tgt_t * handle_of(parser_tgt_t *parser_tgt) { + return this->handle = parser_tgt; + } + parser_tgt_t * handle_of() { + return this->handle; + } +}; + +static inline bool +label_lessthan( const cbl_label_t & a, const cbl_label_t & b ) { + if ( a.type == LblNone || b.type == LblNone || a.type == b.type ) { + return strcmp( a.name, b.name ) < 0; + } + return a.type < b.type; +} + +static inline bool +operator<( const cbl_label_t & a, const cbl_label_t & b ) { + return label_lessthan( a, b ); +} + +struct label_cmp_lessthan { + bool operator() ( const cbl_label_t * a, const cbl_label_t * b ) { + return label_lessthan( *a, *b ); + } + bool operator() ( const cbl_label_t& a, const cbl_label_t& b ) { + return label_lessthan( a, b ); + } +}; + +size_t field_index( const cbl_field_t *f ); + +cbl_field_t * new_temporary( enum cbl_field_type_t type, const char initial[] = NULL ); +cbl_field_t * new_temporary_like( cbl_field_t skel ); +cbl_field_t * new_temporary_clone( const cbl_field_t *orig); +cbl_field_t * keep_temporary( cbl_field_type_t type ); + +cbl_field_t * new_literal( uint32_t len, const char initial[], + enum cbl_field_attr_t attr = none_e ); + +void symbol_temporaries_free(); + +class temporaries_t { + friend void symbol_temporaries_free(); + struct literal_an { + bool is_quoted; + std::string value; + literal_an( const char value[] = "???", bool is_quoted = false ) + : is_quoted(is_quoted), value(value) {} + literal_an& operator=( const literal_an& that ) { + is_quoted = that.is_quoted; + value = that.value; + return *this; + } + bool operator<( const literal_an& that ) const { + if( value == that.value ) { // alpha before numeric + return (is_quoted? 0 : 1) < (that.is_quoted? 0 : 1); + } + return value < that.value; + } + }; + + std::map<literal_an, cbl_field_t *> literals; + typedef std::set<cbl_field_t *> fieldset_t; + typedef std::map<cbl_field_type_t, fieldset_t> fieldmap_t; + fieldmap_t used, freed; + +public: + cbl_field_t * literal( const char value[], uint32_t len, cbl_field_attr_t attr = none_e ); + cbl_field_t * reuse( cbl_field_type_t type ); + cbl_field_t * acquire( cbl_field_type_t type ); + cbl_field_t * add( cbl_field_t *field ); + bool keep( cbl_field_t *field ) { return 1 == used[field->type].erase(field); } + void dump() const; + ~temporaries_t(); +}; + + +static inline bool is_table( const cbl_field_t *field ) { + return field && field->occurs.ntimes() > 0; +} + +static inline bool is_filler( const cbl_field_t *field ) { + return field && 0 == strcasecmp("FILLER", field->name); +} + +/* + * CALL + */ + +/* + * Intrinsics + */ + +enum cbl_intrinsic_trim_t { + trim_none_e, + trim_leading_e = 1, + trim_trailing_e = 2, +}; + +enum cbl_ctype_t { + c_unknown, + c_bool, + c_char, + c_wchar, + c_byte, + c_ubyte, + c_short, + c_ushort, + c_int, + c_uint, + c_long, + c_ulong, + c_longlong, + c_ulonglong, + c_size_t, + c_ssize_t, + c_int128, + c_float, + c_double, + c_longdouble, + c_char_p, + c_wchar_p, + c_void_p, + c_nts, // this is a null-terminated-string char_p +}; + +struct function_descr_arg_t { + size_t isym; + cbl_ffi_crv_t crv; + bool optional; + + function_descr_arg_t() + : isym(0), crv(by_default_e), optional(false) + {} + function_descr_arg_t( size_t isym, cbl_ffi_crv_t crv, bool optional ) + : isym(isym), crv(crv), optional(optional) + {} +}; + +struct function_descr_t { + int token; + cbl_name_t name; + char cname[48]; + char types[8]; + std::vector<function_descr_arg_t> linkage_fields; + cbl_field_type_t ret_type; + + static function_descr_t init( const char name[] ) { + function_descr_t descr = {}; + if( -1 == snprintf( descr.name, sizeof(descr.name), "%s", name ) ) { + dbgmsg("name truncated to '%s' (max %zu characters)", name); + } + return descr; // truncation also reported elsewhere ? + } + static function_descr_t init( int isym ); + + static char + parameter_type( const cbl_field_t& field ) { + switch( field.type ) { + case FldDisplay: + case FldInvalid: + case FldGroup: + case FldLiteralA: + case FldLiteralN: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + case FldBlob: + return '?'; + case FldPointer: + return 'O'; + case FldAlphanumeric: + return field.has_attr(all_alpha_e)? 'A' : 'X'; + case FldPacked: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldNumericBinary: + case FldNumericBin5: + return field.data.rdigits == 0? 'I' : 'N'; + case FldFloat: + return 'N'; + } + gcc_unreachable(); + } + + bool operator<( const function_descr_t& that ) const { + return strcasecmp(name, that.name) < 0; + } + bool operator==( const function_descr_t& that ) const { + return strcasecmp(name, that.name) == 0; + } + bool operator==( const char *name ) const { + return strcasecmp(this->name, name) == 0; + } +}; + +enum cbl_section_type_t { + file_sect_e, + working_sect_e, + linkage_sect_e, + local_sect_e, +}; + +struct cbl_section_t { + cbl_section_type_t type; + int line; + void * node; + + const char * name() const { + switch(type) { + case file_sect_e: return "file_sect_e"; + case working_sect_e: return "working_sect_e"; + case linkage_sect_e: return "linkage_sect_e"; + case local_sect_e: return "local_sect_e"; + } + gcc_unreachable(); + } + uint32_t attr() const { + switch(type) { + case file_sect_e: + case working_sect_e: return 0; + case linkage_sect_e: return linkage_e; + case local_sect_e: return local_e; + } + gcc_unreachable(); + } +}; + +struct cbl_special_name_t { + int token; + enum special_name_t id; + cbl_name_t name; + size_t filename; + char os_filename[16]; // short because always in /dev +}; + +char * hex_decode( const char text[] ); + +struct cbl_alphabet_t { + YYLTYPE loc; + cbl_name_t name; + cbl_encoding_t encoding; + unsigned char low_index, high_index, last_index, alphabet[256];; + + cbl_alphabet_t() + : loc { 1,1, 1,1 } + , encoding(ASCII_e) + , low_index(0) + , high_index(255) + , last_index(0) + { + memset(name, '\0', sizeof(name)); + memset(alphabet, 0xFF, sizeof(alphabet)); + } + + cbl_alphabet_t(const YYLTYPE& loc, cbl_encoding_t enc) + : loc(loc) + , encoding(enc) + , low_index(0) + , high_index(255) + , last_index(0) + { + memset(name, '\0', sizeof(name)); + memset(alphabet, 0xFF, sizeof(alphabet)); + } + + cbl_alphabet_t( const YYLTYPE& loc, const cbl_name_t name, + unsigned char low_index, unsigned char high_index, + unsigned char alphabet[] ) + : loc(loc) + , encoding(custom_encoding_e) + , low_index(low_index), high_index(high_index) + , last_index(high_index) + { + assert(strlen(name) < sizeof(this->name)); + strcpy(this->name, name); + std::copy(alphabet, alphabet + sizeof(this->alphabet), this->alphabet); + } + + unsigned char low_value() const { + return alphabet[low_index]; + } + unsigned char high_value() const { + return alphabet[high_index]; + } + + void + add_sequence( const YYLTYPE& loc, const unsigned char seq[] ) { + if( low_index == 0 ) low_index = seq[0]; + + unsigned char high_value = last_index > 0? alphabet[last_index] + 1 : 0; + + for( const unsigned char *p = seq; !end_of_string(p); p++ ) { + assign(loc, *p, high_value++); + } + } + + void + add_interval( const YYLTYPE& loc, unsigned char low, unsigned char high ) { + if( low_index == 0 ) low_index = low; + + unsigned char high_value = alphabet[last_index]; + + for( unsigned char ch = low; ch < high; ch++ ) { + assign(loc, ch, high_value++); + } + } + + void also( const YYLTYPE& loc, size_t ch ); + bool assign( const YYLTYPE& loc, unsigned char ch, unsigned char value ); + + static const char * + encoding_str( cbl_encoding_t encoding ) { + switch(encoding) { + case ASCII_e: return "ascii"; + case iso646_e: return "iso646"; + case EBCDIC_e: return "ebcdic"; + case custom_encoding_e: return "custom"; + } + return "???"; + } + + void dump() const { + yywarn("'%s': %s, '%c' to '%c' (low 0x%02x, high 0x%02x)", + name, encoding_str(encoding), + low_index, last_index, low_index, high_index); + if( encoding == custom_encoding_e ) { + fprintf(stderr, "\t" + " 0 1 2 3 4 5 6 7" + " 8 9 A B C C E F"); + unsigned int row = 0; + for( auto p = alphabet; p < alphabet + sizeof(alphabet); p++ ) { + if( (p - alphabet) % 16 == 0 ) fprintf(stderr, "\n%4X\t", row++); + fprintf(stderr, "%3u ", *p); + } + fprintf(stderr, "\n"); + } + } + static unsigned char nul_string[2]; + + protected: + static inline bool end_of_string( const unsigned char *p ) { + return p != nul_string && *p == '\0'; + } +}; + +// a function pointer +typedef void ( *cbl_function_ptr ) ( void ); + +struct cbl_function_t { + char name[NAME_MAX]; + cbl_function_ptr func; +}; + +static inline const char * +file_org_str( enum cbl_file_org_t org ) { + switch ( org ) { + case file_disorganized_e: return "DISORGANIZED"; + case file_sequential_e: return "SEQUENTIAL"; + case file_line_sequential_e: return "LINE_SEQUENTIAL"; + case file_indexed_e: return "INDEXED"; + case file_relative_e: return "RELATIVE"; + } + return "???"; +} + +enum file_entry_type_t { fd_e, sd_e }; + +static inline const char * +file_access_str( cbl_file_access_t access ) { + switch(access) { + case file_inaccessible_e: return "INACCESSIBLE"; + case file_access_seq_e: return "SEQUENTIAL"; + case file_access_rnd_e: return "RANDOM"; + case file_access_dyn_e: return "DYNAMIC"; + } + return "???"; +} + +enum declarative_culprit_t { + culpa_none_e, + culpa_input_e = 0x01, + culpa_output_e = 0x02, + culpa_io_e = 0x03, // both input and output + culpa_extend_e = 0x04, +}; + +struct cbl_file_key_t { + bool unique; + cbl_name_t name; + size_t leftmost; // START or READ named leftmost field in key + size_t nfield; + size_t *fields; + + cbl_file_key_t( size_t field = 0, bool unique = true ) + : unique(unique) + , leftmost(0) + , nfield(1) + , fields( new size_t[nfield] ) + { + fields[0] = field; + memset(name, '\0', sizeof(name)); + } + cbl_file_key_t( const cbl_file_key_t *that ) + : unique(that->unique) + , leftmost(that->leftmost) + , nfield(that->nfield) + { + memcpy(name, that->name, sizeof(name)); + fields = new size_t[nfield]; + std::copy( that->fields, that->fields + that->nfield, fields ); + } + + cbl_file_key_t( cbl_name_t name, + const std::list<cbl_field_t *>& fields, + bool is_unique ); + + uint32_t size(); + void deforward( size_t ifile ); + char * str() const; + bool operator==( const cbl_field_t *key_field ); // not const, may set leftmost + + protected: + static uint32_t key_field_size( uint32_t sum, size_t ifield ); + size_t offset() const; +}; + +struct cbl_file_lock_t { + bool multiple; + enum lock_mode_t { unlocked_e, manual_e, record_e, automatic_e } mode; + bool mode_set( int token ); + bool locked() const { return mode != unlocked_e; } +}; + +struct cbl_file_t { + enum cbl_file_org_t org; + enum file_entry_type_t entry_type; + uint32_t attr; + size_t reserve, same_record_as; + char padding; + bool optional; + // varying_size::explicitly is TRUE if if RECORD has VARYING or CONTAINS x TO y + struct varying_t { bool explicitly; size_t min, max; } varying_size; + cbl_file_lock_t lock; + // "The RECORD DELIMITER clause is syntax checked, but has no effect + // on the execution of the program." + enum cbl_file_access_t access; + size_t filename; // + size_t default_record; + size_t nkey; // 1st key is primary & unique + cbl_file_key_t *keys; // indexes into symbol table for key field(s) + size_t password; // index into symbol table for password (!) + size_t user_status; // index into symbol table for file status + size_t vsam_status; // index into symbol table for vsam status PIC X(6) + size_t record_length; // DEPENDS ON + int line; + cbl_name_t name; + cbl_sortreturn_t *addresses; // Used during parser_return_start, et al. + tree var_decl_node; // GENERIC tag for the run-time FIELD structure + bool varies() const { return varying_size.min != varying_size.max; } + bool validate() const; + void deforward(); + char * keys_str() const; + int key_one( cbl_field_t *field ) const { + auto ekey = keys + nkey, p = ekey; + if( (p = std::find(keys, ekey, field)) == ekey ) return 0; + return (p - keys) + 1; + } + bool relative_sequential() const { + return org == file_relative_e && access == file_access_seq_e; + } + bool indexed_sequential() const { + return org == file_indexed_e && access == file_access_seq_e; + } + void consider_for_default( const cbl_field_t *record ); + protected: + bool validate_forward( size_t isym ) const; + bool validate_key( const cbl_file_key_t& key ) const; +}; + +static inline bool +is_sequential( const cbl_file_t *file ) { + assert(file); + switch(file->org) { + case file_sequential_e: + case file_line_sequential_e: + return true; + case file_disorganized_e: + case file_indexed_e: + case file_relative_e: + break; + } + return false; +} + +struct symbol_elem_t { + enum symbol_type_t type; + size_t program; + union symbol_elem_u { + char *filename; + struct cbl_function_t function; + struct cbl_field_t field; + struct cbl_label_t label; + struct cbl_special_name_t special; + struct cbl_alphabet_t alphabet; + struct cbl_file_t file; + struct cbl_section_t section; + } elem; +}; + +# define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) + +static inline symbol_elem_t * +symbol_elem_of( cbl_label_t *label ) { + size_t n = offsetof(struct symbol_elem_t, elem.label); + return + reinterpret_cast<struct symbol_elem_t *>((char*)label - n); +} + +static inline const symbol_elem_t * +symbol_elem_of( const cbl_label_t *label ) { + size_t n = offsetof(symbol_elem_t, elem.label); + return + reinterpret_cast<const symbol_elem_t *>((const char*)label - n); +} + +static inline symbol_elem_t * +symbol_elem_of( cbl_special_name_t *special ) { + size_t n = offsetof(symbol_elem_t, elem.special); + return + reinterpret_cast<symbol_elem_t *>((char*)special - n); +} + +static inline symbol_elem_t * +symbol_elem_of( cbl_alphabet_t *alphabet ) { + size_t n = offsetof(symbol_elem_t, elem.alphabet); + return + reinterpret_cast<symbol_elem_t *>((char*)alphabet - n); +} + +static inline symbol_elem_t * +symbol_elem_of( cbl_file_t *file ) { + size_t n = offsetof(struct symbol_elem_t, elem.file); + return + reinterpret_cast<struct symbol_elem_t *>((char*)file - n); +} +static inline const symbol_elem_t * +symbol_elem_of( const cbl_file_t *file ) { + size_t n = offsetof(symbol_elem_t, elem.file); + return + reinterpret_cast<const symbol_elem_t *>((const char*)file - n); +} + +static inline symbol_elem_t * +symbol_elem_of( cbl_field_t *field ) { + size_t n = offsetof(struct symbol_elem_t, elem.field); + return + reinterpret_cast<struct symbol_elem_t *>((char*)field - n); +} +static inline const symbol_elem_t * +symbol_elem_of( const cbl_field_t *field ) { + size_t n = offsetof(symbol_elem_t, elem.field); + return + reinterpret_cast<const symbol_elem_t *>((const char*)field - n); +} + +symbol_elem_t * symbols_begin( size_t first = 0 ); +symbol_elem_t * symbols_end(void); +cbl_field_t * symbol_redefines( const struct cbl_field_t *field ); + +void build_symbol_map(); +bool update_symbol_map( symbol_elem_t *e ); + +void update_symbol_map2( const symbol_elem_t *elem ); +void finalize_symbol_map2(); +void dump_symbol_map2(); + +symbol_elem_t * symbol_register( const char name[] ); + +std::pair<symbol_elem_t *, bool> +symbol_find( size_t program, std::list<const char *> names ); +symbol_elem_t * symbol_find_of( size_t program, + std::list<const char *> names, size_t group ); + +struct cbl_field_t *symbol_find_odo( cbl_field_t * field ); +size_t dimensions( const cbl_field_t *field ); + +const symbol_elem_t * symbol_field_current_record(); +const symbol_elem_t * symbol_field_alias_begin(); +void symbol_field_alias_end(); + +typedef std::map< size_t, size_t > corresponding_fields_t; + +corresponding_fields_t +corresponding_arith_fields( cbl_field_t *lhs, cbl_field_t *rhs ); +corresponding_fields_t +corresponding_move_fields( cbl_field_t *lhs, cbl_field_t *rhs ); + +typedef std::set<size_t> symbolset_t; + +symbolset_t symbol_program_programs(); +symbolset_t symbol_program_callables( size_t program ); +const cbl_label_t * symbol_program_local( const char called[] ); + +bool redefine_field( cbl_field_t *field ); + +// Functions to correctly extract the underlying type. +static inline struct cbl_function_t * +cbl_function_of( struct symbol_elem_t *e ) { + assert(e->type == SymFunction); + return &e->elem.function; +} + +static inline struct cbl_section_t * +cbl_section_of( struct symbol_elem_t *e ) { + assert(e->type == SymDataSection); + return &e->elem.section; +} + +static inline struct cbl_field_t * +cbl_field_of( struct symbol_elem_t *e ) { + assert(e->type == SymField); + return &e->elem.field; +} +static inline const struct cbl_field_t * +cbl_field_of( const struct symbol_elem_t *e ) { + assert(e->type == SymField); + return &e->elem.field; +} + +static inline struct cbl_label_t * +cbl_label_of( struct symbol_elem_t *e ) { + assert(e->type == SymLabel); + return &e->elem.label; +} + +static inline const struct cbl_label_t * +cbl_label_of( const struct symbol_elem_t *e ) { + assert(e->type == SymLabel); + return &e->elem.label; +} + +static inline struct cbl_special_name_t * +cbl_special_name_of( struct symbol_elem_t *e ) { + assert(e->type == SymSpecial); + return &e->elem.special; +} + +static inline struct cbl_alphabet_t * +cbl_alphabet_of( struct symbol_elem_t *e ) { + assert(e->type == SymAlphabet); + return &e->elem.alphabet; +} + +static inline struct cbl_file_t * +cbl_file_of( struct symbol_elem_t *e ) { + assert(e->type == SymFile); + return &e->elem.file; +} + +static inline const struct cbl_file_t * +cbl_file_of( const struct symbol_elem_t *e ) { + assert(e->type == SymFile); + return &e->elem.file; +} + +static inline bool +is_program( const symbol_elem_t& e ) { + return e.type == SymLabel && + (cbl_label_of(&e)->type == LblProgram || + cbl_label_of(&e)->type == LblFunction); +} + +static inline bool +is_procedure( const symbol_elem_t& e ) { + return e.type == SymLabel && + (cbl_label_of(&e)->type == LblParagraph || + cbl_label_of(&e)->type == LblSection); +} + +static inline bool +is_figconst(const struct cbl_field_t *field ) { + return ((field->attr & FIGCONST_MASK) != 0 ); +} + +static inline bool +is_figconst_low( const struct cbl_field_t *field ) { + return ((field->attr & FIGCONST_MASK) == low_value_e ); +} + +static inline bool +is_figconst_zero( const struct cbl_field_t *field ) { + return ((field->attr & FIGCONST_MASK) == zero_value_e ); +} + +static inline bool +is_figconst_space( const struct cbl_field_t *field ) { + return ((field->attr & FIGCONST_MASK) == space_value_e ); +} + +static inline bool +is_figconst_quote( const struct cbl_field_t *field ) { + return ((field->attr & FIGCONST_MASK) == quote_value_e ); +} + +static inline bool +is_figconst_high( const struct cbl_field_t *field ) { + return ((field->attr & FIGCONST_MASK) == high_value_e ); +} + +static inline bool +is_space_value( const struct cbl_field_t *field ) { + return( (strcmp(field->name, "SPACE") == 0) + || (strcmp(field->name, "SPACES") == 0) ); +} + +static inline bool +is_quoted( const struct cbl_field_t *field ) { + return field->has_attr(quoted_e); +} + +/* + * PERFORM support + * + * cbl_until_addresses_t has the goto/label pairs needed to implement the + * PERFORM UNTIL/VARYING/TIMES possibilities + */ + +#define MAXIMUM_UNTILS 64 // This was one VARYING and four AFTERs + +struct cbl_until_addresses_t { + // This structure is used by parser_perform_start() and parser_perform_until + struct cbl_label_addresses_t top; // The very top of the loop + struct cbl_label_addresses_t exit; // The implied continue at the bottom + struct cbl_label_addresses_t test; // The test at the bottom of the body + struct cbl_label_addresses_t testA; // Starting point of a TEST_AFTER loop + struct cbl_label_addresses_t setup; // The actual entry point + size_t number_of_conditionals; + struct cbl_label_addresses_t condover[MAXIMUM_UNTILS]; // Jumping over the conditional + struct cbl_label_addresses_t condinto[MAXIMUM_UNTILS]; // Jumping into the conditional + struct cbl_label_addresses_t condback[MAXIMUM_UNTILS]; // Jumping back from the conditional + int line_number_of_setup_code; // This is needed to thwart the too-helpful compiler +}; + +size_t symbol_index(); // nth after first program symbol +size_t symbol_index( const struct symbol_elem_t *e ); +struct symbol_elem_t * symbol_at( size_t index ); + +struct cbl_options_t { + enum arith_t { + native_e, + standard_e, + standard_binary_e, + standard_decimal_e, + } arith; + enum float_endidanism_t { + high_order_left_e, + high_order_right_e, + } binary_endidanism, decimal_endidanism; + enum float_encoding_t { + binary_encoding_e, + decimal_encoding_e, + } float_encoding; + + cbl_round_t default_round, intermediate_round; + + struct initialize_t { + ssize_t working, local; + initialize_t() : working(-1), local(-1) {} + } initial_value; + + cbl_options_t() + : arith(cbl_options_t::native_e) + , binary_endidanism(cbl_options_t::high_order_right_e) + , decimal_endidanism(cbl_options_t::high_order_right_e) + , float_encoding(cbl_options_t::binary_encoding_e) + , default_round(nearest_away_from_zero_e) + , intermediate_round(nearest_away_from_zero_e) + {} + cbl_field_t * initial_working() const { + return initial_value.working < 0? nullptr : + cbl_field_of(symbol_at(initial_value.working)); + } + cbl_field_t * initial_local() const { + return initial_value.local < 0? nullptr : + cbl_field_of(symbol_at(initial_value.local)); + } +}; +cbl_options_t current_options(); + +struct symbol_elem_t * +symbol_field_forward_add( size_t program, size_t parent, + const char name[], int line ); + +struct cbl_field_t * symbol_field_forward( size_t index ); + +struct cbl_prog_hier_t { + size_t nlabel; + struct program_label_t { + size_t ordinal; + cbl_label_t label; + program_label_t() : ordinal(0) {} + program_label_t( const symbol_elem_t& e ) { + ordinal = symbol_index(&e); + label = e.elem.label; + } + } *labels; + + cbl_prog_hier_t(); +}; + +/* + * cbl_perform_tgt_t has from and to: the 1st and last labels to be performed. + * When only one label is being performed (no "thru"), "to" is NULL. + * In the case of an inline perform, "from" points to a label of type LblLoop. + */ +struct cbl_perform_tgt_t { + struct cbl_until_addresses_t addresses; + + cbl_perform_tgt_t() : ifrom(0), ito(0) {} + cbl_perform_tgt_t( cbl_label_t * from, cbl_label_t *to = NULL ) + : ifrom( from? symbol_index(symbol_elem_of(from)) : 0 ) + , ito( to? symbol_index(symbol_elem_of(to)) : 0 ) + { + addresses = {}; + } + + cbl_label_t * from( cbl_label_t * label ) { + ifrom = symbol_index(symbol_elem_of(label)); + return from(); + } + cbl_label_t * finally( size_t program ); + + cbl_label_t * from() const { + return ifrom? cbl_label_of(symbol_at(ifrom)) : NULL; + } + cbl_label_t * to() const { + return ito? cbl_label_of(symbol_at(ito)) : NULL; + } + + + void dump() const { + assert(ifrom); + if( !ito ) { + dbgmsg( "%s:%d: #%3zu %s", __PRETTY_FUNCTION__, __LINE__, + ifrom, from()->str() ); + } else { + dbgmsg( "%s:%d: #%3zu %s THRU #%3zu %s", __PRETTY_FUNCTION__, __LINE__, + ifrom, from()->str(), ito, to()->str() ); + } + } + + protected: + size_t ifrom, ito; +}; + +struct cbl_perform_vary_t { + struct cbl_refer_t varying; // numeric + struct cbl_refer_t from; // numeric + struct cbl_refer_t by; // numeric + struct cbl_field_t *until; // FldConditional + + cbl_perform_vary_t( const cbl_refer_t& varying = cbl_refer_t(), + const cbl_refer_t& from = cbl_refer_t(), + const cbl_refer_t& by = cbl_refer_t(), + cbl_field_t *until = NULL ) + : varying(varying) + , from(from) + , by(by) + , until(until) + {} +}; + +bool is_global( const cbl_field_t * field ); + +static inline bool +is_literal( const cbl_field_type_t type ) { + return type == FldLiteralA + || type == FldLiteralN; +} + +static inline bool +is_literal( const cbl_field_t *field ) { + return is_literal(field->type); +} + +static inline bool +is_signable( const struct cbl_field_t *field ) { + return field->attr & signable_e; +} + +static inline bool +is_temporary( const struct cbl_field_t *field ) { + return field->attr & intermediate_e; +} + +bool has_value( cbl_field_type_t type ); + + +static inline bool +is_numeric( const cbl_field_t *field ) { + assert( field ); + bool is_zero = zero_value_e == (field->attr & zero_value_e); + return is_zero || is_numeric(field->type); +} + +/* + * Public functions + */ + +bool cobol_filename( const char *name ); +const char * cobol_filename(); + +const char * cobol_fileline_set( const char line[] ); + +char *cobol_name_mangler(const char *cobol_name); + +bool is_elementary( enum cbl_field_type_t type ); +bool is_numeric_edited( const char picture[] ); + +const char * intrinsic_function_name( int token ); + +char date_time_fmt( const char input[] ); + +size_t current_program_index(); +const char * current_declarative_section_name(); + +struct cbl_nameloc_t { + YYLTYPE loc; + const char *name; + + cbl_nameloc_t() : loc{ 1,1, 1,1 }, name(NULL) {} + cbl_nameloc_t( const YYLTYPE& loc, const char *name ) + : loc(loc), name(name) + {} +}; + +/* + * The lexer pushes qualified names unilaterally, regardless of the + * state of the parser, because it runs ahead of the parser. The + * parser adds to the queue conditionally, only if the lexer has not. + * The parser consumes a queue element (a name list) whenever it looks + * up a name, e.g. on the way to producing a scalar. + */ +#include <queue> +typedef std::list<const char *> cbl_namelist_t; +typedef std::list<cbl_nameloc_t> cbl_namelocs_t; +class name_queue_t : private std::queue<cbl_namelocs_t> +{ + friend void tee_up_empty(); + cbl_namelocs_t recent; + + void allocate() { + std::queue<cbl_namelocs_t>::push( cbl_namelocs_t() ); + } + public: + static cbl_namelist_t + namelist_of( const cbl_namelocs_t& namelocs ) { + cbl_namelist_t names; + std::transform( namelocs.begin(), namelocs.end(), std::back_inserter(names), + []( const cbl_nameloc_t& nameloc ) { + return nameloc.name; + } ); + return names; + } + size_t push( const YYLTYPE& loc, const char name[] ) { + assert( !empty() ); + back().push_front( cbl_nameloc_t(loc, name) ); + dump(__func__); + return size(); + } + void qualify( const YYLTYPE& loc, const char name[] ) { + if( empty() ) { + allocate(); + push(loc, name); + } else { + back().push_front( cbl_nameloc_t(loc, name) ); + } + dump(__func__); + } + cbl_namelocs_t pop() { + assert(!empty()); + recent = front(); + std::queue<cbl_namelocs_t>::pop(); + dump(__func__); + return recent; + } + cbl_namelist_t pop_as_names() { + return namelist_of(pop()); + } + + void dump( const char tag[] ) const; + + cbl_namelocs_t peek() const { dump(__func__); return empty()? recent : back(); } + + bool empty() const { return std::queue<cbl_namelocs_t>::empty(); } + size_t size() const { return std::queue<cbl_namelocs_t>::size(); } + +}; + +void tee_up_empty(); +void tee_up_name( const YYLTYPE& loc, const char name[] ); +cbl_namelist_t teed_up_names(); + +size_t end_of_group( size_t igroup ); + +struct symbol_elem_t * symbol_typedef( size_t program, std::list<const char *> names ); +struct symbol_elem_t * symbol_typedef( size_t program, const char name[] ); +struct symbol_elem_t * symbol_field( size_t program, + size_t parent, const char name[] ); +struct cbl_label_t * symbol_program( size_t parent, const char name[] ); +struct cbl_label_t * symbol_label( size_t program, cbl_label_type_t type, + size_t section, const char name[], + const char os_name[] = NULL ); +struct symbol_elem_t * symbol_function( size_t parent, const char name[] ); + +struct symbol_elem_t * symbol_literalA( size_t program, const char name[] ); + +struct cbl_special_name_t * symbol_special( special_name_t id ); +struct symbol_elem_t * symbol_special( size_t program, const char name[] ); +struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] ); + +struct symbol_elem_t * symbol_file( size_t program, const char name[] ); +struct cbl_field_t * symbol_file_record( struct cbl_file_t *file ); +cbl_file_t::varying_t symbol_file_record_sizes( struct cbl_file_t *file ); +struct cbl_section_t * symbol_section( size_t program, + struct cbl_section_t *section ); + +size_t symbol_label_id( const cbl_label_t *label ); + +struct cbl_field_t * parent_of( const cbl_field_t *f ); + const cbl_field_t * occurs_in( const cbl_field_t *f ); + +cbl_field_t *rename_not_ok( cbl_field_t *first, cbl_field_t *last); +bool immediately_follows( const cbl_field_t *first ); +bool is_variable_length( const cbl_field_t *field ); + +cbl_file_t * symbol_record_file( const cbl_field_t *f ); + +struct cbl_field_t * symbol_find_odo( const cbl_field_t * field ); + +size_t numeric_group_attrs( const cbl_field_t *field ); + +static inline struct cbl_field_t * +field_at( size_t index ) { + struct symbol_elem_t *e = symbol_at(index); + assert(e->type == SymField); + + return &e->elem.field; +} + +bool symbols_alphabet_set( size_t program, const char name[]); + +size_t symbols_update( size_t first, bool parsed_ok = true ); + +void symbol_table_init(void); +void symbol_table_check(void); + +struct symbol_elem_t * symbol_typedef_add( size_t program, + struct cbl_field_t *field ); +struct symbol_elem_t * symbol_field_add( size_t program, + struct cbl_field_t *field ); +struct cbl_label_t * symbol_label_add( size_t program, + struct cbl_label_t *label ); +struct cbl_label_t * symbol_program_add( size_t program, cbl_label_t *input ); +struct symbol_elem_t * symbol_special_add( size_t program, + struct cbl_special_name_t *special ); +struct symbol_elem_t * symbol_alphabet_add( size_t program, + struct cbl_alphabet_t *alphabet ); +struct symbol_elem_t * symbol_file_add( size_t program, + struct cbl_file_t *file ); +struct symbol_elem_t * symbol_section_add( size_t program, + struct cbl_section_t *section ); + +void symbol_field_location( size_t ifield, const YYLTYPE& loc ); +YYLTYPE symbol_field_location( size_t ifield ); + +bool symbol_label_section_exists( size_t program ); + +size_t symbol_field_capacity( const cbl_field_t *field ); + +size_t file_status_register(); +size_t return_code_register(); +size_t very_true_register(); +size_t very_false_register(); +size_t ec_register(); + +static inline size_t upsi_register() { + return symbol_index(symbol_field(0,0,"UPSI-0")); +} + +void wsclear( char ch); +const char *wsclear(); + +enum cbl_call_convention_t { + cbl_call_verbatim_e = 'V', + cbl_call_cobol_e = 'N', // native +}; + +cbl_call_convention_t current_call_convention(); + +cbl_call_convention_t +current_call_convention( cbl_call_convention_t convention); + +class procref_base_t { +private: + const char *section_name, *paragraph_name; +public: + procref_base_t( const char *section_name = NULL, + const char *paragraph_name = NULL ) + : section_name(section_name) + , paragraph_name(paragraph_name) + {} + procref_base_t( const procref_base_t& that ) + : section_name(that.section_name) + , paragraph_name(that.paragraph_name) + {} + + bool operator<( const procref_base_t& that ) const; + bool operator==( const procref_base_t& that ) const; + + const char *section() const { return section_name? section_name : ""; } + const char *paragraph() const { return paragraph_name? paragraph_name : ""; } + + bool has_section() const { return section_name != NULL; } + bool has_paragraph() const { return paragraph_name != NULL; } +}; + +class procref_t : public procref_base_t { + int line; + size_t context; // section called from +public: + procref_t( const char *section, const char *paragraph, int line, size_t context ) + : procref_base_t(section, paragraph) + , line(line) + , context(context) + { + assert(line); + assert(context == 0 || cbl_label_of(symbol_at(context))->type == LblSection); + } + + int line_number() const { return line; } +}; + +int keyword_tok( const char * text, bool include_intrinsics = false ); +int redefined_token( const cbl_name_t name ); + +void procedure_definition_add( size_t program, const cbl_label_t *procedure ); +void procedure_reference_add( const char *sect, const char *para, + int line, size_t context ); +procref_t * ambiguous_reference( size_t program ); + +struct symbol_elem_t * +symbol_field_alias( struct symbol_elem_t *e, const char name[] ); +struct symbol_elem_t * +symbol_field_alias2( struct symbol_elem_t *e, + struct symbol_elem_t *e2, const char name[] ); +struct symbol_elem_t * +symbol_field_same_as( cbl_field_t *tgt, const cbl_field_t *src ); + +size_t symbol_file_same_record_area( std::list<cbl_file_t*>& files ); + +cbl_field_t * +symbol_valid_udf_args( size_t function, + std::list<cbl_refer_t> args = std::list<cbl_refer_t>() ); + +bool symbol_currency_add( const char symbol[], const char sign[] = NULL ); +const char * symbol_currency( char symbol ); + +const char * symbol_type_str( enum symbol_type_t type ); +const char * cbl_field_type_str( enum cbl_field_type_t type ); +const char * cbl_logop_str( enum logop_t op ); + +static inline const char * +refer_type_str( const cbl_refer_t *r ) { + return r && r->field? cbl_field_type_str(r->field->type) : "(none)"; +} + +enum cbl_field_type_t symbol_field_type( size_t program, const char name[] ); + +struct symbol_elem_t * symbol_parent( const struct symbol_elem_t *e ); + +int length_of_picture(const char *picture); +int rdigits_of_picture(const char *picture); +int digits_of_picture(const char *picture, bool for_rdigits); +bool is_picture_scaled(const char *picture); + +template <typename LOC> +void gcc_location_set( const LOC& loc ); + +// This is slightly oddball. This is an entry point in the charutf8.cc module. +// It's the only entry point in the module, and so it seemed to me wasteful to +// create an entire .h module. So, I stuck it here. +size_t count_characters(const char *in, size_t length); + +#endif diff --git a/gcc/cobol/symfind.cc b/gcc/cobol/symfind.cc new file mode 100644 index 0000000..3c3b5d0 --- /dev/null +++ b/gcc/cobol/symfind.cc @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cobol-system.h" + +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" + +extern int yydebug; + +static bool +is_data_field( symbol_elem_t& e ) { + if( e.type != SymField ) return false; + auto f = cbl_field_of(&e); + if( f->name[0] == '\0' ) return false; + if( is_filler(f) ) return false; + + return f->type != FldForward; +} + +class sym_name_t { +public: // TEMPORARY + const char *name; + size_t program, parent; +public: + explicit sym_name_t( const char name[] ) + : name(name), program(0), parent(0) { assert(name[0] == '\0'); } + sym_name_t( size_t program, const char name[], size_t parent ) + : name(name), program(program), parent(parent) {} + + const char * c_str() const { return name; } + + // Order by: Program, Name, Parent. + bool operator<( const sym_name_t& that ) const { + if( program == that.program ) { + int by_name = strcasecmp(name, that.name); + return by_name == 0? parent < that.parent : by_name < 0; + } + return program < that.program; + } + bool operator==( const char *name ) const { + return strcasecmp(this->name, name) == 0; + } + + bool same_program( size_t program ) const { + return program == this->program; + } +}; + +typedef std::map< sym_name_t, std::vector<size_t> > symbol_map_t; + + +static symbol_map_t symbol_map; + +typedef std::map <field_key_t, std::list<size_t> > field_keymap_t; +static field_keymap_t symbol_map2; + +/* + * As each field is added to the symbol table, add its name and index + * to the name map. Initially the type is FldInvalid. Those are + * removed by symbols_update(); + */ +void +update_symbol_map2( const symbol_elem_t *e ) { + auto field = cbl_field_of(e); + + if( ! field->is_typedef() ) { + switch( field->type ) { + case FldForward: + case FldLiteralN: + return; + case FldLiteralA: + if( ! field->is_key_name() ) return; + break; + default: + break; + } + } + + field_key_t fk( e->program, field ); + symbol_map2[fk].push_back(symbol_index(e)); +} + +/* + * Purge any field whose type is FldInvalid. Remove any names that do + * not map to any field. + */ +void +finalize_symbol_map2() { + std::set<field_key_t> empties; + + for( auto& elem : symbol_map2 ) { + auto& fields( elem.second ); + std::remove_if( fields.begin(), fields.end(), + []( auto isym ) { + auto f = cbl_field_of(symbol_at(isym)); + return f->type == FldInvalid; + } ); + if( fields.empty() ) empties.insert(elem.first); + } + + for( const auto& key : empties ) { + symbol_map2.erase(key); + } +} + +static void +dump_symbol_map2( const field_key_t& key, const std::list<size_t>& candidates ) { + if( !yydebug ) return; + char *fields = NULL, sep[2] = ""; + + for( auto candidate : candidates ) { + char *tmp = fields; + fields = xasprintf("%s%s %3zu", tmp? tmp : "", sep, candidate); + sep[0] = ','; + free(tmp); + } + + dbgmsg( "%s:%d: %3zu %s {%s}", __func__, __LINE__, + key.program, key.name, fields ); + free(fields); +} + +void +dump_symbol_map2() { + int n = 0; + for( const auto& elem : symbol_map2 ) { + const field_key_t& key( elem.first ); + const std::list<size_t>& candidates( elem.second); + if( key.program != 0 ) { + dump_symbol_map2( key, candidates ); + n++; + } + } + dbgmsg("symbol_map2 has %d program elements", n); +} + +static void +dump_symbol_map_value( const char name[], const symbol_map_t::value_type& value ) { + if( !yydebug ) return; + char *ancestry = NULL, sep[2] = ""; + auto p = value.second.begin(); + + for( ; p != value.second.end(); p++ ) { + char *tmp = ancestry; + ancestry = xasprintf("%s%s %3zu", tmp? tmp : "", sep, *p); + sep[0] = ','; + free(tmp); + } + + dbgmsg( "%s:%d: %s -> %-24s {%s }", __func__, __LINE__, + name, value.first.c_str(), ancestry ); + free(ancestry); +} + + +static void +dump_symbol_map_value1( const symbol_map_t::value_type& value ) { + dump_symbol_map_value( "result", value ); +} + +static symbol_map_t::value_type +field_structure( symbol_elem_t& sym ) { + static const symbol_map_t::value_type + none( symbol_map_t::key_type( 0, "", 0 ), std::vector<size_t>() ); + + if( getenv(__func__) && sym.type == SymField ) { + const auto& field = *cbl_field_of(&sym); + dbgmsg("%s: #%zu %s: '%s' is_data_field: %s", __func__, + symbol_index(&sym), cbl_field_type_str(field.type), field.name, + is_data_field(sym)? "yes" : "no" ); + } + if( !is_data_field(sym) ) return none; + + cbl_field_t *field = cbl_field_of(&sym); + + symbol_map_t::key_type key( sym.program, field->name, field->parent ); + symbol_map_t::value_type elem( key, std::vector<size_t>() ); + symbol_map_t::mapped_type& v(elem.second); + + for( v.push_back(field_index(field)); field->parent > 0; ) { + symbol_elem_t *par = symbol_at(field->parent); + + if( SymFile == par->type ) { + v.push_back(field->parent); + break; + } + assert( SymField == par->type ); + v.push_back(field->parent); + + field = cbl_field_of(par); + + // for C of R and B of A, where R redefines B, skip B: vector is [C, R, A]. + cbl_field_t *redefined = symbol_redefines(field); // if R redefines B + if( redefined ) { + field = redefined; // We will use B's parent on next iteration + } + } + + if( getenv(__func__) && yydebug ) { + dbgmsg( "%s:%d: '%s' has %zu ancestors", __func__, __LINE__, + elem.first.c_str(), elem.second.size() ); + dump_symbol_map_value(__func__, elem); + } + + return elem; +} + +void erase_symbol_map_fwds( size_t beg ) { + for( auto p = symbols_begin(beg); p < symbols_end(); p++ ) { + if( p->type != SymField ) continue; + const auto& field(*cbl_field_of(p)); + if( field.type == FldForward ) { + symbol_map.erase( sym_name_t(p->program, field.name, field.parent) ); + } + } +} + +void +build_symbol_map() { + static size_t beg = 0; + size_t end = symbols_end() - symbols_begin(); + + if( beg == end ) return; + const size_t nsym = end - beg; + + std::transform( symbols_begin(beg), symbols_end(), + std::inserter(symbol_map, symbol_map.begin()), + field_structure ); + beg = end; + + symbol_map.erase(sym_name_t("")); + + if( yydebug ) { + dbgmsg( "%s:%d: %zu of %zu symbols inserted into %zu in symbol_map", + __func__, __LINE__, nsym, end, symbol_map.size() ); + + if( getenv(__func__) ) { + for( const auto& elem : symbol_map ) { + dump_symbol_map_value1(elem); + } + } + } +} + +bool +update_symbol_map( symbol_elem_t *e ) { + auto output = symbol_map.insert(field_structure(*e)); + return output.second; +} + +class is_name { + const char *name; +public: + is_name( const char *name ) : name(name) {} + bool operator()( symbol_map_t::value_type& elem ) { + const bool tf = elem.first == name; + if( tf && getenv("is_name") ) { + dump_key( "matched", elem.first ); + } + return tf; + } + protected: + void dump_key( const char tag[], const symbol_map_t::key_type& key ) const { + dbgmsg( "symbol_map key: %s { %3zu %3zu %s }", + tag, key.program, key.parent, key.name ); + } +}; + +/* + * Construct a list of ancestors based on a set of candidate groups. + * Presented with an item, see if any group an ancestor. If so, + * replace the item's ancestry with the group's ancestry (thus + * shortening the chain). Otherwise, return an empty element. + */ +class reduce_ancestry { + std::vector<symbol_map_t::mapped_type> candidates; + static symbol_map_t::mapped_type + candidates_only( const symbol_map_t::value_type& elem ) { return elem.second; } +public: + reduce_ancestry( const symbol_map_t& groups ) + : candidates( groups.size() ) + { + std::transform( groups.begin(), groups.end(), candidates.begin(), + candidates_only ); + } + symbol_map_t::value_type + reduce( const symbol_map_t::value_type& item ) { + static symbol_map_t::value_type none( "", std::vector<size_t>() ); + + auto ancestors = candidates.begin(); + for( ; ancestors != candidates.end(); ancestors++ ) { + assert(!ancestors->empty()); // ancestry always starts with self + auto p = std::find( item.second.begin(), item.second.end(), + ancestors->front() ); + if( p != item.second.end() ) { + // Preserve symbol's index at front of ancestor list. + symbol_map_t::mapped_type shorter(1 + ancestors->size()); + auto p = shorter.begin(); + *p = item.second.front(); + shorter.insert( ++p, ancestors->begin(), ancestors->end() ); + return make_pair(item.first, shorter); + } + } + return none; + } + symbol_map_t::value_type + operator()( symbol_map_t::value_type item ) { return reduce(item); } +}; + +class different_program { + size_t program; +public: + different_program( size_t program ) : program(program) {} + bool operator()( const symbol_map_t::value_type& item ) const { + return ! item.first.same_program(program); + } +}; + +class in_scope { + size_t program; + + static size_t prog_of( size_t program ) { + auto L = cbl_label_of(symbol_at(program)); + return L->parent; + } + +public: + in_scope( size_t program ) : program(program) {} + + // A symbol is in scope if it's defined by this program or by an ancestor. + bool operator()( const symbol_map_t::value_type& item ) const { + symbol_elem_t *e = symbol_at(item.second.front()); + for( size_t prog = this->program; prog != 0; prog = prog_of(prog) ) { + if( e->program == prog ) return true; + } + return false; + } +}; + +/* + * For a field symbol and list of qualifier IN/OF names, see if the + * namelist matches the symbol's name and ancectors' names. Success + * is all names match within scope. + * + * All symbols local to the program are in scope. A containing + * program's symbol matches only if global_e is set on it or one of + * its ancestors. + */ +static bool +name_has_names( const symbol_elem_t *e, + const std::list<const char *>& names, bool in_scope ) +{ + assert( ! names.empty() ); + auto name = names.rbegin(); + + while( e && e->type == SymField ) { + auto field = cbl_field_of(e); + if( field->type == FldForward ) return false; + + if( 0 == strcasecmp(field->name, *name) ) { + in_scope = in_scope || (field->attr & global_e); + if( ++name == names.rend() ) break; + } + + // first name must match + if( name == names.rbegin() ) break; + + // Do not chase redefines if we have an 01 record for an FD. + if( field->file ) { + e = symbol_at(field->file); + assert(1 == field->level); + assert(e->type == SymFile); + break; + } + + /* + * If the current field redefines another, it is not a member of + * its parent, but of its grandparent, if any. Not a loop because + * REDEFINES cannot be chained. + */ + cbl_field_t *parent = symbol_redefines(field); + if( parent ) { + field = parent; + assert( NULL == symbol_redefines(field) ); + } + + e = field->parent ? symbol_at(field->parent) : NULL; + } + + if( e && e->type == SymFile ) { + // first name can be a filename + auto file = cbl_file_of(e); + if( 0 == strcasecmp(file->name, *name) ) name++; + } + + return in_scope && name == names.rend(); +} + +size_t end_of_group( size_t igroup ); + +static std::vector<size_t> +symbol_match2( size_t program, + std::list<const char *> names, bool local = true ) +{ + std::vector<size_t> fields; + + field_key_t key(program, names.back()); + + auto plist = symbol_map2.find(key); + if( plist != symbol_map2.end() ) { + for( auto candidate : plist->second ) { + auto e = symbol_at(candidate); + if( name_has_names( e, names, local ) ) { + fields.push_back( symbol_index(e) ); + } + } + } + + if( fields.empty() ){ + if( program > 0 ) { // try containing program + program = cbl_label_of(symbol_at(program))->parent; + return symbol_match2( program, names, program == 0 ); + } + } + + if( yydebug ) { + char *ancestry = NULL; + const char *sep = ""; + for( auto name : names ) { + char *partial = ancestry; + int asret = asprintf(&ancestry, "%s%s%s", partial? partial : "", sep, name); + assert(asret); + sep = " -> "; + assert(ancestry); + free(partial); + } + + if( fields.empty() ) { + dbgmsg("%s: '%s' matches no fields", __func__, ancestry); + dump_symbol_map2(); + } else { + char *fieldstr = NULL; + sep = ""; + for( auto field : fields ) { + char *partial = fieldstr; + int asret = asprintf(&fieldstr, "%s%s%zu", partial? partial : "", sep, field); + assert(asret); + sep = ", "; + assert(fieldstr); + free(partial); + } + + dbgmsg("%s: '%s' matches %zu fields: {%s}", __func__, ancestry, fields.size(), fieldstr); + free(fieldstr); + } + free(ancestry); + } + + return fields; +} + +/* + * The names list is in top-down order, front-to-back. This function + * iterates backwards over the list, looking for the parent of N at + * N-1. + */ +static symbol_map_t +symbol_match( size_t program, std::list<const char *> names ) { + auto matched = symbol_match2( program, names ); + symbol_map_t output; + + for( auto isym : matched ) { + auto e = symbol_at(isym); + auto f = cbl_field_of(e); + + symbol_map_t::key_type key( e->program, f->name, f->parent ); + auto p = symbol_map.find(key); + if( p == symbol_map.end() ) { + yyerror("%s is not defined", key.name); + continue; + } + auto inserted = output.insert(*p); + if( ! inserted.second ) { + yyerror("%s is not a unique reference", key.name); + } + } + return output; +} + +static const symbol_elem_t * symbol_field_alias_01; + +const symbol_elem_t * +symbol_field_alias_begin() { + return symbol_field_alias_01 = symbol_field_current_record(); +} +void +symbol_field_alias_end() { + symbol_field_alias_01 = NULL; +} + +std::pair <symbol_elem_t *, bool> +symbol_find( size_t program, std::list<const char *> names ) { + symbol_map_t items = symbol_match(program, names); + + if( symbol_field_alias_01 && items.size() != 1 ) { + symbol_map_t qualified; + size_t i01( symbol_index(symbol_field_alias_01) ); + std::copy_if( items.begin(), items.end(), + std::inserter(qualified, qualified.begin()), + [i01]( auto item ) { + const std::vector<size_t>& ancestors(item.second); + return ancestors.back() == i01; + } ); + items = qualified; + } + + auto unique = items.size() == 1; + + if( !unique ) { + if( items.empty() ) { + return std::pair<symbol_elem_t *, bool>(NULL, false); + } + if( yydebug ) { + dbgmsg( "%s:%d: '%s' has %zu possible matches", + __func__, __LINE__, names.back(), items.size() ); + std::for_each( items.begin(), items.end(), dump_symbol_map_value1 ); + } + } + + size_t isym = items.begin()->second.front(); + auto output = std::make_pair(symbol_at(isym), unique); + + assert( FldForward != field_at(isym)->type ); + + return output; +} + +class in_group { + size_t group; +public: + in_group( size_t group ) : group(group) {} + + bool operator()( symbol_map_t::const_reference elem ) const { + return 0 < std::count( elem.second.begin(), + elem.second.end(), this->group ); + } +}; + +symbol_elem_t * +symbol_find_of( size_t program, std::list<const char *> names, size_t group ) { + symbol_map_t input = symbol_match(program, names); + + if( getenv(__func__) && input.size() != 1 ) { + dbgmsg( "%s:%d: '%s' has %zu candidates for group %zu", + __func__, __LINE__, names.back(), input.size(), group ); + std::for_each( input.begin(), input.end(), dump_symbol_map_value1 ); + } + + symbol_map_t items; + std::copy_if( input.begin(), input.end(), + std::inserter(items, items.begin()), in_group(group) ); + + if( items.size() == 1 ) { + size_t isym = items.begin()->second.front(); + assert( FldForward != field_at(isym)->type ); + return symbol_at(isym); + } + + if( yydebug ) { + dbgmsg( "%s:%d: '%s' has %zu possible matches", + __func__, __LINE__, names.back(), input.size() ); + std::for_each( input.begin(), input.end(), dump_symbol_map_value1 ); + } + + return NULL; +} diff --git a/gcc/cobol/token_names.h b/gcc/cobol/token_names.h new file mode 100644 index 0000000..26dabc8 --- /dev/null +++ b/gcc/cobol/token_names.h @@ -0,0 +1,1373 @@ +// generated by ./token_names.h.gen ../../build/gcc/cobol/parse.h +// Fri Jan 31 05:52:10 EST 2025 +tokens = { + { "identification", IDENTIFICATION_DIV }, // 258 + { "environment", ENVIRONMENT_DIV }, // 259 + { "procedure", PROCEDURE_DIV }, // 260 + { "data", DATA_DIV }, // 261 + { "file", FILE_SECT }, // 262 + { "input-output", INPUT_OUTPUT_SECT }, // 263 + { "linkage", LINKAGE_SECT }, // 264 + { "local-storage", LOCAL_STORAGE_SECT }, // 265 + { "working-storage", WORKING_STORAGE_SECT }, // 266 + { "object-computer", OBJECT_COMPUTER }, // 267 + { "display-of", DISPLAY_OF }, // 268 + { "end-function", END_FUNCTION }, // 269 + { "end-program", END_PROGRAM }, // 270 + { "end-subprogram", END_SUBPROGRAM }, // 271 + { "justified", JUSTIFIED }, // 272 + { "returning", RETURNING }, // 273 + { "no-condition", NO_CONDITION }, // 274 + { "alnum", ALNUM }, // 275 + { "alphed", ALPHED }, // 276 + { "error", ERROR }, // 277 + { "exception", EXCEPTION }, // 278 + { "size-error", SIZE_ERROR }, // 279 + { "exception-name", EXCEPTION_NAME }, // 280 + { "level", LEVEL }, // 281 + { "level66", LEVEL66 }, // 282 + { "level78", LEVEL78 }, // 283 + { "level88", LEVEL88 }, // 284 + { "class-name", CLASS_NAME }, // 285 + { "name", NAME }, // 286 + { "name88", NAME88 }, // 287 + { "nume", NUME }, // 288 + { "numed", NUMED }, // 289 + { "numed-cr", NUMED_CR }, // 290 + { "numed-db", NUMED_DB }, // 291 + { "ninedot", NINEDOT }, // 292 + { "nines", NINES }, // 293 + { "ninev", NINEV }, // 294 + { "pic-p", PIC_P }, // 295 + { "spaces", SPACES }, // 296 + { "space", SPACES }, // 296 + { "literal", LITERAL }, // 297 + { "end", END }, // 298 + { "eop", EOP }, // 299 + { "filename", FILENAME }, // 300 + { "invalid", INVALID }, // 301 + { "number", NUMBER }, // 302 + { "negative", NEGATIVE }, // 303 + { "numstr", NUMSTR }, // 304 + { "overflow", OVERFLOW }, // 305 + { "computational", COMPUTATIONAL }, // 306 + { "perform", PERFORM }, // 307 + { "backward", BACKWARD }, // 308 + { "positive", POSITIVE }, // 309 + { "pointer", POINTER }, // 310 + { "section", SECTION }, // 311 + { "standard-alphabet", STANDARD_ALPHABET }, // 312 + { "switch", SWITCH }, // 313 + { "upsi", UPSI }, // 314 + { "zero", ZERO }, // 315 + { "zeros", ZERO }, // 315 + { "zeroes", ZERO }, // 315 + { "sysin", SYSIN }, // 316 + { "sysipt", SYSIPT }, // 317 + { "sysout", SYSOUT }, // 318 + { "syslist", SYSLIST }, // 319 + { "syslst", SYSLST }, // 320 + { "syspunch", SYSPUNCH }, // 321 + { "syspch", SYSPCH }, // 322 + { "console", CONSOLE }, // 323 + { "c01", C01 }, // 324 + { "c02", C02 }, // 325 + { "c03", C03 }, // 326 + { "c04", C04 }, // 327 + { "c05", C05 }, // 328 + { "c06", C06 }, // 329 + { "c07", C07 }, // 330 + { "c08", C08 }, // 331 + { "c09", C09 }, // 332 + { "c10", C10 }, // 333 + { "c11", C11 }, // 334 + { "c12", C12 }, // 335 + { "csp", CSP }, // 336 + { "s01", S01 }, // 337 + { "s02", S02 }, // 338 + { "s03", S03 }, // 339 + { "s04", S04 }, // 340 + { "s05", S05 }, // 341 + { "afp-5a", AFP_5A }, // 342 + { "stdin", STDIN }, // 343 + { "stdout", STDOUT }, // 344 + { "stderr", STDERR }, // 345 + { "list", LIST }, // 346 + { "map", MAP }, // 347 + { "nolist", NOLIST }, // 348 + { "nomap", NOMAP }, // 349 + { "nosource", NOSOURCE }, // 350 + { "might-be", MIGHT_BE }, // 351 + { "function-udf", FUNCTION_UDF }, // 352 + { "function-udf-0", FUNCTION_UDF_0 }, // 353 + { "date-fmt", DATE_FMT }, // 354 + { "time-fmt", TIME_FMT }, // 355 + { "datetime-fmt", DATETIME_FMT }, // 356 + { "basis", BASIS }, // 357 + { "cbl", CBL }, // 358 + { "constant", CONSTANT }, // 359 + { "copy", COPY }, // 360 + { "defined", DEFINED }, // 361 + { "enter", ENTER }, // 362 + { "feature", FEATURE }, // 363 + { "insertt", INSERTT }, // 364 + { "lsub", LSUB }, // 365 + { "parameter", PARAMETER_kw }, // 366 + { "override", OVERRIDE }, // 367 + { "ready", READY }, // 368 + { "reset", RESET }, // 369 + { "rsub", RSUB }, // 370 + { "service-reload", SERVICE_RELOAD }, // 371 + { "star-cbl", STAR_CBL }, // 372 + { "subscript", SUBSCRIPT }, // 373 + { "suppress", SUPPRESS }, // 374 + { "title", TITLE }, // 375 + { "trace", TRACE }, // 376 + { "use", USE }, // 377 + { "cobol-words", COBOL_WORDS }, // 378 + { "equate", EQUATE }, // 379 + { "undefine", UNDEFINE }, // 380 + { "cdf-define", CDF_DEFINE }, // 381 + { "cdf-display", CDF_DISPLAY }, // 382 + { "cdf-if", CDF_IF }, // 383 + { "cdf-else", CDF_ELSE }, // 384 + { "cdf-end-if", CDF_END_IF }, // 385 + { "cdf-evaluate", CDF_EVALUATE }, // 386 + { "cdf-when", CDF_WHEN }, // 387 + { "cdf-end-evaluate", CDF_END_EVALUATE }, // 388 + { "call-cobol", CALL_COBOL }, // 389 + { "call-verbatim", CALL_VERBATIM }, // 390 + { "if", IF }, // 391 + { "then", THEN }, // 392 + { "else", ELSE }, // 393 + { "sentence", SENTENCE }, // 394 + { "accept", ACCEPT }, // 395 + { "add", ADD }, // 396 + { "alter", ALTER }, // 397 + { "call", CALL }, // 398 + { "cancel", CANCEL }, // 399 + { "close", CLOSE }, // 400 + { "compute", COMPUTE }, // 401 + { "continue", CONTINUE }, // 402 + { "delete", DELETE }, // 403 + { "display", DISPLAY }, // 404 + { "divide", DIVIDE }, // 405 + { "evaluate", EVALUATE }, // 406 + { "exit", EXIT }, // 407 + { "filler", FILLER_kw }, // 408 + { "goback", GOBACK }, // 409 + { "goto", GOTO }, // 410 + { "initialize", INITIALIZE }, // 411 + { "inspect", INSPECT }, // 412 + { "merge", MERGE }, // 413 + { "move", MOVE }, // 414 + { "multiply", MULTIPLY }, // 415 + { "open", OPEN }, // 416 + { "paragraph", PARAGRAPH }, // 417 + { "read", READ }, // 418 + { "release", RELEASE }, // 419 + { "return", RETURN }, // 420 + { "rewrite", REWRITE }, // 421 + { "search", SEARCH }, // 422 + { "set", SET }, // 423 + { "select", SELECT }, // 424 + { "sort", SORT }, // 425 + { "sort-merge", SORT_MERGE }, // 426 + { "string", STRING_kw }, // 427 + { "stop", STOP }, // 428 + { "subtract", SUBTRACT }, // 429 + { "start", START }, // 430 + { "unstring", UNSTRING }, // 431 + { "write", WRITE }, // 432 + { "when", WHEN }, // 433 + { "abs", ABS }, // 434 + { "access", ACCESS }, // 435 + { "acos", ACOS }, // 436 + { "actual", ACTUAL }, // 437 + { "advancing", ADVANCING }, // 438 + { "after", AFTER }, // 439 + { "all", ALL }, // 440 + { "allocate", ALLOCATE }, // 441 + { "alphabet", ALPHABET }, // 442 + { "alphabetic", ALPHABETIC }, // 443 + { "alphabetic-lower", ALPHABETIC_LOWER }, // 444 + { "alphabetic-upper", ALPHABETIC_UPPER }, // 445 + { "alphanumeric", ALPHANUMERIC }, // 446 + { "alphanumeric-edited", ALPHANUMERIC_EDITED }, // 447 + { "also", ALSO }, // 448 + { "alternate", ALTERNATE }, // 449 + { "annuity", ANNUITY }, // 450 + { "anum", ANUM }, // 451 + { "any", ANY }, // 452 + { "anycase", ANYCASE }, // 453 + { "apply", APPLY }, // 454 + { "are", ARE }, // 455 + { "area", AREA }, // 456 + { "areas", AREAS }, // 457 + { "as", AS }, // 458 + { "ascending", ASCENDING }, // 459 + { "activating", ACTIVATING }, // 460 + { "asin", ASIN }, // 461 + { "assign", ASSIGN }, // 462 + { "at", AT }, // 463 + { "atan", ATAN }, // 464 + { "based", BASED }, // 465 + { "baseconvert", BASECONVERT }, // 466 + { "before", BEFORE }, // 467 + { "binary", BINARY }, // 468 + { "bit", BIT }, // 469 + { "bit-of", BIT_OF }, // 470 + { "bit-to-char", BIT_TO_CHAR }, // 471 + { "blank", BLANK }, // 472 + { "block", BLOCK }, // 473 + { "boolean-of-integer", BOOLEAN_OF_INTEGER }, // 474 + { "bottom", BOTTOM }, // 475 + { "by", BY }, // 476 + { "byte", BYTE }, // 477 + { "byte-length", BYTE_LENGTH }, // 478 + { "cf", CF }, // 479 + { "ch", CH }, // 480 + { "changed", CHANGED }, // 481 + { "char", CHAR }, // 482 + { "char-national", CHAR_NATIONAL }, // 483 + { "character", CHARACTER }, // 484 + { "characters", CHARACTERS }, // 485 + { "checking", CHECKING }, // 486 + { "class", CLASS }, // 487 + { "cobol", COBOL }, // 488 + { "code", CODE }, // 489 + { "code-set", CODESET }, // 490 + { "collating", COLLATING }, // 491 + { "column", COLUMN }, // 492 + { "combined-datetime", COMBINED_DATETIME }, // 493 + { "comma", COMMA }, // 494 + { "command-line", COMMAND_LINE }, // 495 + { "command-line-count", COMMAND_LINE_COUNT }, // 496 + { "commit", COMMIT }, // 497 + { "common", COMMON }, // 498 + { "concat", CONCAT }, // 499 + { "condition", CONDITION }, // 500 + { "configuration", CONFIGURATION_SECT }, // 501 + { "contains", CONTAINS }, // 502 + { "content", CONTENT }, // 503 + { "control", CONTROL }, // 504 + { "controls", CONTROLS }, // 505 + { "convert", CONVERT }, // 506 + { "converting", CONVERTING }, // 507 + { "corresponding", CORRESPONDING }, // 508 + { "cos", COS }, // 509 + { "count", COUNT }, // 510 + { "currency", CURRENCY }, // 511 + { "current", CURRENT }, // 512 + { "current-date", CURRENT_DATE }, // 513 + { "data", DATA }, // 514 + { "date", DATE }, // 515 + { "date-compiled", DATE_COMPILED }, // 516 + { "date-of-integer", DATE_OF_INTEGER }, // 517 + { "date-to-yyyymmdd", DATE_TO_YYYYMMDD }, // 518 + { "date-written", DATE_WRITTEN }, // 519 + { "day", DAY }, // 520 + { "day-of-integer", DAY_OF_INTEGER }, // 521 + { "day-of-week", DAY_OF_WEEK }, // 522 + { "day-to-yyyyddd", DAY_TO_YYYYDDD }, // 523 + { "dbcs", DBCS }, // 524 + { "de", DE }, // 525 + { "debugging", DEBUGGING }, // 526 + { "decimal-point", DECIMAL_POINT }, // 527 + { "declaratives", DECLARATIVES }, // 528 + { "default", DEFAULT }, // 529 + { "delimited", DELIMITED }, // 530 + { "delimiter", DELIMITER }, // 531 + { "depending", DEPENDING }, // 532 + { "descending", DESCENDING }, // 533 + { "detail", DETAIL }, // 534 + { "direct", DIRECT }, // 535 + { "direct-access", DIRECT_ACCESS }, // 536 + { "down", DOWN }, // 537 + { "duplicates", DUPLICATES }, // 538 + { "dynamic", DYNAMIC }, // 539 + { "e", E }, // 540 + { "ebcdic", EBCDIC }, // 541 + { "ec", EC }, // 542 + { "egcs", EGCS }, // 543 + { "entry", ENTRY }, // 544 + { "environment", ENVIRONMENT }, // 545 + { "equal", EQUAL }, // 546 + { "every", EVERY }, // 547 + { "examine", EXAMINE }, // 548 + { "exhibit", EXHIBIT }, // 549 + { "exp", EXP }, // 550 + { "exp10", EXP10 }, // 551 + { "extend", EXTEND }, // 552 + { "external", EXTERNAL }, // 553 + { "exception-file", EXCEPTION_FILE }, // 554 + { "exception-file-n", EXCEPTION_FILE_N }, // 555 + { "exception-location", EXCEPTION_LOCATION }, // 556 + { "exception-location-n", EXCEPTION_LOCATION_N }, // 557 + { "exception-statement", EXCEPTION_STATEMENT }, // 558 + { "exception-status", EXCEPTION_STATUS }, // 559 + { "factorial", FACTORIAL }, // 560 + { "false", FALSE_kw }, // 561 + { "fd", FD }, // 562 + { "file-control", FILE_CONTROL }, // 563 + { "file", FILE_KW }, // 564 + { "file-limit", FILE_LIMIT }, // 565 + { "final", FINAL }, // 566 + { "finally", FINALLY }, // 567 + { "find-string", FIND_STRING }, // 568 + { "first", FIRST }, // 569 + { "fixed", FIXED }, // 570 + { "footing", FOOTING }, // 571 + { "for", FOR }, // 572 + { "formatted-current-date", FORMATTED_CURRENT_DATE }, // 573 + { "formatted-date", FORMATTED_DATE }, // 574 + { "formatted-datetime", FORMATTED_DATETIME }, // 575 + { "formatted-time", FORMATTED_TIME }, // 576 + { "form-overflow", FORM_OVERFLOW }, // 577 + { "free", FREE }, // 578 + { "fraction-part", FRACTION_PART }, // 579 + { "from", FROM }, // 580 + { "function", FUNCTION }, // 581 + { "generate", GENERATE }, // 582 + { "giving", GIVING }, // 583 + { "global", GLOBAL }, // 584 + { "go", GO }, // 585 + { "group", GROUP }, // 586 + { "heading", HEADING }, // 587 + { "hex", HEX }, // 588 + { "hex-of", HEX_OF }, // 589 + { "hex-to-char", HEX_TO_CHAR }, // 590 + { "high-values", HIGH_VALUES }, // 591 + { "highest-algebraic", HIGHEST_ALGEBRAIC }, // 592 + { "hold", HOLD }, // 593 + { "ibm-360", IBM_360 }, // 594 + { "in", IN }, // 595 + { "include", INCLUDE }, // 596 + { "index", INDEX }, // 597 + { "indexed", INDEXED }, // 598 + { "indicate", INDICATE }, // 599 + { "initial", INITIAL_kw }, // 600 + { "initiate", INITIATE }, // 601 + { "input", INPUT }, // 602 + { "installation", INSTALLATION }, // 603 + { "interface", INTERFACE }, // 604 + { "integer", INTEGER }, // 605 + { "integer-of-boolean", INTEGER_OF_BOOLEAN }, // 606 + { "integer-of-date", INTEGER_OF_DATE }, // 607 + { "integer-of-day", INTEGER_OF_DAY }, // 608 + { "integer-of-formatted-date", INTEGER_OF_FORMATTED_DATE }, // 609 + { "integer-part", INTEGER_PART }, // 610 + { "into", INTO }, // 611 + { "intrinsic", INTRINSIC }, // 612 + { "invoke", INVOKE }, // 613 + { "i-o", IO }, // 614 + { "i-o-control", IO_CONTROL }, // 615 + { "is", IS }, // 616 + { "isnt", ISNT }, // 617 + { "kanji", KANJI }, // 618 + { "key", KEY }, // 619 + { "label", LABEL }, // 620 + { "last", LAST }, // 621 + { "leading", LEADING }, // 622 + { "left", LEFT }, // 623 + { "length", LENGTH }, // 624 + { "length-of", LENGTH_OF }, // 625 + { "limit", LIMIT }, // 626 + { "limits", LIMITS }, // 627 + { "line", LINE }, // 628 + { "lines", LINES }, // 629 + { "line-counter", LINE_COUNTER }, // 630 + { "linage", LINAGE }, // 631 + { "linkage", LINKAGE }, // 632 + { "locale", LOCALE }, // 633 + { "locale-compare", LOCALE_COMPARE }, // 634 + { "locale-date", LOCALE_DATE }, // 635 + { "locale-time", LOCALE_TIME }, // 636 + { "locale-time-from-seconds", LOCALE_TIME_FROM_SECONDS }, // 637 + { "local-storage", LOCAL_STORAGE }, // 638 + { "location", LOCATION }, // 639 + { "lock", LOCK }, // 640 + { "lock-on", LOCK_ON }, // 641 + { "log", LOG }, // 642 + { "log10", LOG10 }, // 643 + { "lower-case", LOWER_CASE }, // 644 + { "low-values", LOW_VALUES }, // 645 + { "lowest-algebraic", LOWEST_ALGEBRAIC }, // 646 + { "lparen", LPAREN }, // 647 + { "manual", MANUAL }, // 648 + { "maxx", MAXX }, // 649 + { "mean", MEAN }, // 650 + { "median", MEDIAN }, // 651 + { "midrange", MIDRANGE }, // 652 + { "minn", MINN }, // 653 + { "multiple", MULTIPLE }, // 654 + { "mod", MOD }, // 655 + { "mode", MODE }, // 656 + { "module-name", MODULE_NAME }, // 657 + { "named", NAMED }, // 658 + { "nat", NAT }, // 659 + { "national", NATIONAL }, // 660 + { "national-edited", NATIONAL_EDITED }, // 661 + { "national-of", NATIONAL_OF }, // 662 + { "native", NATIVE }, // 663 + { "nested", NESTED }, // 664 + { "next", NEXT }, // 665 + { "no", NO }, // 666 + { "note", NOTE }, // 667 + { "nulls", NULLS }, // 668 + { "null", NULLS }, // 668 + { "nullptr", NULLPTR }, // 669 + { "numeric", NUMERIC }, // 670 + { "numeric-edited", NUMERIC_EDITED }, // 671 + { "numval", NUMVAL }, // 672 + { "numval-c", NUMVAL_C }, // 673 + { "numval-f", NUMVAL_F }, // 674 + { "occurs", OCCURS }, // 675 + { "of", OF }, // 676 + { "off", OFF }, // 677 + { "omitted", OMITTED }, // 678 + { "on", ON }, // 679 + { "only", ONLY }, // 680 + { "optional", OPTIONAL }, // 681 + { "options", OPTIONS }, // 682 + { "ord", ORD }, // 683 + { "order", ORDER }, // 684 + { "ord-max", ORD_MAX }, // 685 + { "ord-min", ORD_MIN }, // 686 + { "organization", ORGANIZATION }, // 687 + { "other", OTHER }, // 688 + { "otherwise", OTHERWISE }, // 689 + { "output", OUTPUT }, // 690 + { "packed-decimal", PACKED_DECIMAL }, // 691 + { "padding", PADDING }, // 692 + { "page", PAGE }, // 693 + { "page-counter", PAGE_COUNTER }, // 694 + { "pf", PF }, // 695 + { "ph", PH }, // 696 + { "pi", PI }, // 697 + { "pic", PIC }, // 698 + { "picture", PICTURE }, // 699 + { "plus", PLUS }, // 700 + { "present-value", PRESENT_VALUE }, // 701 + { "print-switch", PRINT_SWITCH }, // 702 + { "procedure", PROCEDURE }, // 703 + { "procedures", PROCEDURES }, // 704 + { "proceed", PROCEED }, // 705 + { "process", PROCESS }, // 706 + { "program-id", PROGRAM_ID }, // 707 + { "program", PROGRAM_kw }, // 708 + { "property", PROPERTY }, // 709 + { "prototype", PROTOTYPE }, // 710 + { "pseudotext", PSEUDOTEXT }, // 711 + { "quotes", QUOTES }, // 712 + { "quote", QUOTES }, // 712 + { "random", RANDOM }, // 713 + { "random-seed", RANDOM_SEED }, // 714 + { "range", RANGE }, // 715 + { "raise", RAISE }, // 716 + { "raising", RAISING }, // 717 + { "rd", RD }, // 718 + { "record", RECORD }, // 719 + { "recording", RECORDING }, // 720 + { "records", RECORDS }, // 721 + { "recursive", RECURSIVE }, // 722 + { "redefines", REDEFINES }, // 723 + { "reel", REEL }, // 724 + { "reference", REFERENCE }, // 725 + { "relative", RELATIVE }, // 726 + { "rem", REM }, // 727 + { "remainder", REMAINDER }, // 728 + { "remarks", REMARKS }, // 729 + { "removal", REMOVAL }, // 730 + { "renames", RENAMES }, // 731 + { "replace", REPLACE }, // 732 + { "replacing", REPLACING }, // 733 + { "report", REPORT }, // 734 + { "reporting", REPORTING }, // 735 + { "reports", REPORTS }, // 736 + { "repository", REPOSITORY }, // 737 + { "rerun", RERUN }, // 738 + { "reserve", RESERVE }, // 739 + { "restricted", RESTRICTED }, // 740 + { "resume", RESUME }, // 741 + { "reverse", REVERSE }, // 742 + { "reversed", REVERSED }, // 743 + { "rewind", REWIND }, // 744 + { "rf", RF }, // 745 + { "rh", RH }, // 746 + { "right", RIGHT }, // 747 + { "rounded", ROUNDED }, // 748 + { "run", RUN }, // 749 + { "same", SAME }, // 750 + { "screen", SCREEN }, // 751 + { "sd", SD }, // 752 + { "seconds-from-formatted-time", SECONDS_FROM_FORMATTED_TIME }, // 753 + { "seconds-past-midnight", SECONDS_PAST_MIDNIGHT }, // 754 + { "security", SECURITY }, // 755 + { "separate", SEPARATE }, // 756 + { "sequence", SEQUENCE }, // 757 + { "sequential", SEQUENTIAL }, // 758 + { "sharing", SHARING }, // 759 + { "simple-exit", SIMPLE_EXIT }, // 760 + { "sign", SIGN }, // 761 + { "sin", SIN }, // 762 + { "size", SIZE }, // 763 + { "smallest-algebraic", SMALLEST_ALGEBRAIC }, // 764 + { "source", SOURCE }, // 765 + { "source-computer", SOURCE_COMPUTER }, // 766 + { "special-names", SPECIAL_NAMES }, // 767 + { "sqrt", SQRT }, // 768 + { "stack", STACK }, // 769 + { "standard", STANDARD }, // 770 + { "standard-1", STANDARD_1 }, // 771 + { "standard-deviation", STANDARD_DEVIATION }, // 772 + { "standard-compare", STANDARD_COMPARE }, // 773 + { "status", STATUS }, // 774 + { "strong", STRONG }, // 775 + { "substitute", SUBSTITUTE }, // 776 + { "sum", SUM }, // 777 + { "symbol", SYMBOL }, // 778 + { "symbolic", SYMBOLIC }, // 779 + { "synchronized", SYNCHRONIZED }, // 780 + { "tally", TALLY }, // 781 + { "tallying", TALLYING }, // 782 + { "tan", TAN }, // 783 + { "terminate", TERMINATE }, // 784 + { "test", TEST }, // 785 + { "test-date-yyyymmdd", TEST_DATE_YYYYMMDD }, // 786 + { "test-day-yyyyddd", TEST_DAY_YYYYDDD }, // 787 + { "test-formatted-datetime", TEST_FORMATTED_DATETIME }, // 788 + { "test-numval", TEST_NUMVAL }, // 789 + { "test-numval-c", TEST_NUMVAL_C }, // 790 + { "test-numval-f", TEST_NUMVAL_F }, // 791 + { "than", THAN }, // 792 + { "time", TIME }, // 793 + { "times", TIMES }, // 794 + { "to", TO }, // 795 + { "top", TOP }, // 796 + { "top-level", TOP_LEVEL }, // 797 + { "tracks", TRACKS }, // 798 + { "track-area", TRACK_AREA }, // 799 + { "trailing", TRAILING }, // 800 + { "transform", TRANSFORM }, // 801 + { "trim", TRIM }, // 802 + { "true", TRUE_kw }, // 803 + { "try", TRY }, // 804 + { "turn", TURN }, // 805 + { "type", TYPE }, // 806 + { "typedef", TYPEDEF }, // 807 + { "ulength", ULENGTH }, // 808 + { "unbounded", UNBOUNDED }, // 809 + { "unit", UNIT }, // 810 + { "units", UNITS }, // 811 + { "unit-record", UNIT_RECORD }, // 812 + { "until", UNTIL }, // 813 + { "up", UP }, // 814 + { "upon", UPON }, // 815 + { "upos", UPOS }, // 816 + { "upper-case", UPPER_CASE }, // 817 + { "usage", USAGE }, // 818 + { "using", USING }, // 819 + { "usubstr", USUBSTR }, // 820 + { "usupplementary", USUPPLEMENTARY }, // 821 + { "utility", UTILITY }, // 822 + { "uuid4", UUID4 }, // 823 + { "uvalid", UVALID }, // 824 + { "uwidth", UWIDTH }, // 825 + { "value", VALUE }, // 826 + { "variance", VARIANCE }, // 827 + { "varying", VARYING }, // 828 + { "volatile", VOLATILE }, // 829 + { "when-compiled", WHEN_COMPILED }, // 830 + { "with", WITH }, // 831 + { "working-storage", WORKING_STORAGE }, // 832 + { "xml", XML }, // 833 + { "xmlgenerate", XMLGENERATE }, // 834 + { "xmlparse", XMLPARSE }, // 835 + { "year-to-yyyy", YEAR_TO_YYYY }, // 836 + { "yyyyddd", YYYYDDD }, // 837 + { "yyyymmdd", YYYYMMDD }, // 838 + { "arithmetic", ARITHMETIC }, // 839 + { "attribute", ATTRIBUTE }, // 840 + { "auto", AUTO }, // 841 + { "automatic", AUTOMATIC }, // 842 + { "away-from-zero", AWAY_FROM_ZERO }, // 843 + { "background-color", BACKGROUND_COLOR }, // 844 + { "bell", BELL }, // 845 + { "binary-encoding", BINARY_ENCODING }, // 846 + { "blink", BLINK }, // 847 + { "capacity", CAPACITY }, // 848 + { "center", CENTER }, // 849 + { "classification", CLASSIFICATION }, // 850 + { "cycle", CYCLE }, // 851 + { "decimal-encoding", DECIMAL_ENCODING }, // 852 + { "entry-convention", ENTRY_CONVENTION }, // 853 + { "eol", EOL }, // 854 + { "eos", EOS }, // 855 + { "erase", ERASE }, // 856 + { "expands", EXPANDS }, // 857 + { "float-binary", FLOAT_BINARY }, // 858 + { "float-decimal", FLOAT_DECIMAL }, // 859 + { "foreground-color", FOREGROUND_COLOR }, // 860 + { "forever", FOREVER }, // 861 + { "full", FULL }, // 862 + { "highlight", HIGHLIGHT }, // 863 + { "high-order-left", HIGH_ORDER_LEFT }, // 864 + { "high-order-right", HIGH_ORDER_RIGHT }, // 865 + { "ignoring", IGNORING }, // 866 + { "implements", IMPLEMENTS }, // 867 + { "initialized", INITIALIZED }, // 868 + { "intermediate", INTERMEDIATE }, // 869 + { "lc-all", LC_ALL_kw }, // 870 + { "lc-collate", LC_COLLATE_kw }, // 871 + { "lc-ctype", LC_CTYPE_kw }, // 872 + { "lc-messages", LC_MESSAGES_kw }, // 873 + { "lc-monetary", LC_MONETARY_kw }, // 874 + { "lc-numeric", LC_NUMERIC_kw }, // 875 + { "lc-time", LC_TIME_kw }, // 876 + { "lowlight", LOWLIGHT }, // 877 + { "nearest-away-from-zero", NEAREST_AWAY_FROM_ZERO }, // 878 + { "nearest-even", NEAREST_EVEN }, // 879 + { "nearest-toward-zero", NEAREST_TOWARD_ZERO }, // 880 + { "none", NONE }, // 881 + { "normal", NORMAL }, // 882 + { "numbers", NUMBERS }, // 883 + { "prefixed", PREFIXED }, // 884 + { "previous", PREVIOUS }, // 885 + { "prohibited", PROHIBITED }, // 886 + { "relation", RELATION }, // 887 + { "required", REQUIRED }, // 888 + { "reverse-video", REVERSE_VIDEO }, // 889 + { "rounding", ROUNDING }, // 890 + { "seconds", SECONDS }, // 891 + { "secure", SECURE }, // 892 + { "short", SHORT }, // 893 + { "signed", SIGNED }, // 894 + { "standard-binary", STANDARD_BINARY }, // 895 + { "standard-decimal", STANDARD_DECIMAL }, // 896 + { "statement", STATEMENT }, // 897 + { "step", STEP }, // 898 + { "structure", STRUCTURE }, // 899 + { "toward-greater", TOWARD_GREATER }, // 900 + { "toward-lesser", TOWARD_LESSER }, // 901 + { "truncation", TRUNCATION }, // 902 + { "ucs-4", UCS_4 }, // 903 + { "underline", UNDERLINE }, // 904 + { "unsigned", UNSIGNED }, // 905 + { "utf-16", UTF_16 }, // 906 + { "utf-8", UTF_8 }, // 907 + { "address", ADDRESS }, // 908 + { "end-accept", END_ACCEPT }, // 909 + { "end-add", END_ADD }, // 910 + { "end-call", END_CALL }, // 911 + { "end-compute", END_COMPUTE }, // 912 + { "end-delete", END_DELETE }, // 913 + { "end-display", END_DISPLAY }, // 914 + { "end-divide", END_DIVIDE }, // 915 + { "end-evaluate", END_EVALUATE }, // 916 + { "end-multiply", END_MULTIPLY }, // 917 + { "end-perform", END_PERFORM }, // 918 + { "end-read", END_READ }, // 919 + { "end-return", END_RETURN }, // 920 + { "end-rewrite", END_REWRITE }, // 921 + { "end-search", END_SEARCH }, // 922 + { "end-start", END_START }, // 923 + { "end-string", END_STRING }, // 924 + { "end-subtract", END_SUBTRACT }, // 925 + { "end-unstring", END_UNSTRING }, // 926 + { "end-write", END_WRITE }, // 927 + { "end-if", END_IF }, // 928 + { "thru", THRU }, // 929 + { "through", THRU }, // 929 + { "or", OR }, // 930 + { "and", AND }, // 931 + { "not", NOT }, // 932 + { "ne", NE }, // 933 + { "le", LE }, // 934 + { "ge", GE }, // 935 + { "pow", POW }, // 936 + { "neg", NEG }, // 937 +}; + +token_names = { + "IDENTIFICATION", // 0 (258) + "ENVIRONMENT", // 1 (259) + "PROCEDURE", // 2 (260) + "DATA", // 3 (261) + "FILE", // 4 (262) + "INPUT-OUTPUT", // 5 (263) + "LINKAGE", // 6 (264) + "LOCAL-STORAGE", // 7 (265) + "WORKING-STORAGE", // 8 (266) + "OBJECT-COMPUTER", // 9 (267) + "DISPLAY-OF", // 10 (268) + "END-FUNCTION", // 11 (269) + "END-PROGRAM", // 12 (270) + "END-SUBPROGRAM", // 13 (271) + "JUSTIFIED", // 14 (272) + "RETURNING", // 15 (273) + "NO-CONDITION", // 16 (274) + "ALNUM", // 17 (275) + "ALPHED", // 18 (276) + "ERROR", // 19 (277) + "EXCEPTION", // 20 (278) + "SIZE-ERROR", // 21 (279) + "EXCEPTION-NAME", // 22 (280) + "LEVEL", // 23 (281) + "LEVEL66", // 24 (282) + "LEVEL78", // 25 (283) + "LEVEL88", // 26 (284) + "CLASS-NAME", // 27 (285) + "NAME", // 28 (286) + "NAME88", // 29 (287) + "NUME", // 30 (288) + "NUMED", // 31 (289) + "NUMED-CR", // 32 (290) + "NUMED-DB", // 33 (291) + "NINEDOT", // 34 (292) + "NINES", // 35 (293) + "NINEV", // 36 (294) + "PIC-P", // 37 (295) + "SPACES", // 38 (296) + "LITERAL", // 39 (297) + "END", // 40 (298) + "EOP", // 41 (299) + "FILENAME", // 42 (300) + "INVALID", // 43 (301) + "NUMBER", // 44 (302) + "NEGATIVE", // 45 (303) + "NUMSTR", // 46 (304) + "OVERFLOW", // 47 (305) + "COMPUTATIONAL", // 48 (306) + "PERFORM", // 49 (307) + "BACKWARD", // 50 (308) + "POSITIVE", // 51 (309) + "POINTER", // 52 (310) + "SECTION", // 53 (311) + "STANDARD-ALPHABET", // 54 (312) + "SWITCH", // 55 (313) + "UPSI", // 56 (314) + "ZERO", // 57 (315) + "SYSIN", // 58 (316) + "SYSIPT", // 59 (317) + "SYSOUT", // 60 (318) + "SYSLIST", // 61 (319) + "SYSLST", // 62 (320) + "SYSPUNCH", // 63 (321) + "SYSPCH", // 64 (322) + "CONSOLE", // 65 (323) + "C01", // 66 (324) + "C02", // 67 (325) + "C03", // 68 (326) + "C04", // 69 (327) + "C05", // 70 (328) + "C06", // 71 (329) + "C07", // 72 (330) + "C08", // 73 (331) + "C09", // 74 (332) + "C10", // 75 (333) + "C11", // 76 (334) + "C12", // 77 (335) + "CSP", // 78 (336) + "S01", // 79 (337) + "S02", // 80 (338) + "S03", // 81 (339) + "S04", // 82 (340) + "S05", // 83 (341) + "AFP-5A", // 84 (342) + "STDIN", // 85 (343) + "STDOUT", // 86 (344) + "STDERR", // 87 (345) + "LIST", // 88 (346) + "MAP", // 89 (347) + "NOLIST", // 90 (348) + "NOMAP", // 91 (349) + "NOSOURCE", // 92 (350) + "MIGHT-BE", // 93 (351) + "FUNCTION-UDF", // 94 (352) + "FUNCTION-UDF-0", // 95 (353) + "DATE-FMT", // 96 (354) + "TIME-FMT", // 97 (355) + "DATETIME-FMT", // 98 (356) + "BASIS", // 99 (357) + "CBL", // 100 (358) + "CONSTANT", // 101 (359) + "COPY", // 102 (360) + "DEFINED", // 103 (361) + "ENTER", // 104 (362) + "FEATURE", // 105 (363) + "INSERTT", // 106 (364) + "LSUB", // 107 (365) + "PARAMETER", // 108 (366) + "OVERRIDE", // 109 (367) + "READY", // 110 (368) + "RESET", // 111 (369) + "RSUB", // 112 (370) + "SERVICE-RELOAD", // 113 (371) + "STAR-CBL", // 114 (372) + "SUBSCRIPT", // 115 (373) + "SUPPRESS", // 116 (374) + "TITLE", // 117 (375) + "TRACE", // 118 (376) + "USE", // 119 (377) + "COBOL-WORDS", // 120 (378) + "EQUATE", // 121 (379) + "UNDEFINE", // 122 (380) + "CDF-DEFINE", // 123 (381) + "CDF-DISPLAY", // 124 (382) + "CDF-IF", // 125 (383) + "CDF-ELSE", // 126 (384) + "CDF-END-IF", // 127 (385) + "CDF-EVALUATE", // 128 (386) + "CDF-WHEN", // 129 (387) + "CDF-END-EVALUATE", // 130 (388) + "CALL-COBOL", // 131 (389) + "CALL-VERBATIM", // 132 (390) + "IF", // 133 (391) + "THEN", // 134 (392) + "ELSE", // 135 (393) + "SENTENCE", // 136 (394) + "ACCEPT", // 137 (395) + "ADD", // 138 (396) + "ALTER", // 139 (397) + "CALL", // 140 (398) + "CANCEL", // 141 (399) + "CLOSE", // 142 (400) + "COMPUTE", // 143 (401) + "CONTINUE", // 144 (402) + "DELETE", // 145 (403) + "DISPLAY", // 146 (404) + "DIVIDE", // 147 (405) + "EVALUATE", // 148 (406) + "EXIT", // 149 (407) + "FILLER", // 150 (408) + "GOBACK", // 151 (409) + "GOTO", // 152 (410) + "INITIALIZE", // 153 (411) + "INSPECT", // 154 (412) + "MERGE", // 155 (413) + "MOVE", // 156 (414) + "MULTIPLY", // 157 (415) + "OPEN", // 158 (416) + "PARAGRAPH", // 159 (417) + "READ", // 160 (418) + "RELEASE", // 161 (419) + "RETURN", // 162 (420) + "REWRITE", // 163 (421) + "SEARCH", // 164 (422) + "SET", // 165 (423) + "SELECT", // 166 (424) + "SORT", // 167 (425) + "SORT-MERGE", // 168 (426) + "STRING", // 169 (427) + "STOP", // 170 (428) + "SUBTRACT", // 171 (429) + "START", // 172 (430) + "UNSTRING", // 173 (431) + "WRITE", // 174 (432) + "WHEN", // 175 (433) + "ABS", // 176 (434) + "ACCESS", // 177 (435) + "ACOS", // 178 (436) + "ACTUAL", // 179 (437) + "ADVANCING", // 180 (438) + "AFTER", // 181 (439) + "ALL", // 182 (440) + "ALLOCATE", // 183 (441) + "ALPHABET", // 184 (442) + "ALPHABETIC", // 185 (443) + "ALPHABETIC-LOWER", // 186 (444) + "ALPHABETIC-UPPER", // 187 (445) + "ALPHANUMERIC", // 188 (446) + "ALPHANUMERIC-EDITED", // 189 (447) + "ALSO", // 190 (448) + "ALTERNATE", // 191 (449) + "ANNUITY", // 192 (450) + "ANUM", // 193 (451) + "ANY", // 194 (452) + "ANYCASE", // 195 (453) + "APPLY", // 196 (454) + "ARE", // 197 (455) + "AREA", // 198 (456) + "AREAS", // 199 (457) + "AS", // 200 (458) + "ASCENDING", // 201 (459) + "ACTIVATING", // 202 (460) + "ASIN", // 203 (461) + "ASSIGN", // 204 (462) + "AT", // 205 (463) + "ATAN", // 206 (464) + "BASED", // 207 (465) + "BASECONVERT", // 208 (466) + "BEFORE", // 209 (467) + "BINARY", // 210 (468) + "BIT", // 211 (469) + "BIT-OF", // 212 (470) + "BIT-TO-CHAR", // 213 (471) + "BLANK", // 214 (472) + "BLOCK", // 215 (473) + "BOOLEAN-OF-INTEGER", // 216 (474) + "BOTTOM", // 217 (475) + "BY", // 218 (476) + "BYTE", // 219 (477) + "BYTE-LENGTH", // 220 (478) + "CF", // 221 (479) + "CH", // 222 (480) + "CHANGED", // 223 (481) + "CHAR", // 224 (482) + "CHAR-NATIONAL", // 225 (483) + "CHARACTER", // 226 (484) + "CHARACTERS", // 227 (485) + "CHECKING", // 228 (486) + "CLASS", // 229 (487) + "COBOL", // 230 (488) + "CODE", // 231 (489) + "CODE-SET", // 232 (490) + "COLLATING", // 233 (491) + "COLUMN", // 234 (492) + "COMBINED-DATETIME", // 235 (493) + "COMMA", // 236 (494) + "COMMAND-LINE", // 237 (495) + "COMMAND-LINE-COUNT", // 238 (496) + "COMMIT", // 239 (497) + "COMMON", // 240 (498) + "CONCAT", // 241 (499) + "CONDITION", // 242 (500) + "CONFIGURATION", // 243 (501) + "CONTAINS", // 244 (502) + "CONTENT", // 245 (503) + "CONTROL", // 246 (504) + "CONTROLS", // 247 (505) + "CONVERT", // 248 (506) + "CONVERTING", // 249 (507) + "CORRESPONDING", // 250 (508) + "COS", // 251 (509) + "COUNT", // 252 (510) + "CURRENCY", // 253 (511) + "CURRENT", // 254 (512) + "CURRENT-DATE", // 255 (513) + "DATA", // 256 (514) + "DATE", // 257 (515) + "DATE-COMPILED", // 258 (516) + "DATE-OF-INTEGER", // 259 (517) + "DATE-TO-YYYYMMDD", // 260 (518) + "DATE-WRITTEN", // 261 (519) + "DAY", // 262 (520) + "DAY-OF-INTEGER", // 263 (521) + "DAY-OF-WEEK", // 264 (522) + "DAY-TO-YYYYDDD", // 265 (523) + "DBCS", // 266 (524) + "DE", // 267 (525) + "DEBUGGING", // 268 (526) + "DECIMAL-POINT", // 269 (527) + "DECLARATIVES", // 270 (528) + "DEFAULT", // 271 (529) + "DELIMITED", // 272 (530) + "DELIMITER", // 273 (531) + "DEPENDING", // 274 (532) + "DESCENDING", // 275 (533) + "DETAIL", // 276 (534) + "DIRECT", // 277 (535) + "DIRECT-ACCESS", // 278 (536) + "DOWN", // 279 (537) + "DUPLICATES", // 280 (538) + "DYNAMIC", // 281 (539) + "E", // 282 (540) + "EBCDIC", // 283 (541) + "EC", // 284 (542) + "EGCS", // 285 (543) + "ENTRY", // 286 (544) + "ENVIRONMENT", // 287 (545) + "EQUAL", // 288 (546) + "EVERY", // 289 (547) + "EXAMINE", // 290 (548) + "EXHIBIT", // 291 (549) + "EXP", // 292 (550) + "EXP10", // 293 (551) + "EXTEND", // 294 (552) + "EXTERNAL", // 295 (553) + "EXCEPTION-FILE", // 296 (554) + "EXCEPTION-FILE-N", // 297 (555) + "EXCEPTION-LOCATION", // 298 (556) + "EXCEPTION-LOCATION-N", // 299 (557) + "EXCEPTION-STATEMENT", // 300 (558) + "EXCEPTION-STATUS", // 301 (559) + "FACTORIAL", // 302 (560) + "FALSE", // 303 (561) + "FD", // 304 (562) + "FILE-CONTROL", // 305 (563) + "FILE", // 306 (564) + "FILE-LIMIT", // 307 (565) + "FINAL", // 308 (566) + "FINALLY", // 309 (567) + "FIND-STRING", // 310 (568) + "FIRST", // 311 (569) + "FIXED", // 312 (570) + "FOOTING", // 313 (571) + "FOR", // 314 (572) + "FORMATTED-CURRENT-DATE", // 315 (573) + "FORMATTED-DATE", // 316 (574) + "FORMATTED-DATETIME", // 317 (575) + "FORMATTED-TIME", // 318 (576) + "FORM-OVERFLOW", // 319 (577) + "FREE", // 320 (578) + "FRACTION-PART", // 321 (579) + "FROM", // 322 (580) + "FUNCTION", // 323 (581) + "GENERATE", // 324 (582) + "GIVING", // 325 (583) + "GLOBAL", // 326 (584) + "GO", // 327 (585) + "GROUP", // 328 (586) + "HEADING", // 329 (587) + "HEX", // 330 (588) + "HEX-OF", // 331 (589) + "HEX-TO-CHAR", // 332 (590) + "HIGH-VALUES", // 333 (591) + "HIGHEST-ALGEBRAIC", // 334 (592) + "HOLD", // 335 (593) + "IBM-360", // 336 (594) + "IN", // 337 (595) + "INCLUDE", // 338 (596) + "INDEX", // 339 (597) + "INDEXED", // 340 (598) + "INDICATE", // 341 (599) + "INITIAL", // 342 (600) + "INITIATE", // 343 (601) + "INPUT", // 344 (602) + "INSTALLATION", // 345 (603) + "INTERFACE", // 346 (604) + "INTEGER", // 347 (605) + "INTEGER-OF-BOOLEAN", // 348 (606) + "INTEGER-OF-DATE", // 349 (607) + "INTEGER-OF-DAY", // 350 (608) + "INTEGER-OF-FORMATTED-DATE", // 351 (609) + "INTEGER-PART", // 352 (610) + "INTO", // 353 (611) + "INTRINSIC", // 354 (612) + "INVOKE", // 355 (613) + "I-O", // 356 (614) + "I-O-CONTROL", // 357 (615) + "IS", // 358 (616) + "ISNT", // 359 (617) + "KANJI", // 360 (618) + "KEY", // 361 (619) + "LABEL", // 362 (620) + "LAST", // 363 (621) + "LEADING", // 364 (622) + "LEFT", // 365 (623) + "LENGTH", // 366 (624) + "LENGTH-OF", // 367 (625) + "LIMIT", // 368 (626) + "LIMITS", // 369 (627) + "LINE", // 370 (628) + "LINES", // 371 (629) + "LINE-COUNTER", // 372 (630) + "LINAGE", // 373 (631) + "LINKAGE", // 374 (632) + "LOCALE", // 375 (633) + "LOCALE-COMPARE", // 376 (634) + "LOCALE-DATE", // 377 (635) + "LOCALE-TIME", // 378 (636) + "LOCALE-TIME-FROM-SECONDS", // 379 (637) + "LOCAL-STORAGE", // 380 (638) + "LOCATION", // 381 (639) + "LOCK", // 382 (640) + "LOCK-ON", // 383 (641) + "LOG", // 384 (642) + "LOG10", // 385 (643) + "LOWER-CASE", // 386 (644) + "LOW-VALUES", // 387 (645) + "LOWEST-ALGEBRAIC", // 388 (646) + "LPAREN", // 389 (647) + "MANUAL", // 390 (648) + "MAXX", // 391 (649) + "MEAN", // 392 (650) + "MEDIAN", // 393 (651) + "MIDRANGE", // 394 (652) + "MINN", // 395 (653) + "MULTIPLE", // 396 (654) + "MOD", // 397 (655) + "MODE", // 398 (656) + "MODULE-NAME", // 399 (657) + "NAMED", // 400 (658) + "NAT", // 401 (659) + "NATIONAL", // 402 (660) + "NATIONAL-EDITED", // 403 (661) + "NATIONAL-OF", // 404 (662) + "NATIVE", // 405 (663) + "NESTED", // 406 (664) + "NEXT", // 407 (665) + "NO", // 408 (666) + "NOTE", // 409 (667) + "NULLS", // 410 (668) + "NULLPTR", // 411 (669) + "NUMERIC", // 412 (670) + "NUMERIC-EDITED", // 413 (671) + "NUMVAL", // 414 (672) + "NUMVAL-C", // 415 (673) + "NUMVAL-F", // 416 (674) + "OCCURS", // 417 (675) + "OF", // 418 (676) + "OFF", // 419 (677) + "OMITTED", // 420 (678) + "ON", // 421 (679) + "ONLY", // 422 (680) + "OPTIONAL", // 423 (681) + "OPTIONS", // 424 (682) + "ORD", // 425 (683) + "ORDER", // 426 (684) + "ORD-MAX", // 427 (685) + "ORD-MIN", // 428 (686) + "ORGANIZATION", // 429 (687) + "OTHER", // 430 (688) + "OTHERWISE", // 431 (689) + "OUTPUT", // 432 (690) + "PACKED-DECIMAL", // 433 (691) + "PADDING", // 434 (692) + "PAGE", // 435 (693) + "PAGE-COUNTER", // 436 (694) + "PF", // 437 (695) + "PH", // 438 (696) + "PI", // 439 (697) + "PIC", // 440 (698) + "PICTURE", // 441 (699) + "PLUS", // 442 (700) + "PRESENT-VALUE", // 443 (701) + "PRINT-SWITCH", // 444 (702) + "PROCEDURE", // 445 (703) + "PROCEDURES", // 446 (704) + "PROCEED", // 447 (705) + "PROCESS", // 448 (706) + "PROGRAM-ID", // 449 (707) + "PROGRAM", // 450 (708) + "PROPERTY", // 451 (709) + "PROTOTYPE", // 452 (710) + "PSEUDOTEXT", // 453 (711) + "QUOTES", // 454 (712) + "RANDOM", // 455 (713) + "RANDOM-SEED", // 456 (714) + "RANGE", // 457 (715) + "RAISE", // 458 (716) + "RAISING", // 459 (717) + "RD", // 460 (718) + "RECORD", // 461 (719) + "RECORDING", // 462 (720) + "RECORDS", // 463 (721) + "RECURSIVE", // 464 (722) + "REDEFINES", // 465 (723) + "REEL", // 466 (724) + "REFERENCE", // 467 (725) + "RELATIVE", // 468 (726) + "REM", // 469 (727) + "REMAINDER", // 470 (728) + "REMARKS", // 471 (729) + "REMOVAL", // 472 (730) + "RENAMES", // 473 (731) + "REPLACE", // 474 (732) + "REPLACING", // 475 (733) + "REPORT", // 476 (734) + "REPORTING", // 477 (735) + "REPORTS", // 478 (736) + "REPOSITORY", // 479 (737) + "RERUN", // 480 (738) + "RESERVE", // 481 (739) + "RESTRICTED", // 482 (740) + "RESUME", // 483 (741) + "REVERSE", // 484 (742) + "REVERSED", // 485 (743) + "REWIND", // 486 (744) + "RF", // 487 (745) + "RH", // 488 (746) + "RIGHT", // 489 (747) + "ROUNDED", // 490 (748) + "RUN", // 491 (749) + "SAME", // 492 (750) + "SCREEN", // 493 (751) + "SD", // 494 (752) + "SECONDS-FROM-FORMATTED-TIME", // 495 (753) + "SECONDS-PAST-MIDNIGHT", // 496 (754) + "SECURITY", // 497 (755) + "SEPARATE", // 498 (756) + "SEQUENCE", // 499 (757) + "SEQUENTIAL", // 500 (758) + "SHARING", // 501 (759) + "SIMPLE-EXIT", // 502 (760) + "SIGN", // 503 (761) + "SIN", // 504 (762) + "SIZE", // 505 (763) + "SMALLEST-ALGEBRAIC", // 506 (764) + "SOURCE", // 507 (765) + "SOURCE-COMPUTER", // 508 (766) + "SPECIAL-NAMES", // 509 (767) + "SQRT", // 510 (768) + "STACK", // 511 (769) + "STANDARD", // 512 (770) + "STANDARD-1", // 513 (771) + "STANDARD-DEVIATION", // 514 (772) + "STANDARD-COMPARE", // 515 (773) + "STATUS", // 516 (774) + "STRONG", // 517 (775) + "SUBSTITUTE", // 518 (776) + "SUM", // 519 (777) + "SYMBOL", // 520 (778) + "SYMBOLIC", // 521 (779) + "SYNCHRONIZED", // 522 (780) + "TALLY", // 523 (781) + "TALLYING", // 524 (782) + "TAN", // 525 (783) + "TERMINATE", // 526 (784) + "TEST", // 527 (785) + "TEST-DATE-YYYYMMDD", // 528 (786) + "TEST-DAY-YYYYDDD", // 529 (787) + "TEST-FORMATTED-DATETIME", // 530 (788) + "TEST-NUMVAL", // 531 (789) + "TEST-NUMVAL-C", // 532 (790) + "TEST-NUMVAL-F", // 533 (791) + "THAN", // 534 (792) + "TIME", // 535 (793) + "TIMES", // 536 (794) + "TO", // 537 (795) + "TOP", // 538 (796) + "TOP-LEVEL", // 539 (797) + "TRACKS", // 540 (798) + "TRACK-AREA", // 541 (799) + "TRAILING", // 542 (800) + "TRANSFORM", // 543 (801) + "TRIM", // 544 (802) + "TRUE", // 545 (803) + "TRY", // 546 (804) + "TURN", // 547 (805) + "TYPE", // 548 (806) + "TYPEDEF", // 549 (807) + "ULENGTH", // 550 (808) + "UNBOUNDED", // 551 (809) + "UNIT", // 552 (810) + "UNITS", // 553 (811) + "UNIT-RECORD", // 554 (812) + "UNTIL", // 555 (813) + "UP", // 556 (814) + "UPON", // 557 (815) + "UPOS", // 558 (816) + "UPPER-CASE", // 559 (817) + "USAGE", // 560 (818) + "USING", // 561 (819) + "USUBSTR", // 562 (820) + "USUPPLEMENTARY", // 563 (821) + "UTILITY", // 564 (822) + "UUID4", // 565 (823) + "UVALID", // 566 (824) + "UWIDTH", // 567 (825) + "VALUE", // 568 (826) + "VARIANCE", // 569 (827) + "VARYING", // 570 (828) + "VOLATILE", // 571 (829) + "WHEN-COMPILED", // 572 (830) + "WITH", // 573 (831) + "WORKING-STORAGE", // 574 (832) + "XML", // 575 (833) + "XMLGENERATE", // 576 (834) + "XMLPARSE", // 577 (835) + "YEAR-TO-YYYY", // 578 (836) + "YYYYDDD", // 579 (837) + "YYYYMMDD", // 580 (838) + "ARITHMETIC", // 581 (839) + "ATTRIBUTE", // 582 (840) + "AUTO", // 583 (841) + "AUTOMATIC", // 584 (842) + "AWAY-FROM-ZERO", // 585 (843) + "BACKGROUND-COLOR", // 586 (844) + "BELL", // 587 (845) + "BINARY-ENCODING", // 588 (846) + "BLINK", // 589 (847) + "CAPACITY", // 590 (848) + "CENTER", // 591 (849) + "CLASSIFICATION", // 592 (850) + "CYCLE", // 593 (851) + "DECIMAL-ENCODING", // 594 (852) + "ENTRY-CONVENTION", // 595 (853) + "EOL", // 596 (854) + "EOS", // 597 (855) + "ERASE", // 598 (856) + "EXPANDS", // 599 (857) + "FLOAT-BINARY", // 600 (858) + "FLOAT-DECIMAL", // 601 (859) + "FOREGROUND-COLOR", // 602 (860) + "FOREVER", // 603 (861) + "FULL", // 604 (862) + "HIGHLIGHT", // 605 (863) + "HIGH-ORDER-LEFT", // 606 (864) + "HIGH-ORDER-RIGHT", // 607 (865) + "IGNORING", // 608 (866) + "IMPLEMENTS", // 609 (867) + "INITIALIZED", // 610 (868) + "INTERMEDIATE", // 611 (869) + "LC-ALL", // 612 (870) + "LC-COLLATE", // 613 (871) + "LC-CTYPE", // 614 (872) + "LC-MESSAGES", // 615 (873) + "LC-MONETARY", // 616 (874) + "LC-NUMERIC", // 617 (875) + "LC-TIME", // 618 (876) + "LOWLIGHT", // 619 (877) + "NEAREST-AWAY-FROM-ZERO", // 620 (878) + "NEAREST-EVEN", // 621 (879) + "NEAREST-TOWARD-ZERO", // 622 (880) + "NONE", // 623 (881) + "NORMAL", // 624 (882) + "NUMBERS", // 625 (883) + "PREFIXED", // 626 (884) + "PREVIOUS", // 627 (885) + "PROHIBITED", // 628 (886) + "RELATION", // 629 (887) + "REQUIRED", // 630 (888) + "REVERSE-VIDEO", // 631 (889) + "ROUNDING", // 632 (890) + "SECONDS", // 633 (891) + "SECURE", // 634 (892) + "SHORT", // 635 (893) + "SIGNED", // 636 (894) + "STANDARD-BINARY", // 637 (895) + "STANDARD-DECIMAL", // 638 (896) + "STATEMENT", // 639 (897) + "STEP", // 640 (898) + "STRUCTURE", // 641 (899) + "TOWARD-GREATER", // 642 (900) + "TOWARD-LESSER", // 643 (901) + "TRUNCATION", // 644 (902) + "UCS-4", // 645 (903) + "UNDERLINE", // 646 (904) + "UNSIGNED", // 647 (905) + "UTF-16", // 648 (906) + "UTF-8", // 649 (907) + "ADDRESS", // 650 (908) + "END-ACCEPT", // 651 (909) + "END-ADD", // 652 (910) + "END-CALL", // 653 (911) + "END-COMPUTE", // 654 (912) + "END-DELETE", // 655 (913) + "END-DISPLAY", // 656 (914) + "END-DIVIDE", // 657 (915) + "END-EVALUATE", // 658 (916) + "END-MULTIPLY", // 659 (917) + "END-PERFORM", // 660 (918) + "END-READ", // 661 (919) + "END-RETURN", // 662 (920) + "END-REWRITE", // 663 (921) + "END-SEARCH", // 664 (922) + "END-START", // 665 (923) + "END-STRING", // 666 (924) + "END-SUBTRACT", // 667 (925) + "END-UNSTRING", // 668 (926) + "END-WRITE", // 669 (927) + "END-IF", // 670 (928) + "THRU", // 671 (929) + "OR", // 672 (930) + "AND", // 673 (931) + "NOT", // 674 (932) + "NE", // 675 (933) + "LE", // 676 (934) + "GE", // 677 (935) + "POW", // 678 (936) + "NEG", // 679 (937) +}; diff --git a/gcc/cobol/udf/stored-char-length.cbl b/gcc/cobol/udf/stored-char-length.cbl new file mode 100644 index 0000000..9ab3b14 --- /dev/null +++ b/gcc/cobol/udf/stored-char-length.cbl @@ -0,0 +1,15 @@ + * This function is in public domain. + * Contributed by James K. Lowden of Cobolworx in August 2024 + + Identification Division. + Function-ID. STORED-CHAR-LENGTH. + Data Division. + Linkage Section. + 01 Candidate PIC X Any Length. + 77 Output-Value PIC 9(8) COMP-5. + + Procedure Division using Candidate RETURNING Output-Value. + Move Function Length( Function Trim(Candidate) ) + to Output-Value. + End Function STORED-CHAR-LENGTH. + diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc new file mode 100644 index 0000000..6ade146 --- /dev/null +++ b/gcc/cobol/util.cc @@ -0,0 +1,2310 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file supports parsing without requiring access to the symbol + * table definition. Unlike the Bison input, this file brings in gcc + * header files. + */ + +#include "cobol-system.h" +#include <langinfo.h> + +#include "coretypes.h" +#include "version.h" +#include "demangle.h" +#include "intl.h" +#include "backtrace.h" +#include "diagnostic.h" +#include "diagnostic-color.h" +#include "diagnostic-url.h" +#include "diagnostic-metadata.h" +#include "diagnostic-path.h" +#include "edit-context.h" +#include "selftest.h" +#include "selftest-diagnostic.h" +#include "opts.h" +#include "util.h" +#include "cbldiag.h" +#include "lexio.h" + +#define HOWEVER_GCC_DEFINES_TREE +#include "ec.h" +#include "common-defs.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" + +#pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +// External declarations. +extern FILE * yyin; +extern int yyparse(void); + +extern int demonstration_administrator(int N); + +const char * +symbol_type_str( enum symbol_type_t type ) +{ + switch(type) { + case SymFilename: + return "SymFilename"; + case SymFunction: + return "SymFunction"; + case SymField: + return "SymField"; + case SymLabel: + return "SymLabel"; + case SymSpecial: + return "SymSpecial"; + case SymAlphabet: + return "SymAlphabet"; + case SymFile: + return "SymFile"; + case SymDataSection: + return "SymDataSection"; + } + dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type); + return "???"; +} + +const char * +cbl_field_type_str( enum cbl_field_type_t type ) +{ + switch(type) { + case FldDisplay: + return "FldDisplay"; + case FldInvalid: + return "Fld"; // Invalid"; + case FldGroup: + return "FldGroup"; + case FldAlphanumeric: + return "FldAlphanumeric"; + case FldNumericBinary: + return "FldNumericBinary"; + case FldFloat: + return "FldFloat"; + case FldNumericBin5: + return "FldNumericBin5"; + case FldPacked: + return "FldPacked"; + case FldNumericDisplay: + return "FldNumericDisplay"; + case FldNumericEdited: + return "FldNumericEdited"; + case FldAlphaEdited: + return "FldAlphaEdited"; + case FldLiteralA: + return "FldLiteralA"; + case FldLiteralN: + return "FldLiteralN"; + case FldClass: + return "FldClass"; + case FldConditional: + return "FldConditional"; + case FldForward: + return "FldForward"; + case FldIndex: + return "FldIndex"; + case FldSwitch: + return "FldSwitch"; + case FldPointer: + return "FldPointer"; + case FldBlob: + return "FldBlob"; + } + dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type); + return "???"; +} + +const char * +cbl_logop_str( enum logop_t op ) +{ + switch(op) { + case not_op: + return "not_op"; + case and_op: + return "and_op"; + case or_op: + return "or_op"; + case xor_op: + return "xor_op"; + case xnor_op: + return "xnor_op"; + case true_op: + return "true_op"; + case false_op: + return "false_op"; + } + dbgmsg("%s:%d: invalid logop_t %d", __func__, __LINE__, op); + return "???"; +} + +cbl_field_t +determine_intermediate_type( const cbl_refer_t& aref, + int op __attribute__ ((unused)), + const cbl_refer_t& bref ) + { + cbl_field_t output = {}; + + if( aref.field->type == FldFloat || bref.field->type == FldFloat ) + { + output.type = FldFloat; + output.data.capacity = 16; + output.attr = (intermediate_e ); + } + else if( op == '*' + && aref.field->data.digits + bref.field->data.digits + > MAX_FIXED_POINT_DIGITS) + { + output.type = FldFloat; + output.data.capacity = 16; + output.attr = (intermediate_e ); + } + else + { + output.type = FldNumericBin5; + output.data.capacity = 16; + output.data.digits = MAX_FIXED_POINT_DIGITS; + output.attr = (intermediate_e | signable_e ); + } + + return output; + } + +static char regexmsg[80]; + +/* + * Scan part of the picture, parsing any repetition count. + */ +int +repeat_count( const char picture[] ) +{ + char ch; + int n, count = -1; + + n = sscanf( picture, "%c(%d)", &ch, &count ); + if( count <= 0 && 4 < n ) { // parsed count is negative + count = 0; // zero is invalid; -1 means no repetition + } + return count; +} + +const char *numed_message; + +extern int yydebug, yy_flex_debug; + +bool +is_alpha_edited( const char picture[] ) { + static const char valid[] = "abxABX90/(),."; + assert(picture); + + for( const char *p = picture; *p != '\0'; p++ ) { + if( strchr(valid, *p) ) continue; + if( ISDIGIT(*p) ) continue; + if( symbol_decimal_point() == *p ) continue; + if( symbol_currency(*p) ) continue; + + if( yydebug ) { + dbgmsg( "%s: bad character '%c' at %.*s<-- in '%s'", + __func__, *p, int(p - picture) + 1, picture, picture ); + } + return false; + } + return true; +} + +bool +is_numeric_edited( const char picture[] ) { + static const char valid[] = "BbPpVvZz90/(),.+-*"; // and CR DB + const char *p; + assert(picture); + + if( strstr(picture, "(0)") ) { + numed_message = "'(0)' invalid in PICTURE (ISO 2023 13.18.40.3)"; + return false; + } + + // check for correct parenthetical constructs + for( p=picture; (p = strchr(p, '(')) != NULL; p++ ) { + int v, n, pos; + n = sscanf(++p, "%d%n", &v, &pos); + numed_message = NULL; + + if( n == -1 ) { + numed_message = "invalid repeat-count in PICTURE"; + } else if( n == 0 ) { + numed_message = "invalid repeat-count in PICTURE"; + } else if( p[pos] != ')' ) { + numed_message = "unbalanced parentheses in PICTURE"; + } + if( numed_message ) return false; + } + // check for dangling right parenthesis + for( p=picture; (p = strchr(p, ')')) != NULL; p++ ) { + auto prior = p; + while( picture < prior-- ) { + if( ISDIGIT(*prior) ) continue; + if( *prior == '(' ) break; + numed_message = "unbalanced parentheses in PICTURE"; + return false; + } + } + + if( (strchr(picture, 'Z') || strchr(picture, 'z')) && strchr(picture, '*') ) { + numed_message = "Z and * are mutually exclusive"; + return false; + } + + for( p = picture; *p != '\0'; p++ ) { + if( strchr(valid, *p) ) continue; + if( ISDIGIT(*p) ) continue; + if( symbol_decimal_point() == *p ) continue; + if( symbol_currency(*p) ) continue; + + switch(*p) { // test for CR or DB + case 'C': case 'c': + if( TOUPPER(*++p) == 'R' ) continue; + numed_message = "expected CR in PICTURE"; + break; + case 'D': case 'd': + if( TOUPPER(*++p) == 'B' ) continue; + numed_message = "expected DB in PICTURE"; + break; + default: + numed_message = xasprintf("invalid PICTURE character " + "'%c' at offset %zu in '%s'", + *p, p - picture, picture); + break; + } + + dbgmsg( "%s: no, because '%c' at %.*s<-- in '%s'", + __func__, *p, int(p - picture) + 1, picture, picture ); + + return false; + } + return true; +} + +char * +normalize_picture( char picture[] ) +{ + int erc; + char *p; + + regex_t *preg = NULL; + const char regex[] = "([AX9])[(]([[:digit:]]+)[)]"; + int cflags = REG_EXTENDED | REG_ICASE; + regmatch_t pmatch[4]; + + if( (erc = regcomp(preg, regex, cflags)) != 0 ) { + regerror(erc, preg, regexmsg, sizeof(regexmsg)); + dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); + return picture; + } + + while( (erc = regexec(preg, picture, COUNT_OF(pmatch), pmatch, 0)) == 0 ) { + assert(pmatch[1].rm_so != -1 && pmatch[1].rm_so < pmatch[1].rm_eo); + size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; + assert(len == 1); + const char *start = picture + pmatch[1].rm_so; + + assert(pmatch[2].rm_so != -2 && pmatch[2].rm_so < pmatch[2].rm_eo); + len = pmatch[2].rm_eo - pmatch[2].rm_so; + assert(len > 0); + + /* + * Overwrite e.g. A(4) with AAAA. + */ + assert(pmatch[2].rm_so == pmatch[1].rm_eo + 1); // character paren number + p = picture + pmatch[2].rm_so; + len = 0; + if( 1 != sscanf(p, "%zu", &len) ) { + dbgmsg("%s:%d: no number found in '%s'", __func__, __LINE__, p); + goto irregular; + } + if( len == 0 ) { + dbgmsg("%s:%d: ZERO length found in '%s'", __func__, __LINE__, p); + goto irregular; + } + + char pic[len + 1]; + memset(pic, *start, len); + pic[len] = '\0'; + const char *finish = picture + pmatch[2].rm_eo, + *eopicture = picture + strlen(picture); + + p = xasprintf( "%*s%s%*s", + (int)(start - picture), picture, + pic, + (int)(eopicture - finish), finish ); + + free(picture); + picture = p; + continue; + } + assert(erc == REG_NOMATCH); + +irregular: + regfree(preg); + + return picture; +} + +static bool +memall( const char picture[], char ch ) +{ + for( const char *p=picture; *p != '\0'; p++ ) { + if( *p != ch ) { + return false; + } + } + return true; +} + +static const char * +match( const char picture[], const char pattern[] ) +{ + int erc; + + regex_t *preg = NULL; + int cflags = REG_EXTENDED; + regmatch_t pmatch[1]; + + if( (erc = regcomp(preg, pattern, cflags)) != 0 ) { + regerror(erc, preg, regexmsg, sizeof(regexmsg)); + dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); + return picture; + } + + if( (erc = regexec(preg, picture, COUNT_OF(pmatch), pmatch, 0)) != 0 ) { + assert(erc == REG_NOMATCH); + return NULL; + } + assert(pmatch[0].rm_so != -1); + return picture + pmatch[0].rm_so; +} + +bool +is_elementary( enum cbl_field_type_t type ) +{ + switch(type) { + case FldDisplay: + case FldInvalid: + case FldGroup: + case FldLiteralA: + case FldLiteralN: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + case FldBlob: + return false; + case FldPointer: + case FldAlphanumeric: + case FldPacked: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldNumericBinary: + case FldNumericBin5: + case FldFloat: + return true; // takes up space + } + dbgmsg("%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type); + return false; +} + +static bool +is_numericish( cbl_field_type_t type ) { + return + type == FldNumericDisplay || + type == FldNumericEdited || is_numeric(type); +} + +static inline bool +is_numericish( const struct cbl_field_t *field ) { + return is_numericish(field->type); +} + +static bool +integer_move_ok( const cbl_field_t *src, const cbl_field_t *tgt ) { + if( is_numericish(src) && + ! (tgt->type == FldInvalid || is_literal(tgt)) ) { + if( src->data.rdigits > 0 ) { + dbgmsg("%s has %d rdigits", src->name, src->data.rdigits); + } + return src->data.rdigits == 0; + } + return integer_move_ok( tgt, src ); +} + +static bool +is_alphanumeric( const cbl_field_t *field ) { + assert(field); + + if( is_elementary(field->type) ) { + switch(field->type) { + case FldAlphanumeric: + case FldPacked: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldNumericBinary: + return true; + case FldNumericBin5: + case FldFloat: + return false; + default: + break; + } + return false; + } + + if( field->type != FldGroup ) return false; + + const struct symbol_elem_t *e = symbol_elem_of(field); + + for( ++e; e < symbols_end(); e++ ) { + if( e->type != SymField ) { + // Ignore non-fields: + continue; + } + const uint32_t level = cbl_field_of(e)->level; + if( level == 88 ) continue; + if( level <= field->level || level == LEVEL77 ) { + break; // stop if next field is higher in the hierarchy + } + + if( ! is_alphanumeric(cbl_field_of(e)) ) { + return false; + } + } + return true; +} + +/* + * When setting a field's type, there is a 3-way test involving: + * 1. The current value of cbl_field_t::type + * 2. The value of cbl_field_t::usage, from USAGE or parent's USAGE + * 3. The candidate (proposed new type) + * + * cbl_field_t::usage == FldInvalid indicates no prescribed + * type. Type-setting succeeds unless the candidate cannot override + * the current type. + * + * A candidate of FldDisplay updates cbl_field_t::usage only, and only + * if it is FldInvalid, provided the cbl_field_t::type is either + * FldInvalid or displayable. FldDisplay isn't really a type, but a + * kind of type: it constrains what the type may be set to. + * + * When cbl_field_t::usage == FldDisplay, the candidate type must be + * displayable, else the update is rejected. + * + * If the candidate passes the usage test, we consider the current type. + * + * cbl_field_t::type == FldInvalid indicates no defined type + * (yet). The candidate type becomes the type. Otherwise, the + * candidate must match the type, or can override it. + */ +static bool +is_displayable( cbl_field_type_t type ) { + switch(type) { + case FldDisplay: + case FldAlphaEdited: + case FldAlphanumeric: + case FldNumericDisplay: + case FldNumericEdited: + return true; + default: break; + } + return false; +} + +// disallow implausible combinations +static bool +plausible_usage( cbl_field_type_t usage, cbl_field_type_t candidate ) { + switch(usage) { + case FldInvalid: + return true; + case FldDisplay: + return is_displayable(candidate); + case FldGroup: + gcc_unreachable(); + default: + if( candidate == FldDisplay ) return false; // because overrides FldInvalid only + break; + } + + assert(is_elementary(usage)); + assert(is_elementary(candidate)); + return usage == candidate || (is_numericish(usage) && is_numericish(candidate)); +} + +cbl_field_t * +symbol_field_index_set( cbl_field_t *field ) { + static const cbl_field_data_t data { .capacity = 8, .digits = 0 }; + + field->data = data; + + field->type = FldIndex; + field->attr &= ~size_t(signable_e); + + return field; +} + +bool +symbol_field_type_update( cbl_field_t *field, + cbl_field_type_t candidate, bool is_usage ) { + + if( is_usage && (candidate == FldIndex || candidate == FldPointer) ) { + field->usage = candidate; + switch(field->type) { + case FldInvalid: + case FldIndex: + case FldPointer: + // set the type + field->type = candidate; + if( field->data.capacity == 0 ) { + static const cbl_field_data_t data = {0, 8, 0, 0, + NULL, NULL, {NULL}, {NULL}}; + field->data = data; + field->attr &= ~size_t(signable_e); + } + return true; + default: + break; + } + return false; // type unchanged + } + + assert(candidate == FldDisplay || is_elementary(candidate)); + assert(field->type != FldDisplay); // can never be + assert(field->usage == FldInvalid || + field->usage == FldDisplay || is_elementary(field->usage)); + + if( ! (field->type == FldInvalid || + field->type == FldGroup || is_elementary(field->type)) ) { + return false; // semantic user error + } + + // type matches itself + if( field->type == candidate ) { + if( is_usage ) field->usage = candidate; + return true; + } + if( is_usage && field->usage == candidate ) return true; + + if( ! plausible_usage(field->usage, candidate) ) return false; + + /* + * FldDisplay candidate + */ + if( candidate == FldDisplay ) { // update usage at most + if( field->type == FldInvalid || + field->type == FldGroup || + is_displayable(field->type) ) { + field->usage = candidate; + return true; + } + return false; + } + + assert(field->type != candidate && is_elementary(candidate)); + + /* + * Concrete usage candidate. Update usage first (if USAGE clause), then type. + */ + if( is_usage ) { + switch(field->type) { + case FldBlob: + case FldDisplay: + gcc_unreachable(); // type is never just "display" + break; + case FldAlphaEdited: + break; + case FldNumericEdited: + case FldPointer: + if( is_numeric(candidate) ) { + return false; + } + __attribute__((fallthrough)); + case FldInvalid: + case FldGroup: + case FldNumericDisplay: + field->usage = candidate; + break; + case FldLiteralA: + case FldLiteralN: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + gcc_unreachable(); + case FldAlphanumeric: + // MF allows PIC X(n) to have USAGE COMP-[5x] + if( candidate != FldNumericBin5 ) return false; + if( ! (dialect_mf() && field->has_attr(all_x_e)) ) { + return false; + } + __attribute__((fallthrough)); + case FldFloat: + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + assert(field->type != candidate); // ensured by test at start of function + field->usage = candidate; + } + } + + // Now, apply (possibly new) usage to type + assert( !is_usage || field->usage == candidate ); + + /* + * Concrete type candidate + */ + switch(field->usage) { + case FldInvalid: + field->type = candidate; + field->attr |= numeric_group_attrs(field); + return true; + case FldDisplay: + if( is_displayable(candidate) ) { + field->type = candidate; + field->attr |= numeric_group_attrs(field); + return true; + } + break; + case FldAlphaEdited: + case FldAlphanumeric: + assert( dialect_mf() && field->has_attr(all_x_e) ); + // convert all X's alphanumeric to numeric + field->clear_attr(all_x_e); + field->type = field->usage; + field->attr |= numeric_group_attrs(field); + return true; + case FldNumericDisplay: + case FldNumericEdited: + case FldGroup: + case FldLiteralA: + case FldLiteralN: + case FldClass: + case FldConditional: + case FldForward: + case FldSwitch: + case FldPointer: + case FldBlob: + // invalid usage value + gcc_unreachable(); + break; + case FldIndex: + if( field->usage == candidate ) { + field->type = candidate; + return true; + } + break; + case FldFloat: + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + if( field->usage == candidate ) { + field->type = candidate; + return true; + } + if( candidate == FldNumericDisplay ) { + field->type = field->usage; + field->attr |= numeric_group_attrs(field); + return true; + } + break; + } + return false; +} + +bool +redefine_field( cbl_field_t *field ) { + cbl_field_t *primary = symbol_redefines(field); + bool fOK = true; + + if( !primary ) return false; + + if( field->type == FldInvalid ) { // no PICTURE + field->type = primary->type; + field->data = primary->data; + field->data.initial = NULL; + } + + if( field->data.capacity == 0 ) field->data = primary->data; + + if( is_numeric(field->type) && field->usage == FldDisplay ) { + fOK = symbol_field_type_update(field, FldNumericDisplay, false); + } + + return fOK; +} + +void +cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { + + if( ! data.initial ) return; + + auto fig = cbl_figconst_of(data.initial); + + // numeric initial value + if( is_numeric(type) ) { + if( has_attr(quoted_e) ) { + error_msg(loc, "numeric type %s VALUE '%s' requires numeric VALUE", + name, data.initial); + return; + } + if( ! (fig == normal_value_e || fig == zero_value_e) ) { + error_msg(loc, "numeric type %s VALUE '%s' requires numeric VALUE", + name, cbl_figconst_str(fig)); + return; + } + + switch( type ) { + case FldIndex: + case FldNumericBin5: + if( data.digits == 0 ) { + // We are dealing with a pure binary type. If the capacity is + // 8 or more, we need do no further testing because we assume + // everything fits. + if( data.capacity < 8 ) { + auto p = strchr(data.initial, symbol_decimal_point()); + if( p && atoll(p+1) != 0 ) { + error_msg(loc, "integer type %s VALUE '%s' " + "requires integer VALUE", + name, data.initial); + } else { + // Calculate the maximum possible value that a binary with this + // many bytes can hold + size_t max_possible_value; + max_possible_value = 1; + max_possible_value <<= data.capacity*8; + max_possible_value -= 1; + if( attr & signable_e ) + { + // Because it is signable, we divide by two to account for the + // sign bit: + max_possible_value >>= 1; + } + // Pick up the given VALUE + size_t candidate; + if( *data.initial == '-' ) { + // We care about the magnitude, not the sign + if( !(attr & signable_e) ){ + error_msg(loc, "integer type %s VALUE '%s' " + "requires a non-negative integer", + name, data.initial); + } + candidate = atoll(data.initial+1); + } + else { + candidate = (size_t)atoll(data.initial); + } + if( candidate > max_possible_value ) { + error_msg(loc, "integer type %s VALUE '%s' " + "requires an integer of magnitude no greater than %zu", + name, data.initial, max_possible_value); + } + } + } + } + break; + case FldFloat: + break; + default: + if( ! has_attr(scaled_e) ) { + /* + * Check fraction for excess precision + */ + auto p = strchr(data.initial, symbol_decimal_point()); + if( p ) { + auto pend = std::find(p, p + strlen(p), 0x20); + int n = std::count_if( ++p, pend, isdigit ); + + if( data.precision() < n) { + if( 0 == data.rdigits ) { + error_msg(loc, "integer type %s VALUE '%s' requires integer VALUE", + name, data.initial); + } else { + auto has_exponent = std::any_of( p, pend, + []( char ch ) { + return TOUPPER(ch) == 'E'; + } ); + if( !has_exponent && data.precision() < pend - p ) { + error_msg(loc, "%s cannot represent VALUE '%s' exactly (max .%zu)", + name, data.initial, pend - p); + } + } + } + } else { + p = data.initial + strlen(data.initial); + } + + /* + * Check magnitude, whether or not there's a decimal point. + */ + // skip leading zeros + auto first_digit = std::find_if( data.initial, p, + []( char ch ) { + return ch != '0'; } ); + // count remaining digits, up to the decimal point + auto n = std::count_if( first_digit, p, isdigit ); + if( data.ldigits() < n ) { + error_msg(loc, "numeric %s VALUE '%s' holds only %u digits", + name, data.initial, + data.digits); + } + } + break; + } // end type switch for normal string initial value + return; + } // end numeric + assert( ! is_numeric(type) ); + + // consider all-alphabetic + if( has_attr(all_alpha_e) ) { + bool alpha_value = fig != zero_value_e; + + if( fig == normal_value_e ) { + alpha_value = std::all_of( data.initial, + data.initial + + strlen(data.initial), + []( char ch ) { + return ISSPACE(ch) || + ISPUNCT(ch) || + ISALPHA(ch); } ); + } + if( ! alpha_value ) { + error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data", + name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial); + } + } + + return; +} + +// Return the field representing the subscript whose literal value +// exceeds the OCCURS clause for that dimension, else NULL if all +// literals are in bounds. +const cbl_field_t * +literal_subscript_oob( const cbl_refer_t& r, size_t& isub /* output */) { + // Verify literal subscripts if dimensions are correct. + size_t ndim(dimensions(r.field)); + if( ndim == 0 || ndim != r.nsubscript ) return NULL; + cbl_refer_t *esub = r.subscripts + r.nsubscript; + cbl_field_t *dims[ ndim ], **pdim = dims + ndim; + std::fill(dims, pdim, (cbl_field_t*)NULL); + + for( auto f = r.field; f; f = parent_of(f) ) { + if( f->occurs.ntimes() ) { + --pdim; + *pdim = f; + } + } + assert(dims[0] != NULL); + assert(pdim == dims); + + /* + * For each subscript, if it is a literal, verify it is in bounds + * for the corresponding dimension. Return the first subscript not + * meeting those criteria, if any. + */ + auto p = std::find_if( r.subscripts, esub, + [&pdim]( const cbl_refer_t& r ) { + const auto& occurs((*pdim)->occurs); + pdim++; + return ! occurs.subscript_ok(r.field); + } ); + isub = p - r.subscripts; + return p == esub? NULL : dims[isub]; +} + +size_t +cbl_refer_t::subscripts_set( const std::list<cbl_refer_t>& subs ) { + nsubscript = subs.size(); + subscripts = new cbl_refer_t[nsubscript]; + std::copy( subs.begin(), subs.end(), subscripts ); + + return dimensions(field); +} + +const char * +cbl_refer_t::str() const { + static char subscripts[64]; + sprintf(subscripts, "(%u of %zu dimensions)", nsubscript, dimensions(field)); + char *output = xasprintf("%s %s %s", + field? field_str(field) : "(none)", + 0 < dimensions(field)? subscripts : "", + is_refmod_reference()? "(refmod)" : "" ); + return output; +} +const char * +cbl_refer_t::name() const { + if( prog_func ) return prog_func->name; + char *output = xasprintf("%s", field? field->name : "(none)" ); + return output; +} + +const char * +cbl_refer_t::deref_str() const { + char dimstr[nsubscript * 16] = "(", *p = dimstr + 1; + + if( !field ) return name(); + + for( auto sub = subscripts; sub < subscripts + nsubscript; sub++ ) { + auto initial = sub->field->data.initial ? sub->field->data.initial : "?"; + p += snprintf( p, (dimstr + sizeof(dimstr)) - p, "%s ", initial ); + } + if( 0 < nsubscript ) { + *--p = ')'; + } + char *output = xasprintf("%s%s", field->name, dimstr); + return output; +} + +struct move_corresponding_field { + cbl_refer_t tgt, src; + + move_corresponding_field( const cbl_refer_t& tgt, const cbl_refer_t& src ) + : tgt(tgt), src(src) {} + + void operator()( corresponding_fields_t::const_reference elem ) { + if( elem.second == 0 ) return; + src.field = cbl_field_of(symbol_at(elem.first)); + tgt.field = cbl_field_of(symbol_at(elem.second)); + + if( yydebug ) { + dbgmsg("move_corresponding:%d: SRC: %3zu %s", __LINE__, + elem.first, src.str()); + dbgmsg("move_corresponding:%d: to %3zu %s", __LINE__, + elem.second, tgt.str()); + } + + parser_move(tgt, src); + } +}; + +bool +move_corresponding( cbl_refer_t& tgt, cbl_refer_t& src ) +{ + assert(tgt.field && src.field); + assert(tgt.field->type == FldGroup); + assert(src.field->type == FldGroup); + + corresponding_fields_t pairs = corresponding_move_fields( src.field, + tgt.field ); + if( pairs.empty() ) return false; + + std::for_each( pairs.begin(), pairs.end(), + move_corresponding_field(tgt, src) ); + return true; +} + +bool +valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src ) +{ + // This is the base matrix of allowable moves. Moves from Alphanumeric are + // modified based on the attribute bit all_alpha_e, and moves from Numeric + // types to Alphanumeric and AlphanumericEdited are allowable when the + // Numeric type is integer, and not allowed when the type has digits to the + // right of the decimal point. + + // Note that the ordering of elements in this matrix has to match the + // ordering of the symbols.h elements in enum cbl_field_type_t. + + static const unsigned char matrix[FldLiteralN+1][FldLiteralN+1] = { + // src down, tgt across + //I G A B F P 5 ND NE AE LA LN + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // FldInvalid + { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, }, // FldGroup + { 0, 1, 1, 8, 8, 8, 8, 8, 8, 1, 0, 0, }, // FldAlphanumeric + { 0, 1, 6, 1, 1, 1, 1, 1, 1, 2, 0, 0, }, // FldNumericBinary (numeric) + { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, }, // FldFloat + { 0, 1, 6, 1, 1, 1, 1, 1, 1, 2, 0, 0, }, // FldPacked (numeric) + { 0, 1, 6, 1, 1, 1, 1, 1, 1, 2, 0, 0, }, // FldNumericBin5 (numeric) + { 0, 1, 6, 1, 1, 1, 1, 1, 1, 2, 0, 0, }, // FldNumericDisplay (numeric) + { 0, 1, 4, 1, 1, 1, 1, 1, 1, 1, 0, 0, }, // FldNumericEdited + { 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, }, // FldAlphaEdited + { 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, }, // FldLiteralA + { 0, 1, 6, 1, 1, 1, 1, 1, 1, 2, 0, 0, }, // FldLiteralN (numeric) + }; + /* Needs C++11 */ + static_assert(sizeof(matrix[0]) == COUNT_OF(matrix[0]), + "matrix should be square"); + + for( const cbl_field_t *args[] = {tgt, src}, **p=args; + p < args + COUNT_OF(args); p++ ) { + auto& f(**p); + switch(f.type) { + case FldClass: + case FldConditional: + case FldIndex: + case FldSwitch: + case FldDisplay: + case FldPointer: + return false; + // parser should not allow the following types here + case FldForward: + case FldBlob: + default: + if( sizeof(matrix[0]) < f.type ) { + cbl_internal_error("logic error: MOVE %s %s invalid type:", + cbl_field_type_str(f.type), f.name); + } + break; + } + } + + assert(tgt->type < sizeof(matrix[0])); + assert(src->type < sizeof(matrix[0])); + + // A value of zero means the move is prohibited. + // The 1 bit means the move is allowed + // The 2 bit means the move is allowed if the source has zero rdigits, + // or is P-scaled + // The 4 bit means the move is allowed if dest all_alpha_e is off. + // The 8 bit means the move is allowed if source all_alpha_e is off. + + bool retval = false; + bool nofraction = src->data.rdigits == 0 || src->has_attr(scaled_e); + bool alphabetic = tgt->has_attr(all_alpha_e); + bool src_alpha = src->has_attr(all_alpha_e); + + switch( matrix[src->type][tgt->type] ) + { + case 0: + if( src->type == FldLiteralA && is_numericish(tgt) && !is_literal(tgt) ) { + // Allow if input string is an integer. + const char *p = src->data.initial, *pend = p + src->data.capacity; + if( p[0] == '+' || p[0] == '-' ) p++; + retval = std::all_of( p, pend, isdigit ); + if( yydebug && ! retval ) { + auto bad = std::find_if( p, pend, + []( char ch ) { return ! ISDIGIT(ch); } ); + dbgmsg("%s:%d: offending character '%c' at position %zu", + __func__, __LINE__, *bad, bad - p); + } + } + break; + case 1: + retval = true; + break; + case 2: + retval = nofraction; + break; + case 4: + retval = !alphabetic; + break; + case 6: + retval = nofraction && !alphabetic; + break; + case 8: + retval = !src_alpha; + break; + default: + dbgmsg("%s:%d: matrix at %s, %s is %d", __func__, __LINE__, + cbl_field_type_str(src->type), cbl_field_type_str(tgt->type), + matrix[src->type][tgt->type]); + gcc_unreachable(); + } + + if( retval && src->has_attr(embiggened_e) ) { + if( is_numeric(tgt) && tgt->data.capacity < src->data.capacity ) { + dbgmsg("error: source no longer fits in target"); + return false; + } + } + + if( yydebug && getenv(__func__) ) { + dbgmsg("%s:%d: ok to move %s to %s (0x%02x)", __func__, __LINE__, + cbl_field_type_str(src->type), cbl_field_type_str(tgt->type), + retval); + } + + return retval; +} + +bool +valid_picture( enum cbl_field_type_t type, const char picture[] ) +{ + switch(type) { + case FldBlob: + gcc_unreachable(); // can't get here via the parser + case FldInvalid: + case FldGroup: + case FldLiteralA: + case FldLiteralN: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + case FldDisplay: + case FldPointer: + // These types don't take pictures; the grammar shouldn't call the function. + dbgmsg("%s:%d: no polaroid: %s", __func__, __LINE__, cbl_field_type_str(type)); + return false; + case FldNumericBinary: + case FldFloat: + case FldNumericBin5: + case FldPacked: + // Validated in scan.l. + return true; + case FldAlphanumeric: + // Cannot be all As or all 9s. + return !( memall(picture, 'A') || memall(picture, '9') ); + case FldNumericDisplay: + // Must have A or X and B, 0, or /. + return match(picture, "[AX]") && match(picture, "[B0/]"); + case FldNumericEdited: + case FldAlphaEdited: + break; + } + assert(type == FldNumericEdited ); + + // Must contain at least one 0, B, /, Z, *, +, (comma), ., –, CR, DB, or cs. + if( ! match( picture, "[$0B/Z*+.,+–]|DB$|CR$" ) ) { + return false; + } + + return true; +} + +uint32_t +type_capacity( enum cbl_field_type_t type, uint32_t digits ) +{ + switch(type) { + case FldBlob: gcc_unreachable(); + case FldInvalid: + case FldGroup: + case FldAlphanumeric: + case FldNumericDisplay: + case FldNumericEdited: + case FldAlphaEdited: + case FldClass: + case FldConditional: + case FldForward: + case FldIndex: + case FldSwitch: + case FldDisplay: + case FldPointer: + return digits; + case FldFloat: + case FldNumericBinary: + case FldNumericBin5: + case FldLiteralA: + case FldLiteralN: + break; + case FldPacked: + return (digits+2)/2; // one nybble per digit + a sign nybble + } + + switch(digits) { + case 1 ... 4: + return 2; + case 5 ... 9: + return 4; + case 10 ... 18: + return 8; + case 19 ... 38: + return 16; + } + + dbgmsg( "%s:%d: invalid size %u for type %s", __func__, __LINE__, + digits, cbl_field_type_str(type) ); + + return digits; +} + +typedef char hex_pair_t[2]; + +class scan_hex { +public: + unsigned char operator()( const hex_pair_t input ) { + static char buffer[ sizeof(hex_pair_t) + 1 ] = ""; + + memcpy( buffer, input, sizeof(buffer) - 1 ); + int x; + sscanf( buffer, "%x", &x ); + return x; + } +}; + +/* + * Convert hexadecimal string to ASCII, e.g. X'434154' to "CAT". + */ +char * +hex_decode( const char input[] ) { + const size_t len = strlen(input); + assert( 0 == len % 2 ); + + auto output = static_cast<char *>(xcalloc( 1, 1 + len / 2 )); + auto beg = reinterpret_cast<const hex_pair_t*>(input + 0), + end = reinterpret_cast<const hex_pair_t*>(input + len); + std::transform( beg, end, output, scan_hex() ); + return output; +} + +/* + * Verify unique procedure reference. + * + * Section and paragraph names need not be unique unless they are + * referenced, for example by PERFORM. + * + * When a program contains sections, a paragraph can be referenced + * without qualification if it's unique within the current section or + * globally. Else <para> OF <sect> is required. That means the + * validity of a reference depends on location of reference, which is + * why order matters. (We can't use line number because the Cobol text + * could be all on one line.) + * + * We maintain a map of referenceable {section,paragraph} pairs, with + * a count. A count of 1 means it's globally unique. + * + * For local calls, we maintain a multimap of sections (whose names might + * not be unique) in order of appearance. For each section, we have a + * set of paragraph names defined by the section, and a count, plus a + * list of references: {section,paragraph} names used by PERFORM or + * similar. + * + * To determine if a call is valid: + * For each key {section}: + * for each reference: + * Local: if section is empty or matches the key, the call is valid if + * if the paragraph name is unique within section: + * valid if count == 1 + * Global: valid if {section,paragraph} is unique in the global map + * + * Line numbers are just decoration. + */ + +bool +procref_base_t::operator<( const procref_base_t& that ) const { + int result = strcasecmp(section(), that.section()); + + if( result == 0 ) { + return strcasecmp(paragraph(), that.paragraph()) < 0; + } + return result < 0; +} + +bool +procref_base_t::operator==( const procref_base_t& that ) const { + return + 0 == strcasecmp(section(), that.section()) && + 0 == strcasecmp(paragraph(), that.paragraph()); +} + +class procdef_t : public procref_base_t { + size_t isym; +public: + procdef_t( const char *section, const char *paragraph, size_t isym ) + : procref_base_t(section, paragraph) + , isym(isym) + { + assert(isym); + } + procdef_t( const procref_base_t& ref ) + : procref_base_t(ref) + , isym(0) + {} + + bool operator<( const procdef_t& that ) const { + return procref_base_t(*this) < procref_base_t(that); + } + + bool operator<( const procref_base_t& that ) const { + if( that.has_section() ) { + return procref_base_t(*this) < that; + } + return strcasecmp(paragraph(), that.paragraph()) < 0; + } + + cbl_label_t * label_of() const { + return isym == 0? NULL : cbl_label_of(symbol_at(isym)); + } +}; + +/* + * Every reference occurs in a {program,section,paragraph} context, + * even if they're implicit. + */ + +typedef std::multimap<procdef_t, std::list<procref_t>> procedures_t; + +static std::map<size_t, procedures_t> programs; +static procedures_t::iterator current_procedure = programs.end()->second.end(); + +/* + * If a procedure reference uses only one name, it could refer to a + * section or paragraph. The "paragraph" name in the reference, if not + * paired with a section name, might refer to a section. + * + * For a 1-name reference: + * a global match means the name is defined exactly once + * a local match matches a unique paragraph name in the + * section in which the reference occurs, or the section name itself + * + * No paragraph can have the same name as a section. + */ +class procedure_match { + const procref_base_t& ref; +public: + procedure_match( const procref_base_t& ref ) : ref(ref) {} + // Match a 2-name reference to section & paragraph, else to one or the other. + bool operator()( procedures_t::const_reference elem ) { + const procdef_t& key = elem.first; + + if( ref.has_section() ) return ref == key; + + bool hit = + (!key.has_paragraph() && 0 == strcasecmp(key.section(), ref.paragraph())) + || 0 == strcasecmp(key.paragraph(), ref.paragraph()); + return hit; + } +}; + +static bool +globally_unique( size_t program, const procref_t& ref ) { + const procedures_t& procedures = programs[program]; + assert(!procedures.empty()); + return 1 == count_if(procedures.begin(), procedures.end(), procedure_match(ref)); +} + +static bool +locally_unique( size_t program, const procdef_t& key, const procref_t& ref ) { + const procedures_t& procedures = programs[program]; + assert(!procedures.empty()); + const char *section_name = ref.has_section()? ref.section() : key.section(); + procref_base_t full_ref(section_name, ref.paragraph()); + + if( getenv(__func__) ) { + dbgmsg("%s: %zu for ref %s of '%s' (line %d) " + "in %s of '%s' (as %s of '%s')", __func__, + procedures.count(full_ref), + ref.paragraph(), ref.section(), ref.line_number(), + key.paragraph(), key.section(), + full_ref.paragraph(), full_ref.section() ); + } + + return 1 == procedures.count(full_ref); +} + +// Add each section and paragraph to the map as it occurs in the Cobol text. +void +procedure_definition_add( size_t program, const cbl_label_t *procedure ) { + const char *section_name = NULL, *paragraph_name = NULL; + size_t isym = symbol_index(symbol_elem_of(procedure)); + + if( procedure->type == LblParagraph ) { + if( procedure->parent > 0) { + section_name = cbl_label_of(symbol_at(procedure->parent))->name; + } + paragraph_name = procedure->name; + + } else { + assert( procedure->type == LblSection ); + section_name = procedure->name; + } + + procdef_t key( section_name, paragraph_name, isym ); + if( getenv(__func__) ) { + dbgmsg("%s: #%3zu %s of %s", __func__, isym, paragraph_name, section_name); + } + current_procedure = + programs[program].insert( make_pair(key, procedures_t::mapped_type()) ); +} + +// Add each procedure reference as it occurs in the Cobol text, in context. +void +procedure_reference_add( const char *section, const char *paragraph, + int line, size_t context ) +{ + if( getenv(__func__) ) { + dbgmsg("%s: line %3d %s of %s", __func__, line, paragraph, section); + } + current_procedure->second.push_back( procref_t(section, paragraph, + line, context) ); +} + +// Verify each reference in a map element is locally or globally unique +class is_unique { + size_t program; + procedures_t::key_type key; +public: + is_unique( size_t program, const procedures_t::key_type& key ) + : program(program) + , key(key) + {} + + bool operator()( procedures_t::mapped_type::const_reference ref ) { + return + locally_unique( program, key, ref ) || + globally_unique( program, ref); + } +}; + +procref_t * +ambiguous_reference( size_t program ) { + procedures_t& procedures = programs[program]; + + for( const auto& proc : procedures ) { + procedures_t::mapped_type::const_iterator + ambiguous = find_if_not( proc.second.begin(), proc.second.end(), + is_unique(program, proc.first) ); + if( proc.second.end() != ambiguous ) { + if( yydebug || getenv("symbol_label_add")) { + dbgmsg("%s: %s of '%s' has %zu potential matches", __func__, + ambiguous->paragraph(), ambiguous->section(), + procedures.count(*ambiguous)); + } + return new procref_t(*ambiguous); + } + } + return NULL; +} + +/* + * See declaratives nonterminal in parse.y + */ +// Todo: unused +cbl_label_t * +intradeclarative_reference() { + const procedures_t& procedures = programs[current_program_index()]; + + for( auto elem : procedures ) { + procdef_t key( elem.first ); + auto L = key.label_of(); + if( L->type != LblNone ) return L; + } + return NULL; +} + +class next_group { + size_t isym; +public: + next_group( symbol_elem_t *group ) : isym(symbol_index(group)) {} + + // return true if elem is not a member of the group + bool operator()( const symbol_elem_t& elem ) { + if( elem.type != SymField ) return false; + if( symbol_index(&elem) == isym ) return false; + return cbl_field_of(&elem)->parent < isym; + } +}; + +static void +parent_names( const symbol_elem_t *elem, + const symbol_elem_t *group, std::list<const char *>& names ) { + + if( is_filler(cbl_field_of(elem)) ) return; + + // dbgmsg("%s: asked about %s of %s (%zu away)", __func__, + // cbl_field_of(elem)->name, + // cbl_field_of(group)->name, elem - group); + + for( const symbol_elem_t *e=elem; e && group < e; e = symbol_parent(e) ) { + names.push_front( cbl_field_of(e)->name ); + } +} + +extern int yylineno; +class find_corresponding { +public: + enum type_t { arith_op, move_op }; +private: + symbol_elem_t *lgroup, *rgroup; + type_t type; +public: + find_corresponding( symbol_elem_t *lgroup, + symbol_elem_t *rgroup, type_t type ) + : lgroup(lgroup), rgroup(rgroup), type(type) + { + dbgmsg( "%s:%d: for #%zu %s and #%zu %s on line %d", __func__, __LINE__, + symbol_index(lgroup), cbl_field_of(lgroup)->name, + symbol_index(rgroup), cbl_field_of(rgroup)->name, yylineno ); + } + + static bool + any_redefines( const cbl_field_t& field, const symbol_elem_t *group ) { + for( const cbl_field_t *f = &field; f && f->parent > 0; f = parent_of(f) ) { + symbol_elem_t *e = symbol_at(f->parent); + if( e == group || e->type != SymField ) break; + if( symbol_redefines(f) ) return true; + } + return false; + } + + corresponding_fields_t::value_type + operator()( const symbol_elem_t& that ) { + if( &that == lgroup ) return std::make_pair(0,0); + if( that.type != SymField ) return std::make_pair(0,0); + + const cbl_field_t& lfield( *cbl_field_of(&that) ); + + switch(lfield.level) { + case 66: case 77: case 88: + return std::make_pair(0,0); + default: + if( any_redefines(lfield, lgroup) ) return std::make_pair(0,0); + if( is_filler(&lfield) ) return std::make_pair(0,0); + if( is_table(&lfield) ) return std::make_pair(0,0); + break; + } + + std::list<const char *> names; + parent_names( &that, lgroup, names ); + names.push_front(cbl_field_of(rgroup)->name); + + symbol_elem_t *e = symbol_find_of( that.program, names, symbol_index(rgroup) ); + if( !e ) return std::make_pair(0,0); + + const cbl_field_t& rfield( *cbl_field_of(e) ); + + switch(rfield.level) { + case 66: case 77: case 88: + return std::make_pair(0,0); + default: + if( any_redefines(rfield, rgroup) ) return std::make_pair(0,0); + if( is_table(&rfield) ) return std::make_pair(0,0); + break; + } + + switch(type) { + case arith_op: + if( !(is_numeric(lfield.type) && is_numeric(rfield.type)) ) { + return std::make_pair(0,0); + } + break; + case move_op: + if( !(is_elementary(lfield.type) || is_elementary(rfield.type)) ) { + return std::make_pair(0,0); + } + break; + } + + return std::make_pair( symbol_index(&that), symbol_index(e)); + } +}; + +static corresponding_fields_t +corresponding_fields( cbl_field_t *lhs, cbl_field_t *rhs, + find_corresponding::type_t type ) { + corresponding_fields_t output; + assert(lhs); assert(rhs); + assert(lhs->type == FldGroup && rhs->type == FldGroup); + + struct { symbol_elem_t *a, *z; } lhsg; + + lhsg.a = symbols_begin(field_index(lhs)); + lhsg.z = std::find_if( lhsg.a, symbols_end(), next_group(lhsg.a) ); + + dbgmsg("%s:%d: examining %zu symbols after %s", __func__, __LINE__, + lhsg.z - lhsg.a, lhs->name); + + find_corresponding finder( symbol_at(field_index(lhs)), + symbol_at(field_index(rhs)), type ); + std::transform( lhsg.a, lhsg.z, std::inserter(output, output.begin()), finder ); + + output.erase(0); + + dbgmsg( "%s:%d: %s and %s have %zu corresponding fields", + __func__, __LINE__, lhs->name, rhs->name, output.size() ); + + return output; +} + +corresponding_fields_t +corresponding_move_fields( cbl_field_t *lhs, cbl_field_t *rhs ) { + return corresponding_fields( lhs, rhs, find_corresponding::move_op ); +} + +corresponding_fields_t +corresponding_arith_fields( cbl_field_t *lhs, cbl_field_t *rhs ) { + return corresponding_fields( lhs, rhs, find_corresponding::arith_op ); +} + +char +date_time_fmt( const char input[] ) { + if( ! input ) return 0; + +#define DATE_FMT_B "(YYYYMMDD|YYYYDDD|YYYYWwwD)" +#define DATE_FMT_E "(YYYY-MM-DD|YYYY-DDD|YYYY-Www-D)" +#define TIME_FMT1 "hhmmss([.,]s+)?" +#define TIME_FMT3 "hhmmss([.,]s+)?Z" +#define TIME_FMT5 "hhmmss([.,]s+)?[+]hhmm" +#define TIME_FMT2 "hh:mm:ss([.,]s+)?" +#define TIME_FMT4 "hh:mm:ss([.,]s+)?Z" +#define TIME_FMT6 "hh:mm:ss([.,]s+)?[+]hh:mm" + +#define TIME_FMT_B "(" TIME_FMT1 "|" TIME_FMT3 "|" TIME_FMT5 ")" +#define TIME_FMT_E "(" TIME_FMT2 "|" TIME_FMT4 "|" TIME_FMT6 ")" + + static bool compiled = false; + static struct fmts_t { + regex_t reg; char type; char pattern[256]; + } fmts[] = { + { regex_t(), 'D', "^((" DATE_FMT_B "T" TIME_FMT_B ")|(" + DATE_FMT_E "T" TIME_FMT_E "))$" }, + { regex_t(), 'd', "^(" DATE_FMT_B "|" DATE_FMT_E ")$" }, + { regex_t(), 't', "^(" TIME_FMT_B "|" TIME_FMT_E ")$" }, + }; + int erc, cflags = REG_EXTENDED | REG_ICASE, eflags=0; + regmatch_t m[5]; + char result = 0; + + if( ! compiled ) { + for( auto& fmt : fmts ) { + if( (erc = regcomp(&fmt.reg, fmt.pattern, cflags)) != 0 ) { + char msg[80]; + regerror(erc, &fmt.reg, msg, sizeof(msg)); + cbl_errx( "%s: regcomp: %s", __func__, msg ); + } + } + compiled = true; + } + + for( auto& fmt : fmts ) { + if( 0 == regexec(&fmt.reg, input, COUNT_OF(m), m, eflags) ) { + result = fmt.type; + break; + } + } + + return result; +} + + + +/* + * Development suppport + */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" + +struct input_file_t { + ino_t inode; + int lineno; + const char *name; + const line_map *lines; + + input_file_t( const char *name, ino_t inode, + int lineno=1, const line_map *lines = NULL ) + : inode(inode), lineno(lineno), name(name), lines(lines) + { + if( inode == 0 ) inode_set(); + } + bool operator==( const input_file_t& that ) const { + return inode == that.inode; + } + protected: + void inode_set() { + struct stat sb; + if( -1 == stat(name, &sb) ) { + cbl_err( "could not stat '%s'", name); + } + inode = sb.st_ino; + } +}; + +class unique_stack : public std::stack<input_file_t> +{ + public: + bool push( const value_type& value ) { + auto ok = std::none_of( c.cbegin(), c.cend(), + [value]( auto& that ) { + return value == that; + } ); + if( ok ) { + std::stack<input_file_t>::push(value); + return true; + } + size_t n = c.size(); + if( n > 1 ) { + char *wd = get_current_dir_name(); + if( wd ) { + dbgmsg( "depth line copybook filename\n" + "----- ---- --------" + "----------------------------------------"); + for( const auto& v : c ) { + dbgmsg( " %4zu %4d %s", c.size() - --n, v.lineno, no_wd(wd, v.name) ); + } + } else { + dbgmsg("unable to get current working directory: %m"); + } + free(wd); + } + return false; + } + const char * + no_wd( const char *wd, const char *name ) { + int i; + for( i=0; wd[i] == name[i]; i++ ) i++; + if( wd[i] == '\0' && name[i] == '/' ) i++; + return yydebug? name : name + i; + } +}; + +static const char *input_filename_vestige; +static unique_stack input_filenames; +static std::map<std::string, ino_t> old_filenames; +static const unsigned int sysp = 0; // not a C header file, cf. line-map.h + +/* + * Maintain a stack of input filenames. Ensure the files are unique (by + * inode), to prevent copybook cycles. Before pushing a new name, Record the + * line number that was is current for the current name, so that it can be + * restored when the usurper is popped. + * + * Both the file-reader (lexio) and the scanner use this stack. Lexio uses it + * to enforce uniqueness, and the scanner to maintain line numbers. + */ +bool cobol_filename( const char *name, ino_t inode ) { + line_map *lines = NULL; + if( inode == 0 ) { + auto p = old_filenames.find(name); + if( p == old_filenames.end() ) { + for( auto& elem : old_filenames ) { + dbgmsg("%6zu %-30s", elem.second, elem.first.c_str()); + } + cbl_errx( "logic error: missing inode for %s", name); + } + inode = p->second; + assert(inode != 0); + } + linemap_add(line_table, LC_ENTER, sysp, name, 1); + input_filename_vestige = name; + bool pushed = input_filenames.push( input_file_t(name, inode, 1, lines) ); + input_filenames.top().lineno = yylineno = 1; + if( getenv(__func__) ) { + dbgmsg(" saving %s with lineno as %d", + input_filenames.top().name, input_filenames.top().lineno); + } + return pushed; +} + +const char * +cobol_lineno_save() { + if( input_filenames.empty() ) return NULL; + auto& input( input_filenames.top() ); + input.lineno = yylineno; + if( getenv(__func__) ) { + dbgmsg(" setting %s with lineno as %d", input.name, input.lineno); + } + return input.name; +} + +const char * +cobol_filename() { + return input_filenames.empty()? input_filename_vestige : input_filenames.top().name; +} + +const char * +cobol_filename_restore() { + assert(!input_filenames.empty()); + const input_file_t& top( input_filenames.top() ); + old_filenames[top.name] = top.inode; + input_filename_vestige = top.name; + + input_filenames.pop(); + if( input_filenames.empty() ) return NULL; + + auto& input = input_filenames.top(); + + input.lines = linemap_add(line_table, LC_LEAVE, sysp, NULL, 0); + + yylineno = input.lineno; + if( getenv("cobol_filename") ) { + dbgmsg("restoring %s with lineno to %d", input.name, input.lineno); + } + return input.name; +} + +static location_t token_location; + +template <typename LOC> +static void +gcc_location_set_impl( const LOC& loc ) { + token_location = linemap_line_start( line_table, loc.last_line, 80 ); + token_location = linemap_position_for_column( line_table, loc.first_column); + location_dump(__func__, __LINE__, "parser", loc); +} + +void gcc_location_set( const YYLTYPE& loc ) { + gcc_location_set_impl(loc); +} + +void gcc_location_set( const YDFLTYPE& loc ) { + gcc_location_set_impl(loc); +} + +#ifdef NDEBUG +# define verify_format(M) +#else +#include <regex.h> + +static void +verify_format( const char gmsgid[] ) { + static const char pattern[] = "%[[:digit:]][[:digit:].]*[^s]"; + static regex_t re; + static int cflags = REG_EXTENDED; + static int status = regcomp( &re, pattern, cflags ); + static char errbuf[80]; + + + + if( status != 0 ) { + int n = regerror(status, &re, errbuf, sizeof(errbuf)); + gcc_assert(size_t(n) < sizeof(errbuf)); + fprintf(stderr, "%s:%d: %s", __func__, __LINE__, errbuf); + return; + } + gcc_assert(status == 0); + + regmatch_t rm[30]; + + if( REG_NOMATCH != regexec(&re, gmsgid, COUNT_OF(rm), rm, 0) ){ + fprintf(stderr, "bad diagnositic format: '%s'\n", gmsgid); + } +} +#endif + +static const diagnostic_option_id option_zero; +size_t parse_error_inc(); + +void +ydferror( const char gmsgid[], ... ) { + verify_format(gmsgid); + parse_error_inc(); + auto_diagnostic_group d; + va_list ap; + va_start (ap, gmsgid); + rich_location richloc (line_table, token_location); + bool ret = global_dc->diagnostic_impl (&richloc, nullptr, option_zero, + gmsgid, &ap, DK_ERROR); + va_end (ap); +} + +extern int yychar; +extern YYLTYPE yylloc; + +/* + * temp_loc_t is a hack in lieu of "%define parse.error custom". When + * instantiated, if there is a lookahead token (or one is provided), it sets + * the global token_location, which is passed to the diagnostic framework. The + * original value is restored when the instantiated variable goes out of scope. + */ +class temp_loc_t : protected YYLTYPE { + location_t orig; + public: + temp_loc_t() : orig(token_location) { + if( yychar < 3 ) return; + + gcc_location_set(yylloc); // use lookahead location + } + temp_loc_t( const YYLTYPE& loc) : orig(token_location) { + gcc_location_set(loc); + } + temp_loc_t( const YDFLTYPE& loc) : orig(token_location) { + YYLTYPE lloc = { + loc.first_line, loc.first_column, + loc.last_line, loc.last_column }; + gcc_location_set(lloc); + } + ~temp_loc_t() { + if( orig != token_location ) { + token_location = orig; + } + } +}; + +/* + * Both CDF and parser need to call error_msg, each with their own distinct + * location type, not because they *need* to be different, but because they + * are, as an artifact of using different prefixes. Possibly a better plan + * would be to convert cdf.y to a pure parser, using no global variables. But + * this is where we are. + * + * Because we can't reliably instantiate it as a forward-declared template + * function, and because the paramters are variadic, we can't use a template + * function or call one. So, a macro. + */ + +#define ERROR_MSG_BODY \ + temp_loc_t looker(loc); \ + verify_format(gmsgid); \ + parse_error_inc(); \ + global_dc->begin_group(); \ + va_list ap; \ + va_start (ap, gmsgid); \ + rich_location richloc (line_table, token_location); \ + bool ret = global_dc->diagnostic_impl (&richloc, nullptr, option_zero, \ + gmsgid, &ap, DK_ERROR); \ + va_end (ap); \ + global_dc->end_group(); + + +void error_msg( const YYLTYPE& loc, const char gmsgid[], ... ) { + ERROR_MSG_BODY +} + +void error_msg( const YDFLTYPE& loc, const char gmsgid[], ... ) { + ERROR_MSG_BODY +} + +void +cdf_location_set(YYLTYPE loc) { + extern YDFLTYPE ydflloc; + + ydflloc.first_line = loc.first_line; + ydflloc.first_column = loc.first_column; + ydflloc.last_line = loc.last_line; + ydflloc.last_column = loc.last_column; +} + +void +yyerror( const char gmsgid[], ... ) { + temp_loc_t looker; + verify_format(gmsgid); + parse_error_inc(); + global_dc->begin_group(); + va_list ap; + va_start (ap, gmsgid); + rich_location richloc (line_table, token_location); + bool ret = global_dc->diagnostic_impl (&richloc, nullptr, option_zero, + gmsgid, &ap, DK_ERROR); + va_end (ap); + global_dc->end_group(); +} + +bool +yywarn( const char gmsgid[], ... ) { + verify_format(gmsgid); + auto_diagnostic_group d; + va_list ap; + va_start (ap, gmsgid); + auto ret = emit_diagnostic_valist( DK_WARNING, token_location, + option_zero, gmsgid, &ap ); + va_end (ap); + return ret; +} + +/* + * Sometimes during parsing an error is noticed late. This message refers back + * to an arbitrary file and line number. + */ +void +yyerrorvl( int line, const char *filename, const char fmt[], ... ) { + verify_format(fmt); + parse_error_inc(); + auto_diagnostic_group d; // not needed unless we can use global_dc + char *msg; + va_list ap; + + va_start(ap, fmt); + msg = xvasprintf(fmt, ap); + + if( !filename ) filename = cobol_filename(); + + fprintf( stderr, "%s:%d: %s\n", filename, line, msg); + + free(msg); + va_end(ap); +} + +static inline size_t +matched_length( const regmatch_t& rm ) { return rm.rm_eo - rm.rm_so; } + +const char * +cobol_fileline_set( const char line[] ) { + static const char pattern[] = "#line +([[:alnum:]]+) +[\"']([^\"']+). *\n"; + static const int cflags = REG_EXTENDED | REG_ICASE; + static regex_t re, *preg = NULL; + + int erc; + regmatch_t pmatch[4]; + + if( !preg ) { + if( (erc = regcomp(&re, pattern, cflags)) != 0 ) { + regerror(erc, &re, regexmsg, sizeof(regexmsg)); + dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); + return line; + } + preg = &re; + } + if( (erc = regexec(preg, line, COUNT_OF(pmatch), pmatch, 0)) != 0 ) { + if( erc != REG_NOMATCH ) { + regerror(erc, preg, regexmsg, sizeof(regexmsg)); + dbgmsg( "%s:%d: could not compile regex: %s", __func__, __LINE__, regexmsg ); + return line; + } + error_msg(yylloc, "invalid #line directive: %s", line ); + return line; + } + + const char + *line_str = xstrndup(line + pmatch[1].rm_so, matched_length(pmatch[1])), + *filename = xstrndup(line + pmatch[2].rm_so, matched_length(pmatch[2])); + int fileline; + + if( 1 != sscanf(line_str, "%d", &fileline) ) + yywarn("could not parse line number %s from #line directive", line_str); + + input_file_t input_file( filename, ino_t(0), fileline ); // constructor sets inode + + if( getenv(__func__) ) return filename; // ignore #line directive + + if( input_filenames.empty() ) { + input_file.lines = linemap_add(line_table, LC_ENTER, sysp, filename, 1); + input_filenames.push(input_file); + } + + input_file_t& file = input_filenames.top(); + file = input_file; + yylineno = file.lineno; + + return file.name; +} + +class timespec_t { + struct timespec now; + public: + timespec_t() { + clock_gettime(CLOCK_MONOTONIC, &now); + } + double ns() const { + return now.tv_sec * 1000000000 + now.tv_nsec; + } + friend double operator-( const timespec_t& now, const timespec_t& then ); +}; + +double +operator-( const timespec_t& then, const timespec_t& now ) { + return (now.ns() - then.ns()) / 1000000000; +} + +static int +parse_file( const char filename[] ) +{ + if( (yyin = cdftext::lex_open(filename)) == NULL) { + cbl_err("cannot open %s", filename); + } + + parser_enter_file(filename); + + timespec_t start; + + int erc = yyparse(); + + timespec_t finish; + double dt = finish - start; + parser_leave_file(); + + //printf("Overall parse & generate time is %.6f seconds\n", dt); + + fclose (yyin); + + if( erc ) { + error_at (UNKNOWN_LOCATION, "failed compiling %s", filename); + } + + return erc; +} + +#pragma GCC diagnostic pop + +extern int yy_flex_debug, yydebug, ydfdebug; +extern int f_trace_debug; + +void cobol_set_indicator_column( int column ); + +void +cobol_set_debugging( bool flex, bool yacc, bool parser ) +{ + yy_flex_debug = flex? 1 : 0; + ydfdebug = yydebug = yacc? 1 : 0; + f_trace_debug = parser? 1 : 0; + + char *ind = getenv("INDICATOR_COLUMN"); + if( ind ) { + int col; + if( 1 != sscanf(ind, "%d", &col) ) { + yywarn("ignored non-integer value for INDICATOR_COLUMN=%s", ind); + } + cobol_set_indicator_column(col); + } +} + +os_locale_t os_locale = { "UTF-8", xstrdup("C.UTF-8") }; + + +void +cobol_parse_files (int nfile, const char **files) +{ + char * opaque = setlocale(LC_CTYPE, ""); + if( ! opaque ) { + yywarn("setlocale: unable to initialize LOCALE"); + } else { + char *codeset = nl_langinfo(CODESET); + if( ! codeset ) { + yywarn("nl_langinfo failed after setlocale succeeded"); + } else { + os_locale.codeset = codeset; + } + } + assert(os_locale.codeset); + + for (int i = 0; i < nfile; i++) { + parse_file (files[i]); + } +} + +/* Outputs the formatted string onto the file descriptor */ + +void +cbl_message(int fd, const char *format_string, ...) + { + va_list ap; + va_start(ap, format_string); + char *ostring = xvasprintf(format_string, ap); + va_end(ap); + write(fd, ostring, strlen(ostring)); + free(ostring); + } + +/* Uses the GCC internal_error () to output the formatted string. Processing + ends with a stack trace */ + +void +cbl_internal_error(const char *gmsgid, ...) { + verify_format(gmsgid); + auto_diagnostic_group d; + va_list ap; + va_start(ap, gmsgid); + emit_diagnostic_valist( DK_ICE, token_location, option_zero, gmsgid, &ap ); + va_end(ap); +} + +void +cbl_unimplementedw(const char *gmsgid, ...) { + verify_format(gmsgid); + auto_diagnostic_group d; + va_list ap; + va_start(ap, gmsgid); + emit_diagnostic_valist( DK_SORRY, token_location, option_zero, gmsgid, &ap ); + va_end(ap); +} + +void +cbl_unimplemented(const char *gmsgid, ...) { + verify_format(gmsgid); + auto_diagnostic_group d; + va_list ap; + va_start(ap, gmsgid); + emit_diagnostic_valist( DK_SORRY, token_location, option_zero, gmsgid, &ap ); + va_end(ap); +} + +void +cbl_unimplemented_at( const YYLTYPE& loc, const char *gmsgid, ... ) { + temp_loc_t looker(loc); + verify_format(gmsgid); + auto_diagnostic_group d; + va_list ap; + va_start(ap, gmsgid); + emit_diagnostic_valist( DK_SORRY, token_location, option_zero, gmsgid, &ap ); + va_end(ap); +} + +/* + * analogs to err(3) and errx(3). + */ +void +cbl_err(const char *fmt, ...) { + auto_diagnostic_group d; + char *gmsgid = xasprintf("%m: %s", fmt); + verify_format(gmsgid); + va_list ap; + va_start(ap, fmt); + emit_diagnostic_valist( DK_FATAL, token_location, option_zero, gmsgid, &ap ); + va_end(ap); +} +void +cbl_errx(const char *gmsgid, ...) { + verify_format(gmsgid); + auto_diagnostic_group d; + va_list ap; + va_start(ap, gmsgid); + emit_diagnostic_valist( DK_FATAL, token_location, option_zero, gmsgid, &ap ); + va_end(ap); + } + +void +dbgmsg(const char *msg, ...) { + if( yy_flex_debug || yydebug ) { + fflush(stdout); + va_list ap; + va_start(ap, msg); + vfprintf(stderr, msg, ap); + fprintf(stderr, "\n"); + va_end(ap); + } +} + +void +dialect_error( const YYLTYPE& loc, const char term[], const char dialect[] ) { + error_msg(loc, "%s is not ISO syntax, requires -dialect %s", + term, dialect); +} + +bool fisdigit(int c) + { + return ISDIGIT(c); + } +bool fisspace(int c) + { + return ISSPACE(c); + }; +int ftolower(int c) + { + return TOLOWER(c); + } +bool fisprint(int c) + { + return ISPRINT(c); + }; diff --git a/gcc/cobol/util.h b/gcc/cobol/util.h new file mode 100644 index 0000000..eb08ed7 --- /dev/null +++ b/gcc/cobol/util.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _UTIL_H_ +#define _UTIL_H_ + +void cbl_message(int fd, const char *format_string, ...); +void cbl_internal_error(const char *format_string, ...); + +void cbl_err(const char *format_string, ...); +void cbl_errx(const char *format_string, ...); + +bool fisdigit(int c); +bool fisspace(int c); +int ftolower(int c); +bool fisprint(int c); + +const char * cobol_filename_restore(); +const char * cobol_lineno_save(); + + +#endif |