/* * Copyright (c) 2021-2025 Symas Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of the Symas Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: Unlike charmaps-copy.cc and valprint-copy.cc, this file implements // the Compiler Directives Facility for the COBOL "COPY" statement. So, this // file is the actual source code, and not a copy of something in libgcobol // // We regret any confusion engendered. #include "cobol-system.h" #include "cbldiag.h" #include "util.h" #include "copybook.h" #include #define COUNT_OF(X) (sizeof(X) / sizeof(X[0])) /* * There are 3 kinds of replacement types: * 1. keywords, identifiers, figurative constants, and function names * 2. string literals * 3. pseudo-text * * Types #1 and #3 are delimited by separators: * [[:space:],.;()]. String literals begin and end with ["] or ['] * (matched). * * Space in pseudo-text is "elastic"; one or more in the matching * argument matches one or more in the input. Exception: when the * argument is only a comma or semicolon, it matches exactly. * * The matching algorithm operates on the source file word by word. * Comments are copied literally, as are any CDF statements. * * The candidate word is used as the beginning of all possible * matches, in the order they appear in the COPY statement. If none * match, the word is copied to the output and the next word is * tried. * * On a match, the replacement is applied, the result copied to the * output, and the next word is tried, starting again from the first * match candidate. * * The parser composes the regular expressions. It "literalizes" * any regex metacharacters that may appear in the COPY text and * constructs the correct matching expression for "stretchable" * space. This function only applies them. */ extern int yydebug; const char * cobol_filename(); bool is_fixed_format(); bool is_reference_format(); struct line_t { char *p, *pend; line_t( size_t len, char *data ) : p(data), pend(data + len) { gcc_assert(p && p <= pend); } line_t( char *data, char *eodata ) : p(data), pend(eodata) { gcc_assert(p && p <= pend); } ssize_t size() const { return pend - p; } }; static bool is_separator_space( const char *p) { switch( *p ) { case ',': case ';': if( p[1] == 0x20 ) return true; break; } return ISSPACE(*p); } static void verify_bounds( size_t pos, size_t size, const char input[] ) { gcc_assert(pos < size ); if( !( pos < size) ) { cbl_internal_error( "REPLACING %zu characters exceeds system capacity" "'%s'", pos, input); } } /* * Replace any separators in the copybook's REPLACING candidate with * "stretchable" space. Escape any regex metacharacters in candidate. * * "For matching purposes, each occurrence of a separator comma, a * separator semicolon, or a sequence of one or more separator spaces * is considered to be a single space." * * If the indicator column is column 7 and is a 'D', we treat that as * a SPACE for the purposes of matching a COPY REPLACING or REPLACE * directive. */ const char * esc( size_t len, const char input[] ) { static char spaces[] = "([,;]?[[:space:]])+"; static char spaceD[] = "(\n {6}D" "|" "[,;]?[[:space:]])+"; static char buffer[64 * 1024]; char *p = buffer; const char *eoinput = input + len; const char *spacex = is_reference_format()? spaceD : spaces; for( const char *s=input; *s && s < eoinput; s++ ) { *p = '\0'; verify_bounds( 4 + size_t(p - buffer), sizeof(buffer), buffer ); switch(*s) { case '^': case '$': case '(': case ')': case '*': case '+': case '?': case '[': case ']': case '{': case '}': case '|': case '.': *p++ = '\\'; *p++ = *s; break; case '\\': *p++ = '['; *p++ = *s; *p++ = ']'; break; case ';': case ',': if( ! (s+1 < eoinput && s[1] == 0x20) ) { *p++ = *s; break; } __attribute__((fallthrough)); case 0x20: case '\n': verify_bounds( (p + sizeof(spacex)) - buffer, sizeof(buffer), buffer ); p = stpcpy( p, spacex ); while( s+1 < eoinput && is_separator_space(s+1) ) { s++; } break; default: *p++ = *s; break; } } *p = '\0'; #if 0 dbgmsg("%s:%d: regex '%s'", __func__, __LINE__, buffer); #endif return buffer; // caller must strdup static buffer } static int glob_error(const char *epath, int eerrno) { dbgmsg("%s: COPY file search: '%s': %s", __func__, epath, xstrerror(eerrno)); return 0; } void copybook_directory_add( const char gcob_copybook[] ) { if( !gcob_copybook ) return; char *directories = xstrdup(gcob_copybook), *p = directories; char *eodirs = strchr(directories, '\0'); gcc_assert(eodirs); do { char *pend = std::find(p, eodirs, ':'); if( pend != eodirs ) { *pend = '\0'; } copybook.directory_add(p); p = pend; } while( ++p < eodirs ); } class case_consistent { int lower_upper; // -1 lower, 1 upper public: case_consistent() : lower_upper(0) {} bool operator()( char ch ) { if( !ISALPHA(ch) ) return true; int lu = ISLOWER(ch)? -1 : 1; if( !lower_upper ) { lower_upper = lu; return true; } return lu == lower_upper; } }; void copybook_extension_add( const char ext[] ) { char *alt = NULL; bool one_case = std::all_of( ext, ext + strlen(ext), case_consistent() ); if( one_case ) { alt = xstrdup(ext); gcc_assert(alt); auto convert = ISLOWER(ext[0])? toupper : tolower; std::transform( alt, alt+strlen(alt), alt, convert ); } copybook.extensions_add( ext, alt ); } extern int yydebug; const char * copybook_elem_t::extensions; void copybook_t::extensions_add( const char ext[], const char alt[] ) { char *output; if( alt ) { output = xasprintf("%s,%s", ext, alt); } else { output = xstrdup(ext); } gcc_assert(output); if( book.extensions ) { char *s = xasprintf("%s,%s", output, book.extensions); free(const_cast(book.extensions)); free(output); book.extensions = s; } else { book.extensions = output; } } static inline ino_t inode_of( int fd ) { struct stat sb; if( -1 == fstat(fd, &sb) ) { cbl_err("could not stat fd %d", fd); } return sb.st_ino; } int copybook_elem_t::open_file( const char directory[], bool literally ) { int erc; char *pattern, *copier = xstrdup(cobol_filename()); char *dname = NULL; if ( directory ) { dname = xstrdup(directory); } else { dname = ldirname(copier); gcc_assert (dname != NULL); /* out of memory */ if( '\0' == dname[0] ) { free (dname); dname = NULL; } } char *path = NULL; if( dname || library.name ) { if( dname && library.name ) { path = xasprintf( "%s/%s/%s", dname, library.name, source.name ); } else { const char *dir = dname? dname : library.name; path = xasprintf( "%s/%s", dir, source.name ); } } else { path = xasprintf( "%s", source.name ); } free(dname); gcc_assert(path); if( literally ) { dbgmsg("copybook_elem_t::open_file: trying %s", path); if( (this->fd = open(path, O_RDONLY)) == -1 ) { dbgmsg("could not open %s: %m", path); return fd; } this->source.name = path; if( ! cobol_filename(this->source.name, inode_of(fd)) ) { error_msg(source.loc, "recursive copybook: '%s' includes itself", path); (void)! close(fd); fd = -1; } return fd; } gcc_assert( ! literally ); if( extensions ) { pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB,%s}", path, this->extensions); } else { pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB}", path); } free(copier); static int flags = GLOB_MARK | GLOB_BRACE | GLOB_TILDE; glob_t globber; if( (erc = glob(pattern, flags, glob_error, &globber)) != 0 ) { switch(erc) { case GLOB_NOSPACE: yywarn("COPY file search: out of memory"); break; case GLOB_ABORTED: yywarn("COPY file search: read error"); break; case GLOB_NOMATCH: dbgmsg("COPY '%s': no files match %s", this->source.name, pattern); default: break; // caller says no file found } return -1; } free(pattern); for( size_t i=0; i < globber.gl_pathc; i++ ) { auto filename = globber.gl_pathv[i]; if( (this->fd = open(filename, O_RDONLY)) != -1 ) { dbgmsg("found copybook file %s", filename); this->source.name = xstrdup(filename); if( ! cobol_filename(this->source.name, inode_of(fd)) ) { error_msg(source.loc, "recursive copybook: '%s' includes itself", this->source); (void)! close(fd); fd = -1; } globfree(&globber); return fd; } } yywarn("could not open copy source for '%s'", source); globfree(&globber); return -1; }