diff options
author | Jerry DeLisle <jvdelisle@gcc.gnu.org> | 2025-09-02 15:58:26 -0700 |
---|---|---|
committer | Jerry DeLisle <jvdelisle@gcc.gnu.org> | 2025-09-02 15:58:26 -0700 |
commit | 071b4126c613881f4cb25b4e5c39032964827f88 (patch) | |
tree | 7ed805786566918630d1d617b1ed8f7310f5fd8e /libgcobol | |
parent | 845d23f3ea08ba873197c275a8857eee7edad996 (diff) | |
parent | caa1c2f42691d68af4d894a5c3e700ecd2dba080 (diff) | |
download | gcc-devel/gfortran-test.zip gcc-devel/gfortran-test.tar.gz gcc-devel/gfortran-test.tar.bz2 |
Merge branch 'master' into gfortran-testdevel/gfortran-test
Diffstat (limited to 'libgcobol')
-rw-r--r-- | libgcobol/ChangeLog | 80 | ||||
-rw-r--r-- | libgcobol/Makefile.am | 1 | ||||
-rw-r--r-- | libgcobol/Makefile.in | 4 | ||||
-rw-r--r-- | libgcobol/common-defs.h | 53 | ||||
-rwxr-xr-x | libgcobol/configure | 1 | ||||
-rw-r--r-- | libgcobol/libgcobol.cc | 489 | ||||
-rw-r--r-- | libgcobol/stringbin.cc | 810 | ||||
-rw-r--r-- | libgcobol/stringbin.h | 57 |
8 files changed, 1221 insertions, 274 deletions
diff --git a/libgcobol/ChangeLog b/libgcobol/ChangeLog index 91a3b86..1751898 100644 --- a/libgcobol/ChangeLog +++ b/libgcobol/ChangeLog @@ -1,3 +1,83 @@ +2025-08-20 Robert Dubner <rdubner@symas.com> + + * libgcobol.cc (format_for_display_internal): Handle NumericDisplay + properly. + (compare_88): Fix memory access error. + (__gg__unstring): Likewise. + +2025-08-13 Mark Wielaard <mark@klomp.org> + + * configure: Regenerate. + +2025-08-13 Robert Dubner <rdubner@symas.com> + + * libgcobol.cc (get_binary_value_local): Use the new routine. + * stringbin.cc (int_from_string): Removed. + (__gg__packed_to_binary): Implement new routine. + * stringbin.h (__gg__packed_to_binary): Likewise. + +2025-08-13 Robert Dubner <rdubner@symas.com> + + * libgcobol.cc (int128_to_field): Use the new routine. + (get_binary_value_local): Use the new routine. + (format_for_display_internal): Formatting. + (__gg__get_file_descriptor): Likewise. + * stringbin.cc (string_from_combined): Formatting. + (packed_from_combined): Likewise. + (int_from_string): New routine. + (__gg__numeric_display_to_binary): Likewise. + * stringbin.h (__gg__numeric_display_to_binary): Likewise. + +2025-08-12 Robert Dubner <rdubner@symas.com> + + * common-defs.h (NUMERIC_DISPLAY_SIGN_BIT): New comment; new constant. + (EBCDIC_MINUS): New constant. + (EBCDIC_PLUS): Likewise. + (EBCDIC_ZERO): Likewise. + (EBCDIC_NINE): Likewise. + (PACKED_NYBBLE_PLUS): Likewise. + (PACKED_NYBBLE_MINUS): Likewise. + (PACKED_NYBBLE_UNSIGNED): Likewise. + (NUMERIC_DISPLAY_SIGN_BIT_ASCII): Likewise. + (NUMERIC_DISPLAY_SIGN_BIT_EBCDIC): Likewise. + (SEPARATE_PLUS): Likewise. + (SEPARATE_MINUS): Likewise. + (ZONED_ZERO): Likewise. + (ZONE_SIGNED_EBCDIC): Likewise. + * configure: Regenerate. + * libgcobol.cc (turn_sign_bit_on): Handle new EBCDIC sign convention. + (turn_sign_bit_off): Likewise. + (is_sign_bit_on): Likewise. + (int128_to_field): EBCDIC NumericDisplay conversion. + (get_binary_value_local): Likewise. + (format_for_display_internal): Likewise. + (normalize_id): Likewise. + (__gg__inspect_format_1): Convert EBCDIC negative numbers to positive. + * stringbin.cc (packed_from_combined): Quell cppcheck warning. + +2025-08-10 H.J. Lu <hjl.tools@gmail.com> + + * configure: Regenerated. + +2025-08-08 Robert Dubner <rdubner@symas.com> + + * libgcobol.cc (int128_to_field): Switch to the new routine. + * stringbin.cc (packed_from_combined): Implement the new routine. + (__gg__binary_to_packed): Likewise. + * stringbin.h (__gg__binary_to_packed): Likewise. + +2025-08-07 Robert Dubner <rdubner@symas.com> + + * Makefile.am: Include new stringbin.cc file. + * Makefile.in: Regenerated. + * libgcobol.cc (__gg__power_of_ten): Improve error message. + (__gg__binary_to_string): Deleted. + (__gg__binary_to_string_internal): Deleted. + (int128_to_field): Use new conversion routine. + (__gg__move): Use new conversion routine. + * stringbin.cc: New file. Implements new conversion routine. + * stringbin.h: New file. Likewise. + 2025-07-13 Robert Dubner <rdubner@symas.com> * common-defs.h (PTRCAST): Moved here from libgcobol.h. diff --git a/libgcobol/Makefile.am b/libgcobol/Makefile.am index 0a17d20..1e3d3432 100644 --- a/libgcobol/Makefile.am +++ b/libgcobol/Makefile.am @@ -42,6 +42,7 @@ libgcobol_la_SOURCES = \ intrinsic.cc \ io.cc \ libgcobol.cc \ + stringbin.cc \ valconv.cc WARN_CFLAGS = -W -Wall -Wwrite-strings diff --git a/libgcobol/Makefile.in b/libgcobol/Makefile.in index 5fdc42c..42dc823 100644 --- a/libgcobol/Makefile.in +++ b/libgcobol/Makefile.in @@ -178,7 +178,7 @@ libgcobol_la_LIBADD = @BUILD_LIBGCOBOL_TRUE@am_libgcobol_la_OBJECTS = charmaps.lo \ @BUILD_LIBGCOBOL_TRUE@ constants.lo gfileio.lo gmath.lo \ @BUILD_LIBGCOBOL_TRUE@ intrinsic.lo io.lo libgcobol.lo \ -@BUILD_LIBGCOBOL_TRUE@ valconv.lo +@BUILD_LIBGCOBOL_TRUE@ stringbin.lo valconv.lo libgcobol_la_OBJECTS = $(am_libgcobol_la_OBJECTS) @BUILD_LIBGCOBOL_TRUE@am_libgcobol_la_rpath = -rpath $(toolexeclibdir) AM_V_P = $(am__v_P_@AM_V@) @@ -404,6 +404,7 @@ gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) @BUILD_LIBGCOBOL_TRUE@ intrinsic.cc \ @BUILD_LIBGCOBOL_TRUE@ io.cc \ @BUILD_LIBGCOBOL_TRUE@ libgcobol.cc \ +@BUILD_LIBGCOBOL_TRUE@ stringbin.cc \ @BUILD_LIBGCOBOL_TRUE@ valconv.cc @BUILD_LIBGCOBOL_TRUE@WARN_CFLAGS = -W -Wall -Wwrite-strings @@ -526,6 +527,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intrinsic.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/io.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgcobol.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stringbin.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/valconv.Plo@am__quote@ .cc.o: diff --git a/libgcobol/common-defs.h b/libgcobol/common-defs.h index 15d0683..80e524c 100644 --- a/libgcobol/common-defs.h +++ b/libgcobol/common-defs.h @@ -52,12 +52,53 @@ // COBOL tables can have up to seven subscripts #define MAXIMUM_TABLE_DIMENSIONS 7 -// This bit gets turned on in the first or last byte (depending on the leading_e attribute -// phrase) of a NumericDisplay to indicate that the value is negative. - -// When running the EBCDIC character set, the meaning of this bit is flipped, -// because an EBCDIC zero is 0xF0, while ASCII is 0x30 -#define NUMERIC_DISPLAY_SIGN_BIT 0x40 +/* COBOL has the concept of Numeric Display values, which use an entire byte + per digit. IBM also calls this "Zoned Decimal". + + In ASCII, the digits are '0' through '9' (0x30 through 0x39'. Signed + values are indicated by turning on the 0x40 bit in either the first + byte (for LEADING variables) or the last byte (for TRAILING). + + In IBM EBCDIC, the representation is slightly more complex, because the + concept of Zone carries a little more information. Unsigned numbers are + made up of just the EBCDIC digits '0' through '9' (0xF0 through 0xF9). + + The TRAILING signed value +1234 has the byte sequence 0xF1 0xF2 0xF3 0xC3. + The TRAILING signed value -1234 has the byte sequence 0xF1 0xF2 0xF3 0xD3. + The LEADING signed value +1234 has the byte sequence 0xC1 0xF2 0xF3 0xF3. + The LEADING signed value -1234 has the byte sequence 0xD1 0xF2 0xF3 0xF3. + + Note that for IBM EBCDIC, the nybble indicating sign has the same meaning + as for COMP-3/packed-decimal numbers. + + The effective result of this is that for ASCII, the byte carrying the sign + is made negative by turning on the 0x40 bit. + + For EBCDIC, the value must be constructed properly as a positive value by + setting the high nybble of the sign-carrying byte to 0xC0, after which the + value is flagged negative by turning on the 0x10 bit, turning the 0xC0 to + 0xD0. */ + +#define EBCDIC_MINUS (0x60) +#define EBCDIC_PLUS (0x4E) +#define EBCDIC_ZERO (0xF0) +#define EBCDIC_NINE (0xF9) + +#define PACKED_NYBBLE_PLUS 0x0C +#define PACKED_NYBBLE_MINUS 0x0D +#define PACKED_NYBBLE_UNSIGNED 0x0F + +#define NUMERIC_DISPLAY_SIGN_BIT_ASCII 0x40 +#define NUMERIC_DISPLAY_SIGN_BIT_EBCDIC 0x10 + +#define NUMERIC_DISPLAY_SIGN_BIT (__gg__ebcdic_codeset_in_use ? \ + NUMERIC_DISPLAY_SIGN_BIT_EBCDIC : \ + NUMERIC_DISPLAY_SIGN_BIT_ASCII) + +#define SEPARATE_PLUS (__gg__ebcdic_codeset_in_use ? EBCDIC_PLUS : '+') +#define SEPARATE_MINUS (__gg__ebcdic_codeset_in_use ? EBCDIC_MINUS : '-') +#define ZONED_ZERO (__gg__ebcdic_codeset_in_use ? EBCDIC_ZERO : '0') +#define ZONE_SIGNED_EBCDIC (0xC0) #define LEVEL01 (1) #define LEVEL49 (49) diff --git a/libgcobol/configure b/libgcobol/configure index 7271517..d130002 100755 --- a/libgcobol/configure +++ b/libgcobol/configure @@ -16019,6 +16019,7 @@ fi + use_additional=yes acl_save_prefix="$prefix" diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc index c3d78d4..1b54cfd 100644 --- a/libgcobol/libgcobol.cc +++ b/libgcobol/libgcobol.cc @@ -72,6 +72,8 @@ #include <sys/time.h> #include <execinfo.h> #include "exceptl.h" +#include "stringbin.h" + /* BSD extension. */ #if !defined(LOG_PERROR) @@ -798,7 +800,7 @@ __gg__power_of_ten(int n) fprintf(stderr, "Trying to raise 10 to %d as an int128, which we can't do.\n", n); - fprintf(stderr, "The problem is in %s.\n", __func__); + fprintf(stderr, "The problem is in %s %s:%d.\n", __func__, __FILE__, __LINE__); abort(); } if( n <= MAX_POWER ) @@ -875,56 +877,6 @@ __gg__scale_by_power_of_ten_2(__int128 value, int N) return value; } -extern "C" -bool -__gg__binary_to_string(char *result, int digits, __int128 value) - { - // The result is not terminated, because this routine is used - // to put information directly into cblc_field_t::data - // Our caller has to keep track of whether value was negative. - - // Note that this routine operates in the source code-set space; that is - // the result comes back with zero as an ASCII 0x30, not an EBCDIC 0xF0 - - if( value < 0 ) - { - value = -value; - } - result += digits-1 ; - while( digits-- ) - { - *result-- = value%10 + ascii_zero; - value /= 10; - } - // Should value be non-zero, it means we potentially have a size error - return value != 0; - } - -extern "C" -bool -__gg__binary_to_string_internal(char *result, int digits, __int128 value) - { - // The result is not terminated, because this routine is used - // to put information directly into cblc_field_t::data - // Our caller has to keep track of whether value was negative. - - // Note that this routine operates in the source code-set space; that is - // the result comes back with zero as an ASCII 0x30, not an EBCDIC 0xF0 - - if( value < 0 ) - { - value = -value; - } - result += digits-1 ; - while( digits-- ) - { - *result-- = (value%10) + internal_zero; - value /= 10; - } - // Should value be non-zero, it means we potentially have a size error - return value != 0; - } - static bool value_is_too_big(const cblc_field_t *var, __int128 value, @@ -1019,11 +971,11 @@ turn_sign_bit_on(unsigned char *location) { if( internal_is_ebcdic ) { - *location &= ~NUMERIC_DISPLAY_SIGN_BIT; + *location = (*location & 0xF) + 0xD0; } else { - *location |= NUMERIC_DISPLAY_SIGN_BIT; + *location = (*location & 0xF) + 0x70; } } @@ -1032,11 +984,11 @@ turn_sign_bit_off(unsigned char *location) { if( internal_is_ebcdic ) { - *location |= NUMERIC_DISPLAY_SIGN_BIT; + *location = (*location & 0xF) + 0xF0; } else { - *location &= ~NUMERIC_DISPLAY_SIGN_BIT; + *location = (*location & 0xF) + 0x30; } } @@ -1051,14 +1003,7 @@ is_sign_bit_on(char ch) } else { - if( internal_is_ebcdic ) - { - retval = (ch & NUMERIC_DISPLAY_SIGN_BIT) == 0; - } - else - { - retval = (ch & NUMERIC_DISPLAY_SIGN_BIT) != 0; - } + retval = (ch & NUMERIC_DISPLAY_SIGN_BIT) != 0; } return retval; } @@ -1617,14 +1562,21 @@ int128_to_field(cblc_field_t *var, // Note that sending a signed value to an alphanumeric strips off // any plus or minus signs. + memset(location, 0, length); size_error = __gg__binary_to_string_internal( - PTRCAST(char, location), - length, value); + PTRCAST(char, location), + length > MAX_FIXED_POINT_DIGITS + ? MAX_FIXED_POINT_DIGITS + : length, + value); break; case FldNumericDisplay: if( var->attr & signable_e ) { + /* There is a regrettable plethora of possibilities, here. */ + + // Things get exciting when a numeric-display value is signable if( var->attr & separate_e ) @@ -1636,7 +1588,8 @@ int128_to_field(cblc_field_t *var, // The sign character goes into the first location size_error = __gg__binary_to_string_internal(PTRCAST(char, location+1), - length-1, value); + length-1, + value); location[0] = sign_ch; } else @@ -1650,12 +1603,21 @@ int128_to_field(cblc_field_t *var, } else { - // The sign information is not separate, so we put it into - // the number + /* The sign information is not separate. The sign information + goes into the first byte for LEADING, or the last byte for + TRAILING. For ASCII, the zone will be 0x30. For EBCDIC, + the the zone is 0xC0. Those get modified, respectively, to + 0x70 and 0xD0 when the value is negative. */ + + // First, convert the binary value to the correct-length string size_error = __gg__binary_to_string_internal(PTRCAST(char, location), - length, value); + length, + value); + // Check for a size error on a negative value. It conceivably + // was truncated down to zero, in which case we need to + // suppress this is_negative flag. if( size_error && is_negative ) { // If all of the digits are zero, then the result is zero, and @@ -1671,27 +1633,28 @@ int128_to_field(cblc_field_t *var, } } + unsigned char *sign_location = + var->attr & leading_e ? location : location + length - 1; + + if( internal_is_ebcdic ) + { + // Change the sign location from 0xF0 to 0xC0. + *sign_location &= (ZONE_SIGNED_EBCDIC + 0xF); + } + if( is_negative ) { - if( var->attr & leading_e ) - { - // The sign bit goes into the first digit: - turn_sign_bit_on(&location[0]); - } - else - { - // The sign bit goes into the last digit: - turn_sign_bit_on(&location[length-1]); - } + *sign_location |= NUMERIC_DISPLAY_SIGN_BIT; } } } else { // It's a simple positive number - size_error = __gg__binary_to_string_internal( PTRCAST(char, - location), - length, value); + size_error = __gg__binary_to_string_internal( + PTRCAST(char, location), + length, + value); } break; @@ -1708,7 +1671,7 @@ int128_to_field(cblc_field_t *var, // At this point, value is scaled to the target's rdigits - size_error = __gg__binary_to_string(ach, var->digits, value); + size_error = __gg__binary_to_string_ascii(ach, var->digits, value); ach[var->digits] = NULLCH; // Convert that string according to the PICTURE clause @@ -1749,7 +1712,7 @@ int128_to_field(cblc_field_t *var, case FldAlphaEdited: { char ach[128]; - size_error = __gg__binary_to_string(ach, length, value); + size_error = __gg__binary_to_string_ascii(ach, length, value); ach[length] = NULLCH; // Convert that string according to the PICTURE clause @@ -1763,34 +1726,27 @@ int128_to_field(cblc_field_t *var, case FldPacked: { - static const unsigned char bin2pd[100] = - { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, - } ; - // Convert the binary value to packed decimal. + int digits = var->digits; - // Set the destination bytes to zero - memset(location, 0, length); + // Assume for the moment that the res unsigned char sign_nybble = 0; - if( !(var->attr & packed_no_sign_e) ) + if( var->attr & packed_no_sign_e ) + { + // This is COMP-6 packed decimal, with no sign nybble + sign_nybble = 0; + } + else { // This is COMP-3 packed decimal, so we need to make room to the // right of the final decimal digit for the sign nybble: value *= 10; + digits += 1; // Figure out what the sign nybble is going to be, and make the // the value positive: if(var->attr & signable_e) { + // It is signable, so 0xD for negative, and 0xC for positive if(value < 0) { sign_nybble = 0x0D; @@ -1803,6 +1759,7 @@ int128_to_field(cblc_field_t *var, } else { + // The value is not signable, so the sign nybble is 0xF sign_nybble = 0x0F; if(value < 0) { @@ -1810,43 +1767,25 @@ int128_to_field(cblc_field_t *var, } } } - // ploc points to the current rightmost byte of the location: - unsigned char *ploc = location + length -1 ; - // Build the target from right to left, so that the result is - // big-endian: - while( value && ploc >= location ) - { - *ploc-- = bin2pd[value%100]; - value /= 100; - } + /* We need to check if the value is too big, in case our caller + wants to check for the error condition. In any event, we need + to make sure the value actually fits, because otherwise the + result might have a bad high-place digit for a value with an + odd number of places. */ + + __int128 mask = __gg__power_of_ten(digits); + size_error = !!(value / mask); + value %= mask; + + // We are now set up to do the conversion: + __gg__binary_to_packed(location, digits, value); // We can put the sign nybble into place at this point. Note that // for COMP-6 numbers the sign_nybble value is zero, so the next // operation is harmless. location[length -1] |= sign_nybble; - // If we still have value left, we have a size error - if( value ) - { - size_error = true; - } - else - { - if( ( sign_nybble && !(var->digits&1) ) - || ( !sign_nybble && (var->digits&1) ) ) - { - // This is either - // comp-3 with an even number of digits, or - // comp-6 with an odd number of digits. - // Either way, the first byte of the target has to have a high - // nybble of zero. If it's non-zero, then we have a size error: - if( location[0] & 0xF0 ) - { - size_error = true; - } - } - } // And we're done. break; } @@ -2053,10 +1992,8 @@ get_binary_value_local( int *rdigits, { __int128 retval = 0; - unsigned char ch; switch( resolved_var->type ) { -#if 1 case FldLiteralA : fprintf(stderr, "%s(): is trying to handle a FldLiteralA\n", __func__); abort(); @@ -2065,7 +2002,6 @@ get_binary_value_local( int *rdigits, // resolved_length, // rdigits ); break; -#endif case FldGroup : case FldAlphanumeric : @@ -2076,7 +2012,9 @@ get_binary_value_local( int *rdigits, rdigits ); break; - case FldNumericDisplay : + case FldNumericDisplay: + { + *rdigits = resolved_var->rdigits; if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE ) { // This is a degenerate case, which violates the language @@ -2100,57 +2038,58 @@ get_binary_value_local( int *rdigits, // Make it positive by turning off the highest order bit: (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F; - *rdigits = resolved_var->rdigits; } else { - // Pick up the sign byte, and force our value to be positive + unsigned char *digits; unsigned char *sign_byte_location; - if( (resolved_var->attr & separate_e ) - && (resolved_var->attr & leading_e ) ) + int ndigits; + if( resolved_var->attr & signable_e ) { - sign_byte_location = resolved_location; - ch = *sign_byte_location; - *sign_byte_location = internal_plus; - } - else if( (resolved_var->attr & separate_e) - && !(resolved_var->attr & leading_e ) ) - { - sign_byte_location = resolved_location + resolved_length - 1; - ch = *sign_byte_location; - *sign_byte_location = internal_plus; + // Pick up the sign byte, and force our value to be positive + if( (resolved_var->attr & separate_e ) + && (resolved_var->attr & leading_e ) ) + { + // LEADING SEPARATE + digits = resolved_location+1; + sign_byte_location = resolved_location; + ndigits = resolved_length - 1; + } + else if( (resolved_var->attr & separate_e) + && !(resolved_var->attr & leading_e ) ) + { + // TRAILING SEPARATE + digits = resolved_location; + sign_byte_location = resolved_location + resolved_length - 1; + ndigits = resolved_length - 1; + } + else if( (resolved_var->attr & leading_e) ) + { + // LEADING + digits = resolved_location; + sign_byte_location = resolved_location; + ndigits = resolved_length; + } + else // if( !(resolved_var->attr & leading_e) ) + { + // TRAILING + digits = resolved_location; + sign_byte_location = resolved_location + resolved_length - 1; + ndigits = resolved_length; + } } - else if( (resolved_var->attr & leading_e) ) + else { + digits = resolved_location; sign_byte_location = resolved_location; - ch = *sign_byte_location; - turn_sign_bit_off(sign_byte_location); - } - else // if( !(resolved_var->attr & leading_e) ) - { - sign_byte_location = resolved_location + resolved_length - 1; - ch = *sign_byte_location; - turn_sign_bit_off(sign_byte_location); - } - - // We know where the decimal point is because of rdigits. Because - // we know that it a clean string of ASCII digits, we can use the - // dirty converter: - retval = __gg__dirty_to_binary_internal(PTRCAST(const char, - resolved_location), - resolved_length, - rdigits ); - *rdigits = resolved_var->rdigits; - - // Restore the sign byte - *sign_byte_location = ch; - - if( ch == internal_minus || is_sign_bit_on(ch) ) - { - retval = -retval; + ndigits = resolved_length; } + retval = __gg__numeric_display_to_binary(sign_byte_location, + digits, + ndigits); } break; + } case FldNumericEdited : retval = edited_to_binary( PTRCAST(char, resolved_location), @@ -2210,52 +2149,9 @@ get_binary_value_local( int *rdigits, case FldPacked: { - static const unsigned char dp2bin[160] = - { - // This may not be the weirdest table I've ever created, but it is - // certainly a contender. Given the packed decimal byte 0x23, it - // returns the equivalent decimal value of 23. - 00, 01, 02, 03, 04, 05, 06, 07, 8, 9, 0, 0, 0, 0, 0, 0, // 0x00 - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, // 0x10 - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0, // 0x20 - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, // 0x30 - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0, // 0x40 - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0, // 0x50 - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0, // 0x60 - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0, // 0x70 - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, // 0x80 - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 0, 0, 0, 0, 0, 0, // 0x90 - }; - - if( resolved_var->attr & packed_no_sign_e ) - { - // This is packed decimal without a sign nybble - retval = 0; - for(size_t i=0; i<resolved_var->capacity; i++) - { - retval *= 100; - retval += dp2bin[resolved_location[i]]; - } - } - else - { - // This is packed decimal with a final sign nybble - retval = 0; - size_t imputed_length = (resolved_var->digits + 2)/2; - for(size_t i=0; i<imputed_length-1; i++) - { - retval *= 100; - retval += dp2bin[resolved_location[i]]; - } - retval *= 10; - retval += resolved_location[imputed_length-1]>>4; - if( (resolved_location[imputed_length-1]&0x0F) == 0x0D - || (resolved_location[imputed_length-1]&0x0F) == 0x0B ) - { - retval = -retval; - } - } - *rdigits = resolved_var->rdigits; + *rdigits = resolved_var->rdigits; + retval = __gg__packed_to_binary(resolved_location, + resolved_length); break; } } @@ -3092,6 +2988,50 @@ format_for_display_internal(char **dest, case FldNumericDisplay: { + // Because a NumericDisplay can have any damned thing as a character, + // we are going force things that aren't digits to display as '0' + + // 0xFF is an exception, so that a HIGH-VALUE in a numeric display shows + // up in a unique way. + static const uint8_t ascii_chars[256] = + { + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x00 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x10 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x20 + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '0', '0', '0', '0', '0', // 0x30 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x40 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x50 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x60 + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '0', '0', '0', '0', '0', // 0x70 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x80 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0x90 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xa0 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xb0 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xc0 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xd0 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', // 0xe0 + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 0xFF, // 0xf0 + }; + static const uint8_t ebcdic_chars[256] = + { + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x00 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x10 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x20 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x30 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x40 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x50 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x60 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x70 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x80 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0x90 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xa0 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xb0 + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xc0 + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xd0 + 0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0,0xf0, // 0xe0 + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xf0,0xf0,0xf0,0xf0,0xf0,0xFF, // 0xf0 + } ; + // We are going to make use of fact that a NumericDisplay's data is // almost already in the format we need. We have to add a decimal point, // if necessary, in the right place, and we need to tack on leading or @@ -3165,50 +3105,62 @@ format_for_display_internal(char **dest, } } - {//xxx - // copy over the characters to the left of the decimal point: - for(int i=0; i<ldigits; i++ ) - { - char ch = *running_location++; + // copy over the characters to the left of the decimal point: + for(int i=0; i<ldigits; i++ ) + { + unsigned char ch = *running_location++; - // The default HIGH-VALUE of 0xFF runs afoul of the - // NumericDisplay sign bit 0f 0x40 when running in - // ASCII mode. The following test handles that problem - // when HIGH-VALUE is still 0xFF. That HIGH-VALUE can - // be changed by the SPECIAL-NAMES ALPHABET clause. But + // Welcome to COBOL. We might be dealing with a HIGH-VALUE, which + // is usually, but not always 0xFF. I am going to handle the 0xFF + // case. When the programmer messes with HIGH-VALUE in the + // SPECIAL-NAMES ALPHABET clause, then it becomes their problem. - // I have decided that the onus of that problem is on - // the user. - if( (*dest)[index-1] != (char)DEGENERATE_HIGH_VALUE ) - { - turn_sign_bit_off( PTRCAST(unsigned char, &ch)); - } - (*dest)[index++] = ch; + // But when it isn't HIGH-VALUE, we don't want to see the effects + // of the internal sign. + + // Another tricky thing, though, is that for various reasons + // the string of digits might not be digits. There can be + // REDEFINES, or the middle of the number might have been changed + // with an INITIALIZE into spaces. But we do want numbers to + // look like numbers. So, we do what we can: + + if( internal_is_ebcdic ) + { + ch = ebcdic_chars[ch]; } - if( rdigits ) + else { - // Lay down a decimal point - (*dest)[index++] = ascii_to_internal(__gg__decimal_point); + ch = ascii_chars[ch]; + } + (*dest)[index++] = ch; + } + if( rdigits ) + { + // Lay down a decimal point + (*dest)[index++] = ascii_to_internal(__gg__decimal_point); - if( ldigits < 0 ) + if( ldigits < 0 ) + { + // This is a scaled_e value, and we need that many zeroes: + for( int i=0; i<-ldigits; i++ ) { - // This is a scaled_e value, and we need that many zeroes: - for( int i=0; i<-ldigits; i++ ) - { - (*dest)[index++] = internal_zero; - } + (*dest)[index++] = internal_zero; } + } - // And the digits to the right - for(int i=0; i<rdigits; i++ ) + // And the digits to the right + for(int i=0; i<rdigits; i++ ) + { + unsigned char ch = *running_location++; + if( internal_is_ebcdic ) { - char ch = *running_location++; - if( (*dest)[index-1] != (char)DEGENERATE_HIGH_VALUE ) - { - turn_sign_bit_off(PTRCAST(unsigned char, &ch)); - } - (*dest)[index++] = ch; + ch = ebcdic_chars[ch]; } + else + { + ch = ascii_chars[ch]; + } + (*dest)[index++] = ch; } } // At this point, for a 999PPP number, we need to tack on the zeroes @@ -3710,7 +3662,9 @@ compare_88( const char *list, cmpval = cstrncmp (test, PTRCAST(char, conditional_location), conditional_length); - if( cmpval == 0 && (int)strlen(test) != conditional_length ) + +// if( cmpval == 0 && (int)strlen(test) != conditional_length ) + if( cmpval == 0 && test_len != conditional_length ) { // When strncmp returns 0, the actual smaller string is the // the shorter of the two: @@ -6126,7 +6080,7 @@ __gg__move( cblc_field_t *fdest, // Convert it to the full complement of digits available // from the source...but no more - __gg__binary_to_string(ach, source_digits, value); + __gg__binary_to_string_ascii(ach, source_digits, value); // Binary to string returns ASCII characters: for(int i=0; i<source_digits; i++) @@ -6783,7 +6737,7 @@ typedef struct normalized_operand { // These are the characters of the string. When the field is NumericDisplay // any leading or trailing +/- characters are removed, and any embedded - // NUMERIC_DISPLAY_SIGN_BIT bits are removed. + // minus bits are removed. std::string the_characters; size_t offset; // Usually zero. One when there is a leading sign. size_t length; // Usually the same as the original. But it is one less @@ -6846,7 +6800,7 @@ normalize_id( const cblc_field_t *refer, for( size_t i=retval.offset; i<retval.length; i++ ) { // Because we are dealing with a NumericDisplay that might have - // the NUMERIC_DISPLAY_SIGN_BIT turned on, we need to mask it off + // the minus bit turned on, we need to mask it off unsigned char ch = data[i]; turn_sign_bit_off(&ch); retval.the_characters += ch; @@ -7543,10 +7497,8 @@ __gg__inspect_format_1(int backward, size_t integers[]) // We are now set up to accomplish the data flow described // in the language specification. We loop through the // the character positions in normalized_id_1: - const char *leftmost - = normalized_id_1.the_characters.c_str(); - const char *rightmost - = leftmost + normalized_id_1.length; + const char *leftmost = normalized_id_1.the_characters.c_str(); + const char *rightmost = leftmost + normalized_id_1.length; while( leftmost < rightmost ) { @@ -7601,7 +7553,7 @@ __gg__inspect_format_1(int backward, size_t integers[]) break; case bound_characters_e: - match = 1; + match = true; break; case bound_all_e: @@ -11106,9 +11058,12 @@ __gg__unstring( const cblc_field_t *id1, // The string being unstring } // Update the state variables: - pointer += examined + id2_s[ifound]; tally += 1; nreceiver += 1; + if( ifound >= 0 ) + { + pointer += examined + id2_s[ifound]; + } } done: @@ -13387,7 +13342,7 @@ int __gg__get_file_descriptor(const char *device) { int retval = open(device, O_WRONLY); - + if( retval == -1 ) { char *msg; @@ -13403,7 +13358,7 @@ __gg__get_file_descriptor(const char *device) open_syslog(option, facility); syslog(priority, "%s", msg); } - + // Open a new handle to /dev/stdout, since our caller will be closing it retval = open("/dev/stdout", O_WRONLY); } diff --git a/libgcobol/stringbin.cc b/libgcobol/stringbin.cc new file mode 100644 index 0000000..63976cf --- /dev/null +++ b/libgcobol/stringbin.cc @@ -0,0 +1,810 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <algorithm> +#include <cctype> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <set> +#include <stack> +#include <string> +#include <unordered_map> +#include <vector> + +#include <dirent.h> +#include <dlfcn.h> +#include <err.h> +#include <fcntl.h> +#include <fenv.h> +#include <math.h> // required for fpclassify(3), not in cmath +#include <setjmp.h> +#include <signal.h> +#include <syslog.h> +#include <unistd.h> +#include <stdarg.h> +#if __has_include(<errno.h>) +# include <errno.h> // for program_invocation_short_name +#endif + +#include "config.h" +#include "libgcobol-fp.h" + +#include "ec.h" +#include "common-defs.h" +#include "io.h" +#include "gcobolio.h" +#include "libgcobol.h" +#include "gfileio.h" +#include "charmaps.h" +#include "valconv.h" +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/time.h> +#include <execinfo.h> +#include "exceptl.h" +#include "stringbin.h" + +/* This routine evolved from a primitive binary-to-string routine that simply + peeled digits off the bottom of an __int128 by using + + value % 10 + '0'; + value /= 10; + + That turns out to be unnecessarily slow. + + The routine implemented here uses a divide-and-conquer approach to + minimimizing the number of operations, and when you get down to two + digits it does a divide-by-100 and uses the remainder in a table lookup + to get the digits. */ + +/* These static tables are born of a pathologic desire to avoid calculations. + Whether that paranoia is justified (perhaps "digit%10 + '0';" ) would + actually be faster) is currently untested. But I figured this would be + pretty darn fast. + + Use them when you know the index is between zero and one hundred. */ + +static const char digit_low[100] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + }; + +static const char digit_high[100] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + }; + +static char combined_string[128]; +static char zero_char; + +typedef struct + { + int start; + int run; + union + { + unsigned __int128 val128; + uint64_t val64; + uint32_t val32; + uint16_t val16; + uint8_t val8; + }; + } COMBINED; + +static +void +string_from_combined(const COMBINED &combined) + { + COMBINED left; + COMBINED right; + + uint16_t v16; + + switch(combined.run) + { + case 1: + // We know that val8 is a single digit + combined_string[combined.start] = combined.val8 + zero_char;; + break; + + case 2: + // We know that val8 has two digits + combined_string[combined.start] = digit_high[combined.val8] + zero_char; + combined_string[combined.start+1] = digit_low [combined.val8] + zero_char; + break; + + case 3: + // We know that val16 has three digits. + v16 = combined.val16; + combined_string[combined.start] = v16 / 100 + zero_char; + v16 %= 100; + combined_string[combined.start+1] = v16 / 10 + zero_char; + combined_string[combined.start+2] = v16 % 10 + zero_char; + break; + + case 4: + // We know that val16 has four digits: + v16 = combined.val16; + combined_string[combined.start] = v16 / 1000 + zero_char; + v16 %= 1000; + combined_string[combined.start+1] = v16 / 100 + zero_char; + v16 %= 100; + combined_string[combined.start+2] = v16 / 10 + zero_char; + combined_string[combined.start+3] = v16 % 10 + zero_char; + break; + + case 5: + case 6: + case 7: + case 8: + // We know that val32 can be treated as two 4-digit pieces + left.start = combined.start; + left.run = combined.run - 4; + left.val16 = combined.val32 / 10000; + + right.start = combined.start+left.run; + right.run = 4; + right.val16 = combined.val32 % 10000; + + string_from_combined(left); + string_from_combined(right); + break; + + case 9: + // We break val32 into a 1-digit piece, and an 8-digit piece: + left.start = combined.start; + left.run = combined.run - 8; + left.val32 = combined.val32 / 100000000; + + right.start = combined.start+left.run; + right.run = 8; + right.val32 = combined.val32 % 100000000; + + string_from_combined(left); + string_from_combined(right); + break; + + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + case 18: + // We know we can treat val64 as two 9-digit pieces: + left.start = combined.start; + left.run = combined.run - 9; + left.val32 = combined.val64 / 1000000000; + + right.start = combined.start+left.run; + right.run = 9; + right.val32 = combined.val64 % 1000000000; + + string_from_combined(left); + string_from_combined(right); + break; + + case 19: + // We split off the bottom nine digits + left.start = combined.start; + left.run = combined.run - 9; + left.val64 = combined.val64 / 1000000000; + + right.start = combined.start+left.run; + right.run = 9; + right.val32 = combined.val64 % 1000000000; + + string_from_combined(left); + string_from_combined(right); + break; + + default: + // For twenty or more digits we peel eighteen digits at a time off the + // right side: + left.start = combined.start; + left.run = combined.run - 18; + left.val128 = combined.val128 / 1000000000000000000ULL; + + right.start = combined.start+left.run; + right.run = 18; + right.val64 = combined.val128 % 1000000000000000000ULL; + + string_from_combined(left); + string_from_combined(right); + break; + } + } + +bool +__gg__binary_to_string_ascii(char *result, int digits, __int128 value) + { + zero_char = ascii_zero; + + // Note that this routine does not terminate the generated string with a + // NUL. This routine is sometimes used to generate a NumericDisplay string + // of digits in place, with no terminator. + __int128 mask = __gg__power_of_ten(digits); + + COMBINED combined; + if( value < 0 ) + { + value = -value; + } + + // A non-zero retval means the number was too big to fit into the desired + // number of digits: + bool retval = !!(value / mask); + + // mask off the bottom digits to avoid garbage when value is too large + value %= mask; + + combined.start = 0; + combined.run = digits; + combined.val128 = value; + string_from_combined(combined); + memcpy(result, combined_string, digits); + return retval; + } + +bool +__gg__binary_to_string_internal(char *result, int digits, __int128 value) + { + zero_char = internal_zero; + + // Note that this routine does not terminate the generated string with a + // NUL. This routine is sometimes used to generate a NumericDisplay string + // of digits in place, with no terminator. + __int128 mask = __gg__power_of_ten(digits); + + COMBINED combined; + if( value < 0 ) + { + value = -value; + } + + // A non-zero retval means the number was too big to fit into the desired + // number of digits: + bool retval = !!(value / mask); + + // mask off the bottom digits to avoid garbage when value is too large + value %= mask; + + combined.start = 0; + combined.run = digits; + combined.val128 = value; + string_from_combined(combined); + memcpy(result, combined_string, digits); + return retval; + } + + +static +void +packed_from_combined(const COMBINED &combined) + { + /* The combined.value must be positive at this point. + + The combined.run value has to be the number of places needed to hold + combined.value. The proper calculation is (digits+1)/2. + + For a signable value, the caller had to multiple the original value by + ten to create room on the right for the sign nybble. */ + + static const unsigned char bin2pd[100] = + { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + } ; + + COMBINED left; + COMBINED right; + + switch(combined.run) + { + case 1: + // We know that val8 has two digits. + combined_string[combined.start] = bin2pd[combined.val8]; + break; + + case 2: + // We know that val16 has four digits. + combined_string[combined.start ] = bin2pd[combined.val16/100]; + combined_string[combined.start+1] = bin2pd[combined.val16%100]; + break; + + case 3: + case 4: + // We know that val32 can hold up to eight digits. Break it in half. + left.start = combined.start; + left.run = combined.run - 2; + left.val16 = combined.val32 / 10000; + + right.start = combined.start+left.run; + right.run = 2; + right.val16 = combined.val32 % 10000; + + packed_from_combined(left); + packed_from_combined(right); + break; + + case 5: + case 6: + case 7: + case 8: + // We know that val64 is holding up to 18 digits. Break it into two + // eight-digit places that can each go into a val23 + left.start = combined.start; + left.run = combined.run - 4; + left.val32 = combined.val64 / 100000000; + + right.start = combined.start+left.run; + right.run = 4; + right.val32 = combined.val64 % 100000000; + + packed_from_combined(left); + packed_from_combined(right); + break; + + case 9: + // We know that val64 is holding 17 or 18 digits. Break off the + // bottom eight. + left.start = combined.start; + left.run = combined.run - 4; + left.val64 = combined.val64 / 100000000; + + right.start = combined.start+left.run; + right.run = 4; + right.val32 = combined.val64 % 100000000; + + packed_from_combined(left); + packed_from_combined(right); + break; + + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + case 18: + // We know that val64 is holding between 18 and 36 digits. Break it + // two val64: + + left.start = combined.start; + left.run = combined.run - 9; + left.val64 = combined.val128 / 1000000000000000000ULL; + + right.start = combined.start+left.run; + right.run = 9; + right.val64 = combined.val128 % 1000000000000000000ULL; + + packed_from_combined(left); + packed_from_combined(right); + break; + + default: + // For twenty or more digits we peel eighteen digits at a time off the + // right side: + left.start = combined.start; + left.run = combined.run - 9; + left.val128 = combined.val128 / 1000000000000000000ULL; + + right.start = combined.start+left.run; + right.run = 9; + right.val64 = combined.val128 % 1000000000000000000ULL; + + packed_from_combined(left); + packed_from_combined(right); + break; + } + } + +extern "C" +void +__gg__binary_to_packed( unsigned char *result, + int digits, + __int128 value) + { + size_t length = (digits+1)/2; + + COMBINED combined; + combined.start = 0; + combined.run = length; + combined.val128 = value; + packed_from_combined(combined); + memcpy(result, combined_string, length); + } + +extern "C" +__int128 +__gg__numeric_display_to_binary(unsigned char *signp, + const unsigned char *psz, + int n ) + { + /* This is specific to numeric display values. + + Such values can be unsigned, or they can have leading or trailing + internal sign information, or they can have leading or trailing external + sign information. + + In ASCII, digits are 030; internal sign is has the zone 0x70. + + In EBDIC, normal digits are 0xF0. The sign byte in for a positive + signable number has the zone 0xC0; a negative value has the zone 0xD0. + + A further complication is that it is legal for NumericDisplay values to + have non-digit characters. This is because of REDEFINES, and whatnot. + Some COBOL implementations just look at the bottom four bits of + characters regardless of their legality. I am choosing to have non-legal + characters come back as zero. I do this with tables, so the cost is low. + */ + + /* We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic, + and so we build up a 128-bit result in three 64-bit pieces, and assemble + them at the end. */ + + + static const uint8_t lookup[] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,0,0,0,0,0, + 10,11,12,13,14,15,16,17,18,19, 0,0,0,0,0,0, + 20,21,22,23,24,25,26,27,28,29, 0,0,0,0,0,0, + 30,31,32,33,34,35,36,37,38,39, 0,0,0,0,0,0, + 40,41,42,43,44,45,46,47,48,49, 0,0,0,0,0,0, + 50,51,52,53,54,55,56,57,58,59, 0,0,0,0,0,0, + 60,61,62,63,64,65,66,67,68,69, 0,0,0,0,0,0, + 70,71,72,73,74,75,76,77,78,79, 0,0,0,0,0,0, + 80,81,82,83,84,85,86,87,88,89, 0,0,0,0,0,0, + 90,91,92,93,94,95,96,97,98,99, 0,0,0,0,0,0, + }; + + static const uint8_t from_ebcdic[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x30 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0 + 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0xf0 + }; + + static const uint8_t from_ascii[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 + 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x30 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xb0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xc0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xd0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xe0 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xf0 + }; + + __int128 retval; + + uint64_t top = 0; + uint64_t middle = 0; + uint64_t bottom = 0; + + int count_bottom; + int count_middle; + int count_top; + + bool is_negative = false; + + // Pick up the original sign byte: + unsigned char sign_byte = *signp; + + const unsigned char *mapper; + if( internal_is_ebcdic ) + { + mapper = from_ebcdic; + if( sign_byte == EBCDIC_MINUS ) + { + is_negative = true; + } + else if( (sign_byte & 0xF0) == 0xD0 ) + { + is_negative = true; + } + // No matter what the digit, force it to be a valid positive digit by + // forcing the zone to 0xF0. Note that this is harmless if redundant, and + // harmless as well if the data SIGN IS SEPARATE. Whatever we do to this + // byte will be undone at the end of the routine. + *signp |= 0xF0; + } + else + { + mapper = from_ascii; + if( sign_byte == '-' ) + { + is_negative = true; + } + else if( (sign_byte & 0xF0) == 0x70 ) + { + is_negative = true; + + // Make it a valid positive digit by turning the zone to 0x30 + *signp &= 0x3F; + } + } + + // Digits 1 through 18 come from the bottom: + if( n <= 18 ) + { + count_bottom = n; + count_middle = 0; + count_top = 0; + } + else if( n<= 36 ) + { + count_bottom = 18; + count_middle = n - 18; + count_top = 0; + } + else + { + count_bottom = 18; + count_middle = 18; + count_top = n - 36; + } + + if( n & 1 ) + { + // We are dealing with an odd number of digits + if( count_top ) + { + top = mapper[*psz++]; + count_top -= 1; + } + else if( count_middle ) + { + middle = mapper[*psz++]; + count_middle -= 1; + } + else + { + bottom = mapper[*psz++]; + count_bottom -= 1; + } + } + + uint8_t add_me; + + while( count_top ) + { + add_me = mapper[*psz++] << 4; + add_me += mapper[*psz++]; + top *= 100 ; + top += lookup[add_me]; + count_top -= 2; + } + + while( count_middle ) + { + add_me = mapper[*psz++] << 4; + add_me += mapper[*psz++]; + middle *= 100 ; + middle += lookup[add_me]; + count_middle -= 2; + } + + while( count_bottom ) + { + add_me = mapper[*psz++] << 4; + add_me += mapper[*psz++]; + bottom *= 100 ; + bottom += lookup[add_me]; + count_bottom -= 2; + } + + retval = top; + retval *= 1000000000000000000ULL; // 10E18 + + retval += middle; + retval *= 1000000000000000000ULL; + + retval += bottom; + + if( is_negative ) + { + retval = -retval; + } + + // Replace the original sign byte: + *signp = sign_byte; // cppcheck-suppress redundantAssignment + + return retval; + } + +extern "C" +__int128 +__gg__packed_to_binary(const unsigned char *psz, + int nplaces ) + { + // See the comments in __gg__numeric_display_to_binary() above. + + __int128 retval = 0; + + static const unsigned char dp2bin[160] = + { + // This may not be the weirdest table I've ever created, but it is + // certainly a contender. Given the packed decimal byte 0x23, it + // returns the equivalent decimal value of 23. Note that the final + // entries in each line are intended to handle the final place of + // signed values. 0x2D, for example, gets picked up as 20. + 00, 01, 02, 03, 04, 05, 06, 07, 8, 9, 0, 0, 0, 0, 0, 0, // 0x00 + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 10, 10, 10, 10, 10, 10, // 0x10 + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 20, 20, 20, 20, 20, // 0x20 + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 30, 30, 30, 30, 30, 30, // 0x30 + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 40, 40, 40, 40, 40, 40, // 0x40 + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 50, 50, 50, 50, 50, 50, // 0x50 + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 60, 60, 60, 60, 60, 60, // 0x60 + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 70, 70, 70, 70, 70, 70, // 0x70 + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 80, 80, 80, 80, 80, 80, // 0x80 + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 90, 90, 90, 90, 90, 90, // 0x90 + }; + + uint64_t top = 0; + uint64_t middle = 0; + uint64_t bottom = 0; + + int count_bottom; + int count_middle; + int count_top; + + // Turn places into n digits + int n = nplaces * 2; + + // Digits 1 through 18 come from the bottom: + if( n <= 18 ) + { + count_bottom = n; + count_middle = 0; + count_top = 0; + } + else if( n<= 36 ) + { + count_bottom = 18; + count_middle = n - 18; + count_top = 0; + } + else + { + count_bottom = 18; + count_middle = 18; + count_top = n - 36; + } + + while( count_top ) + { + top *= 100 ; + top += dp2bin[*psz++]; + count_top -= 2; + } + + while( count_middle ) + { + middle *= 100 ; + middle += dp2bin[*psz++]; + count_middle -= 2; + } + + while( count_bottom ) + { + bottom *= 100 ; + bottom += dp2bin[*psz++]; + count_bottom -= 2; + } + + retval = top; + retval *= 1000000000000000000ULL; // 10E18 + + retval += middle; + retval *= 1000000000000000000ULL; + + retval += bottom; + + // retval is now the binary value of the packed decimal number. + + // back up one byte to fetch the sign nybble. + uint8_t sign_nybble = *(psz-1) & 0x0F; + + if( sign_nybble > 9 ) + { + // There is a sign nybble. We have to divide the result by ten to offset + // left shift due place taken up by the sign nybble. + retval /= 10; + + if( sign_nybble == PACKED_NYBBLE_MINUS ) + { + retval = -retval ; + } + } + + return retval; + } + + + + + diff --git a/libgcobol/stringbin.h b/libgcobol/stringbin.h new file mode 100644 index 0000000..de003e7 --- /dev/null +++ b/libgcobol/stringbin.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef STRINGBIN_H_ +#define STRINGBIN_H_ + +extern "C" +bool __gg__binary_to_string_ascii(char *result, + int digits, + __int128 value); +extern "C" +bool __gg__binary_to_string_internal( char *result, + int digits, + __int128 value); + +extern "C" +void __gg__binary_to_packed( unsigned char *result, + int digits, + __int128 value); + +extern "C" +__int128 __gg__numeric_display_to_binary( unsigned char *sign_byte, + const unsigned char *digits, + int ndigits ); + +extern "C" +__int128 +__gg__packed_to_binary(const unsigned char *psz, + int nplaces ); + +#endif |