/*
 * Copyright (c) 2021-2025 Symas Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 *
 * * Neither the name of the Symas Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <errno.h>
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "config.h"
#include "ec.h"
#include "common-defs.h"
#include "io.h"
#include "gcobolio.h"
#include "libgcobol.h"
#include "gmath.h"

#ifdef __aarch64__
#define __float128 _Float128
#endif

#define MAX_INTERMEDIATE_BITS 126
#define MAX_INTERMEDIATE_DECIMALS 16

static int conditional_stash(cblc_field_t *destination,
                             size_t destination_o,
                             size_t destination_s,
                             bool on_error_flag,
                             __int128 value,
                             int rdigits,
                             cbl_round_t rounded) {
  int retval = compute_error_none;
  if( !on_error_flag ) {
    // It's an uncomplicated assignment, because there was no
    // ON SIZE ERROR phrase
    __gg__int128_to_qualified_field(destination, destination_o, destination_s,
                                    value, rdigits, rounded, &retval);
  } else {
    // This is slightly more complex, because in the event of a
    // SIZE ERROR we need to leave the original value untouched
    unsigned char *stash = (unsigned char *)malloc(destination_s);
    memcpy(stash, destination->data+destination_o, destination_s);
    __gg__int128_to_qualified_field(destination, destination_o, destination_s,
                                    value, rdigits, rounded, &retval);
    if( retval ) {
      // Because there was a size error, we will report that
      // upon return, and we need to put back the original value:
      memcpy(destination->data+destination_o, stash, destination_s);
    }
    free(stash);
  }
  return retval;
}

static int conditional_stash(cblc_field_t *destination,
                             size_t destination_o,
                             size_t destination_s,
                             bool on_error_flag,
                             _Float128 value,
                             cbl_round_t rounded) {
  int retval = compute_error_none;
  if( !on_error_flag ) {
    // It's an uncomplicated assignment, because there was no
    // ON SIZE ERROR phrase
    __gg__float128_to_qualified_field(destination, destination_o,
                                      value, rounded, &retval);
  } else {
    // This is slightly more complex, because in the event of a SIZE ERROR
    // we need to leave the original value untouched
    unsigned char *stash = (unsigned char *)malloc(destination_s);
    memcpy(stash, destination->data+destination_o, destination_s);
    __gg__float128_to_qualified_field(destination, destination_o,
                                      value, rounded, &retval);
    if( retval ) {
      // Because there was a size error, we will report that
      // upon return, and we need to put back the original value:
      memcpy(destination->data+destination_o, stash, destination_s);
    }
    free(stash);
  }
  return retval;
}

#if defined(__aarch64__)
# define __float128 _Float128 /* double */
#endif

static _Float128 divide_helper_float(_Float128 a_value,
                                     _Float128 b_value,
                                     int *compute_error) {
  if( b_value == 0 ) {
    // Can't divide by zero
    *compute_error |= compute_error_divide_by_zero;
    return a_value;
  }
  // Do the actual division, giving us 0.399999999999999999999999999999999971
  a_value /= b_value;
  if( __builtin_isinf(a_value) ) {
    *compute_error |= compute_error_overflow;
    return 0;
  }
  if( __builtin_isnan(a_value) ) {
    *compute_error |= compute_error_underflow;
    return 0;
  }
  return a_value;
}

static _Float128 multiply_helper_float(_Float128 a_value,
                                       _Float128 b_value,
                                       int *compute_error) {
  a_value *= b_value;
  if( __builtin_isinf(a_value) ) {
    *compute_error |= compute_error_overflow;
    return 0;
  }
  if( __builtin_isnan(a_value) ) {
    *compute_error |= compute_error_underflow;
    return 0;
  }
  return a_value;
}

static _Float128 addition_helper_float(_Float128 a_value,
                                       _Float128 b_value,
                                       int *compute_error) {
  a_value += b_value;
  if( __builtin_isinf(a_value) ) {
    *compute_error |= compute_error_overflow;
    return 0;
  }
  if( __builtin_isnan(a_value) ) {
    *compute_error |= compute_error_underflow;
    return 0;
  }
  return a_value;
}

static _Float128 subtraction_helper_float(_Float128 a_value,
                                          _Float128 b_value,
                                          int *compute_error) {
  a_value -= b_value;
  if( __builtin_isinf(a_value) ) {
    *compute_error |= compute_error_overflow;
    return 0;
  }
  if( __builtin_isnan(a_value) ) {
    *compute_error |= compute_error_underflow;
    return 0;
  }
  return a_value;
}

extern "C"
void __gg__pow(cbl_arith_format_t,
               size_t,
               size_t,
               size_t,
               cbl_round_t *rounded,
               int on_error_flag,
               int *compute_error) {
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;
  cblc_field_t **B = __gg__treeplet_2f;
  size_t *B_o = __gg__treeplet_2o;
  size_t *B_s = __gg__treeplet_2s;
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;

  _Float128 avalue = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
  _Float128 bvalue = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
  _Float128 tgt_value;

  if( avalue == 0 && bvalue == 0 ) {
    *compute_error |= compute_error_exp_zero_by_zero;
    tgt_value = 1;
  } else if( avalue == 0 && bvalue < 0 ) {
    *compute_error |= compute_error_exp_zero_by_minus;
    tgt_value = 0;
  } else {
    // Calculate our answer, in floating point:
    errno = 0;
    feclearexcept(FE_ALL_EXCEPT);
    tgt_value = powf128(avalue, bvalue);
    if( errno || fetestexcept(  FE_INVALID
                              | FE_DIVBYZERO
                              | FE_OVERFLOW
                              | FE_UNDERFLOW) ) {
      // One of a large number of errors took place.  See math_error(7) and
      // pow(3).  Let's just use this last error as a grab-bag; I didn't
      // care to go down the rabbit hole of figuring out if a floating point
      // number did or did not have a fractional part.  That way lies madness.
      *compute_error |= compute_error_exp_minus_by_frac;
      // This kind of error doesn't overwrite the target, so the returned
      // value is not relevant.
      // Make it zero to avoid overheating the conversion routine
      tgt_value = 0;
    }
  }
  if( !(*compute_error & compute_error_exp_minus_by_frac) ) {
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        (on_error_flag & ON_SIZE_ERROR),
                                        tgt_value,
                                        *rounded);
  }
}

extern "C"
void __gg__process_compute_error(int compute_error) {
  // This routine gets called after a series of parser_op operations is
  // complete (see parser_assign()) when the source code didn't specify
  // an ON SIZE ERROR clause.
  if( compute_error & compute_error_divide_by_zero ) {
    exception_raise(ec_size_zero_divide_e);
  } else if( compute_error & compute_error_truncate ) {
    exception_raise(ec_size_truncation_e);
  } else if( compute_error & (  compute_error_exp_zero_by_zero
                              | compute_error_exp_zero_by_minus
                              | compute_error_exp_minus_by_frac) ) {
    exception_raise(ec_size_exponentiation_e);
  } else if( compute_error & compute_error_overflow ) {
    exception_raise(ec_size_overflow_e);
  } else if( compute_error & compute_error_underflow ) {
    exception_raise(ec_size_underflow_e);
  }
}

typedef unsigned __int128 uint128;

typedef struct int256 {
  union {
    unsigned char data[32];
    uint64_t      i64 [4];
    uint128       i128[2];
  };
} int256;

static int multiply_int256_by_int64(int256 &product, const uint64_t multiplier) {
  // Typical use of this routine is multiplying a temporary value by
  // a factor of ten.  This is effectively left-shifting by decimal
  // digits.  See scale_int256_by_digits
  uint64_t overflows[5] = {};
  for(int i=0; i<4; i++) {
    uint128 temp = (uint128)product.i64[i] * multiplier;
    product.i64[i] = *(uint64_t *)(&temp);
    overflows[i+1] = *(uint64_t *)((uint8_t *)(&temp) + 8);
  }
  for(int i=1; i<4; i++) {
    product.i64[i] += overflows[i];
    if( product.i64[i] < overflows[i] ) {
      overflows[i+1] += 1;
    }
  }
  // Indicate that an overflow took place.  This is not useful unless the
  // int256 is known to be positive.
  return overflows[4];
}

static int add_int256_to_int256(int256 &sum, const int256 addend) {
  uint128 overflows[3] = {};
  for(int i=0; i<2; i++) {
    sum.i128[i] += addend.i128[i];
    if( sum.i128[i] < addend.i128[i] ) {
      overflows[i+1] = 1;
    }
  }
  if( overflows[1] ) {
    sum.i128[1] += overflows[1];
    if( sum.i128[1] == 0 ) {
      overflows[2] = 1;
    }
  }
  // Indicate that an overflow took place.  This is not useful unless the two
  // values are known to be positive.
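  // (The wrap-around comparisons above are the usual unsigned-carry idiom:
  // if a limb holds 0xFFFF...FFFF and 1 is added to it, the result wraps to
  // zero, which is smaller than the addend, and that is what flags the
  // carry.)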
  return (int)overflows[2];
}

static void negate_int256(int256 &val) {
  val.i128[0] = ~val.i128[0];
  val.i128[1] = ~val.i128[1];
  val.i128[0] += 1;
  if( !val.i128[0] ) {
    val.i128[1] += 1;
  }
}

static int subtract_int256_from_int256(int256 &difference, int256 subtrahend) {
  negate_int256(subtrahend);
  return add_int256_to_int256(difference, subtrahend);
}

static void scale_int256_by_digits(int256 &val, int digits) {
  uint64_t pot;
  while( digits > 17 ) {
    pot = (uint64_t)__gg__power_of_ten(17);
    multiply_int256_by_int64(val, pot);
    digits -= 17;
  }
  pot = (uint64_t)__gg__power_of_ten(digits);
  multiply_int256_by_int64(val, pot);
}

static void divide_int256_by_int64(int256 &val, uint64_t divisor) {
  // val needs to be a positive number
  uint128 temp = 0;
  for( int i=3; i>=0; i-- ) {
    // Left shift temp 64 bits:
    *(uint64_t *)(((uint8_t *)&temp)+8) = *(uint64_t *)(((uint8_t *)&temp)+0);
    // Put the high digit of val into the bottom of temp
    *(uint64_t *)(((uint8_t *)&temp)+0) = val.i64[i];
    // Divide that combination by divisor to get the new digit
    val.i64[i] = temp / divisor;
    // And the new temp is that combination modulo divisor
    temp = temp % divisor;
  }
}

static int squeeze_int256(int256 &val, int &rdigits) {
  int overflow = 0;
  // It has been decreed that at this juncture the result must fit into
  // MAX_FIXED_POINT_DIGITS.  If the result does not, we have an OVERFLOW
  // error.
  int is_negative = val.data[31] & 0x80;
  if( is_negative ) {
    negate_int256(val);
  }
  // As long as there are some decimal places left, we hold our nose and
  // right-shift a too-large value rightward by decimal digits.  In other
  // words, we truncate the fractional part to make room for the integer part:
  while( rdigits > 0 && val.i128[1] ) {
    divide_int256_by_int64(val, 10UL);
    rdigits -= 1;
  }
  // At this point, to be useful, val has to have fewer than 128 bits:
  if( val.i128[1] ) {
    overflow = compute_error_overflow;
  } else {
    // We know that it has fewer than 128 bits.  But the remaining 128 bits
    // need to be less than 10^MAX_FIXED_POINT_DIGITS.  This gets a bit nasty
    // here, since at this writing the gcc compiler doesn't understand 128-bit
    // constants.  So, we are forced into some annoying compiler gymnastics.
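    // For reference, the 128-bit constant being assembled below is
    // 10^38 = 0x4B3B4CA85A86C47A098A224000000000; the byte array that follows
    // is simply that value spelled out in little-endian order.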
#if MAX_FIXED_POINT_DIGITS != 37
#error MAX_FIXED_POINT_DIGITS needs to be 37
#endif
    // These sixteen bytes comprise the binary value of 10^38
    static const uint8_t C1038[] = {0x00, 0x00, 0x00, 0x00,
                                    0x40, 0x22, 0x8a, 0x09,
                                    0x7a, 0xc4, 0x86, 0x5a,
                                    0xa8, 0x4c, 0x3b, 0x4b};
    static const uint128 biggest = *(uint128 *)C1038;

    // If we still have some rdigits to throw away, we can keep shrinking
    // the value:
    while( rdigits > 0 && val.i128[0] >= biggest ) {
      divide_int256_by_int64(val, 10UL);
      rdigits -= 1;
    }
    // And we have to make sure that rdigits isn't too big
    while( rdigits > MAX_FIXED_POINT_DIGITS ) {
      divide_int256_by_int64(val, 10UL);
      rdigits -= 1;
    }
    if( val.i128[0] >= biggest ) {
      overflow = compute_error_overflow;
    }
  }
  if( is_negative ) {
    negate_int256(val);
  }
  return overflow;
}

static void get_int256_from_qualified_field(int256 &var,
                                            int &rdigits,
                                            cblc_field_t *field,
                                            size_t field_o,
                                            size_t field_s) {
  var.i128[0] = (uint128)__gg__binary_value_from_qualified_field(&rdigits,
                                                                 field,
                                                                 field_o,
                                                                 field_s);
  if( var.data[15] & 0x80 ) {
    // This value is negative, so extend the sign bit:
    memset(var.data + 16, 0xFF, 16);
  } else {
    // This value is positive
    memset(var.data + 16, 0x00, 16);
  }
}

static int256    phase1_result;
static int       phase1_rdigits;
static _Float128 phase1_result_float;

extern "C"
void __gg__add_fixed_phase1(cbl_arith_format_t,
                            size_t nA,
                            size_t,
                            size_t,
                            cbl_round_t *,
                            int,
                            int *compute_error) {
  // Our job is to add together the nA fixed-point values in the A[] array.
  // The result goes into the temporary phase1_result.
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;

  // Let us prime the pump with the first value of A[]
  get_int256_from_qualified_field(phase1_result, phase1_rdigits,
                                  A[0], A_o[0], A_s[0]);

  // We now go into a loop adding each of the A[] values to phase1_result:
  for( size_t i=1; i<nA; i++ ) {
    int256 temp;
    int temp_rdigits;
    get_int256_from_qualified_field(temp, temp_rdigits, A[i], A_o[i], A_s[i]);
    // We have to scale the one with fewer rdigits to match the one with
    // greater rdigits:
    if( phase1_rdigits > temp_rdigits ) {
      scale_int256_by_digits(temp, phase1_rdigits - temp_rdigits);
      temp_rdigits = phase1_rdigits;
    } else if( phase1_rdigits < temp_rdigits ) {
      scale_int256_by_digits(phase1_result, temp_rdigits - phase1_rdigits);
      phase1_rdigits = temp_rdigits;
    }
    // The two numbers have the same number of rdigits.  It's now safe to add
    // them.
    add_int256_to_int256(phase1_result, temp);
  }
  // phase1_result/phase1_rdigits now reflect the sum of all A[]
  int overflow = squeeze_int256(phase1_result, phase1_rdigits);
  if( overflow ) {
    *compute_error |= compute_error_overflow;
  }
}

extern "C"
void __gg__addf1_fixed_phase2(cbl_arith_format_t,
                              size_t,
                              size_t,
                              size_t,
                              cbl_round_t *rounded,
                              int on_error_flag,
                              int *compute_error) {
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;

  // This is the assignment phase of an ADD Format 1.
  // We take phase1_result and accumulate it into C.
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  if( C[0]->type == FldFloat ) {
    // The target we need to accumulate into is a floating-point number, so we
    // need to convert our fixed-point intermediate into floating point and
    // proceed accordingly.

    // Convert the intermediate
    _Float128 value_a = (_Float128)phase1_result.i128[0];
    value_a /= __gg__power_of_ten(phase1_rdigits);
    // Pick up the target
    _Float128 value_b = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
    value_a += value_b;
    // At this point, we assign the running sum to *C.
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        value_a,
                                        *rounded++);
  } else {
    // We have a fixed-point intermediate, and we are accumulating into a
    // fixed-point target.
    int256 value_a = phase1_result;
    int rdigits_a = phase1_rdigits;
    int256 value_b = {};
    int rdigits_b;
    get_int256_from_qualified_field(value_b, rdigits_b, C[0], C_o[0], C_s[0]);
    // We have to scale the one with fewer rdigits to match the one with
    // greater rdigits:
    if( rdigits_a > rdigits_b ) {
      scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
      rdigits_b = rdigits_a;
    } else if( rdigits_a < rdigits_b ) {
      scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
      rdigits_a = rdigits_b;
    }
    // The two numbers have the same number of rdigits.  It's now safe to add
    // them.
    add_int256_to_int256(value_a, value_b);
    int overflow = squeeze_int256(value_a, rdigits_a);
    if( overflow ) {
      *compute_error |= compute_error_overflow;
    }
    // At this point, we assign the running sum to *C.
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        value_a.i128[0],
                                        rdigits_a,
                                        *rounded++);
  }
}

extern "C"
void __gg__fixed_phase2_assign_to_c(cbl_arith_format_t,
                                    size_t,
                                    size_t,
                                    size_t,
                                    cbl_round_t *rounded,
                                    int on_error_flag,
                                    int *compute_error) {
  // This is the assignment phase of an ADD Format 2
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;

  // We take phase1_result and put it into C
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  if( C[0]->type == FldFloat ) {
    // The target is a floating-point number, so we need to convert our
    // fixed-point intermediate into floating point and proceed accordingly.

    // Convert the intermediate
    _Float128 value_a = (_Float128)phase1_result.i128[0];
    value_a /= __gg__power_of_ten(phase1_rdigits);
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        value_a,
                                        *rounded++);
  } else {
    // We have a fixed-point intermediate, and we are assigning it to a
    // fixed-point target.
    int256 value_a = phase1_result;
    int rdigits_a = phase1_rdigits;
    int overflow = squeeze_int256(value_a, rdigits_a);
    if( overflow ) {
      *compute_error |= compute_error_overflow;
    }
    // At this point, we assign that value to *C.
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        value_a.i128[0],
                                        rdigits_a,
                                        *rounded++);
  }
}

extern "C"
void __gg__add_float_phase1(cbl_arith_format_t,
                            size_t nA,
                            size_t,
                            size_t,
                            cbl_round_t *,
                            int,
                            int *compute_error) {
  // Our job is to add together the nA floating-point values in the A[] array.
  // The result goes into the temporary phase1_result_float.
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;

  // Let us prime the pump with the first value of A[]
  phase1_result_float = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);

  // We now go into a loop adding each of the A[] values to phase1_result_float:
  for( size_t i=1; i<nA; i++ ) {
    phase1_result_float = addition_helper_float(
                 phase1_result_float,
                 __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]),
                 compute_error);
  }
}

extern "C"
void __gg__addf3(cbl_arith_format_t,
                 size_t nA,
                 size_t,
                 size_t,
                 cbl_round_t *rounded,
                 int on_error_flag,
                 int *compute_error) {
  // This is an ADD Format 3.  Each A[i] gets accumulated into the matching
  // C[i].  Each addition is treated separately.
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  for(size_t i=0; i<nA; i++) {
    if( A[i]->type == FldFloat || C[i]->type == FldFloat ) {
      _Float128 value_a = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
      _Float128 value_b = __gg__float128_from_qualified_field(C[i], C_o[i], C_s[i]);
      value_a = addition_helper_float(value_a, value_b, compute_error);
      // At this point, we assign the sum to *C.
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          value_a,
                                          *rounded++);
    } else {
      // We are doing fixed-point arithmetic.
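      // Worked example of the rdigits alignment done below (illustrative):
      // adding 1.5 (stored as 15 with rdigits 1) to 0.25 (stored as 25 with
      // rdigits 2) first scales 15 up by one decimal digit to 150, so both
      // operands carry rdigits 2; 150 + 25 = 175, which reads back as 1.75.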
      int256 value_a;
      int rdigits_a;
      int256 value_b;
      int rdigits_b;
      get_int256_from_qualified_field(value_a, rdigits_a, A[i], A_o[i], A_s[i]);
      get_int256_from_qualified_field(value_b, rdigits_b, C[i], C_o[i], C_s[i]);
      // We have to scale the one with fewer rdigits to match the one with
      // greater rdigits:
      if( rdigits_a > rdigits_b ) {
        scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
        rdigits_b = rdigits_a;
      } else if( rdigits_a < rdigits_b ) {
        scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
        rdigits_a = rdigits_b;
      }
      // The two numbers have the same number of rdigits.  It's now safe to
      // add them.
      add_int256_to_int256(value_a, value_b);
      int overflow = squeeze_int256(value_a, rdigits_a);
      if( overflow ) {
        *compute_error |= compute_error_overflow;
      }
      // At this point, we assign the sum to *C.
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          value_a.i128[0],
                                          rdigits_a,
                                          *rounded++);
    }
  }
}

extern "C"
void __gg__subtractf1_fixed_phase2(cbl_arith_format_t,
                                   size_t,
                                   size_t,
                                   size_t,
                                   cbl_round_t *rounded,
                                   int on_error_flag,
                                   int *compute_error) {
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;

  // This is the assignment phase of a SUBTRACT Format 1.
  // We take phase1_result and subtract it from C.
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  if( C[0]->type == FldFloat ) {
    // The target is a floating-point number, so we need to convert our
    // fixed-point intermediate into floating point and proceed accordingly.

    // Convert the intermediate
    _Float128 value_a = (_Float128)phase1_result.i128[0];
    value_a /= __gg__power_of_ten(phase1_rdigits);
    // Pick up the target
    _Float128 value_b = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
    value_b -= value_a;
    // At this point, we assign the difference to *C.
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        value_b,
                                        *rounded++);
  } else {
    // We have a fixed-point intermediate, and we are accumulating into a
    // fixed-point target.
    int256 value_a = phase1_result;
    int rdigits_a = phase1_rdigits;
    int256 value_b = {};
    int rdigits_b;
    get_int256_from_qualified_field(value_b, rdigits_b, C[0], C_o[0], C_s[0]);
    // We have to scale the one with fewer rdigits to match the one with
    // greater rdigits:
    if( rdigits_a > rdigits_b ) {
      scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
      rdigits_b = rdigits_a;
    } else if( rdigits_a < rdigits_b ) {
      scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
      rdigits_a = rdigits_b;
    }
    // The two numbers have the same number of rdigits.  It's now safe to
    // subtract them.
    subtract_int256_from_int256(value_b, value_a);
    int overflow = squeeze_int256(value_b, rdigits_b);
    if( overflow ) {
      *compute_error |= compute_error_overflow;
    }
    // At this point, we assign the difference to *C.
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        value_b.i128[0],
                                        rdigits_b,
                                        *rounded++);
  }
}

extern "C"
void __gg__subtractf2_fixed_phase1(cbl_arith_format_t,
                                   size_t nA,
                                   size_t,
                                   size_t,
                                   cbl_round_t *rounded,
                                   int on_error_flag,
                                   int *compute_error) {
  // This is the calculation phase of a fixed-point SUBTRACT Format 2
  cblc_field_t **B = __gg__treeplet_2f;
  size_t *B_o = __gg__treeplet_2o;
  size_t *B_s = __gg__treeplet_2s;

  // Add up all the A values
  __gg__add_fixed_phase1(not_expected_e, nA, 0, 0,
                         rounded, on_error_flag, compute_error);

  // Subtract that from the B value:
  int256 value_a = phase1_result;
  int rdigits_a = phase1_rdigits;
  int256 value_b = {};
  int rdigits_b;
  get_int256_from_qualified_field(value_b, rdigits_b, B[0], B_o[0], B_s[0]);
  // We have to scale the one with fewer rdigits to match the one with
  // greater rdigits:
  if( rdigits_a > rdigits_b ) {
    scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
    rdigits_b = rdigits_a;
  } else if( rdigits_a < rdigits_b ) {
    scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
    rdigits_a = rdigits_b;
  }
  // The two numbers have the same number of rdigits.  It's now safe to
  // subtract them.
  subtract_int256_from_int256(value_b, value_a);
  int overflow = squeeze_int256(value_b, rdigits_b);
  if( overflow ) {
    *compute_error |= compute_error_overflow;
  }
  phase1_result  = value_b;
  phase1_rdigits = rdigits_b;
}

extern "C"
void __gg__subtractf1_float_phase2(cbl_arith_format_t,
                                   size_t,
                                   size_t,
                                   size_t,
                                   cbl_round_t *rounded,
                                   int on_error_flag,
                                   int *compute_error) {
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  // This is the assignment phase of a SUBTRACT Format 1.
  // We take phase1_result and subtract it from C.
  _Float128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
  temp = subtraction_helper_float(temp, phase1_result_float, compute_error);
  *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                      on_size_error,
                                      temp,
                                      *rounded++);
}

extern "C"
void __gg__subtractf2_float_phase1(cbl_arith_format_t,
                                   size_t nA,
                                   size_t,
                                   size_t,
                                   cbl_round_t *rounded,
                                   int on_error_flag,
                                   int *compute_error) {
  // This is the calculation phase of a floating-point SUBTRACT Format 2
  cblc_field_t **B = __gg__treeplet_2f;
  size_t *B_o = __gg__treeplet_2o;
  size_t *B_s = __gg__treeplet_2s;

  // Add up all the A values
  __gg__add_float_phase1(not_expected_e, nA, 0, 0,
                         rounded, on_error_flag, compute_error);

  // Subtract that sum from the B value:
  _Float128 value_b = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
  phase1_result_float = subtraction_helper_float(value_b,
                                                 phase1_result_float,
                                                 compute_error);
}

extern "C"
void __gg__subtractf3(cbl_arith_format_t,
                      size_t nA,
                      size_t,
                      size_t,
                      cbl_round_t *rounded,
                      int on_error_flag,
                      int *compute_error) {
  // This is a SUBTRACT Format 3.  Each A[i] gets subtracted from the matching
  // C[i].  Each subtraction is treated separately.
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  for(size_t i=0; i<nA; i++) {
    if( A[i]->type == FldFloat || C[i]->type == FldFloat ) {
      _Float128 value_a = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
      _Float128 value_b = __gg__float128_from_qualified_field(C[i], C_o[i], C_s[i]);
      value_b = subtraction_helper_float(value_b, value_a, compute_error);
      // At this point, we assign the difference to *C.
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          value_b,
                                          *rounded++);
    } else {
      // We are doing fixed-point subtraction.
      int256 value_a;
      int rdigits_a;
      int256 value_b;
      int rdigits_b;
      get_int256_from_qualified_field(value_a, rdigits_a, A[i], A_o[i], A_s[i]);
      get_int256_from_qualified_field(value_b, rdigits_b, C[i], C_o[i], C_s[i]);
      // We have to scale the one with fewer rdigits to match the one with
      // greater rdigits:
      if( rdigits_a > rdigits_b ) {
        scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
        rdigits_b = rdigits_a;
      } else if( rdigits_a < rdigits_b ) {
        scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
        rdigits_a = rdigits_b;
      }
      // The two numbers have the same number of rdigits.  It's now safe to
      // subtract them.
      subtract_int256_from_int256(value_b, value_a);
      int overflow = squeeze_int256(value_b, rdigits_b);
      if( overflow ) {
        *compute_error |= compute_error_overflow;
      }
      // At this point, we assign the difference to *C.
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          value_b.i128[0],
                                          rdigits_b,
                                          *rounded++);
    }
  }
}

static bool      multiply_intermediate_is_float;
static _Float128 multiply_intermediate_float;
static __int128  multiply_intermediate_int128;
static int       multiply_intermediate_rdigits;

extern "C"
void __gg__multiplyf1_phase1(cbl_arith_format_t,
                             size_t,
                             size_t,
                             size_t,
                             cbl_round_t *,
                             int,
                             int *) {
  // We are getting just the one value, which we are converting to the
  // necessary intermediate form
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;

  if( A[0]->type == FldFloat ) {
    multiply_intermediate_is_float = true;
    multiply_intermediate_float =
      __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
  } else {
    multiply_intermediate_is_float = false;
    multiply_intermediate_int128 =
      __gg__binary_value_from_qualified_field(&multiply_intermediate_rdigits,
                                              A[0], A_o[0], A_s[0]);
  }
}

static void multiply_int128_by_int128(int256 &ABCD,
                                      __int128 ab_value,
                                      __int128 cd_value) {
  int is_negative =
    ( ((uint8_t *)(&ab_value))[15] ^ ((uint8_t *)(&cd_value))[15] ) & 0x80;
  if( ab_value < 0 ) {
    ab_value = -ab_value;
  }
  if( cd_value < 0 ) {
    cd_value = -cd_value;
  }
  uint128 AC00;
  uint128 AD0;
  uint128 BC0;
  uint128 BD;
  // Let's extract the digits.
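  // The decomposition used here: writing ab = a*2^64 + b and cd = c*2^64 + d,
  //   ab * cd = (a*c)*2^128 + (a*d + b*c)*2^64 + (b*d)
  // AC00, AD0, BC0, and BD below are those four partial products; the
  // trailing zeros in the names mark how many 64-bit places each one is
  // shifted left when it is copied into the 256-bit accumulator.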
  uint64_t a = *(uint64_t *)((unsigned char *)(&ab_value)+8);
  uint64_t b = *(uint64_t *)((unsigned char *)(&ab_value)+0);
  uint64_t c = *(uint64_t *)((unsigned char *)(&cd_value)+8);
  uint64_t d = *(uint64_t *)((unsigned char *)(&cd_value)+0);

  // multiply (a0 + b) * (c0 + d)
  AC00 = (uint128)a * c;
  AD0  = (uint128)a * d;
  BC0  = (uint128)b * c;
  BD   = (uint128)b * d;

  // ABCD is the sum of those four pieces
  int256 temp;
  ABCD.i128[1] = 0;
  ABCD.i128[0] = BD;

  temp.i64[3] = 0;
  memcpy(&temp.i64[1], &BC0, 16);
  temp.i64[0] = 0;
  add_int256_to_int256(ABCD, temp);

  memcpy(&temp.i64[1], &AD0, 16);
  add_int256_to_int256(ABCD, temp);

  temp.i64[1] = 0;
  memcpy(&temp.i64[2], &AC00, 16);
  add_int256_to_int256(ABCD, temp);

  // ABCD is now a 256-bit integer carrying the combined number of decimal
  // places of the two factors
  if( is_negative ) {
    negate_int256(ABCD);
  }
}

extern "C"
void __gg__multiplyf1_phase2(cbl_arith_format_t,
                             size_t,
                             size_t,
                             size_t,
                             cbl_round_t *rounded,
                             int on_error_flag,
                             int *compute_error) {
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
  int error_this_time = 0;
  _Float128 a_value;
  _Float128 b_value;

  if( multiply_intermediate_is_float ) {
    a_value = multiply_intermediate_float;
    if( C[0]->type == FldFloat ) {
      b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
      goto float_float;
    } else {
      // float times fixed
      b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
      goto float_float;
    }
  } else {
    if( C[0]->type == FldFloat ) {
      // fixed times float
      a_value = (_Float128)multiply_intermediate_int128;
      if( multiply_intermediate_rdigits ) {
        a_value /= (_Float128)__gg__power_of_ten(multiply_intermediate_rdigits);
      }
      b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
      goto float_float;
    } else {
      // fixed times fixed
      // We have two 128-bit numbers.  Call them AB and CD, where A, B, C, D
      // are 64-bit "digits".  We need to multiply them to create a 256-bit
      // result
      int cd_rdigits;
      __int128 ab_value = multiply_intermediate_int128;
      __int128 cd_value = __gg__binary_value_from_qualified_field(&cd_rdigits,
                                                                  C[0],
                                                                  C_o[0],
                                                                  C_s[0]);
      int256 ABCD;
      int rdigits = multiply_intermediate_rdigits + cd_rdigits;
      multiply_int128_by_int128(ABCD, ab_value, cd_value);
      int overflow = squeeze_int256(ABCD, rdigits);
      if( overflow ) {
        *compute_error |= compute_error_overflow;
      }
      // At this point, we assign the product to *C.
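      // The product's scale is the sum of the factors' scales, which is why
      // rdigits was set to multiply_intermediate_rdigits + cd_rdigits above:
      // for example, 1.25 (125 with rdigits 2) times 0.5 (5 with rdigits 1)
      // is 625 with rdigits 3, i.e. 0.625.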
      *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                          on_size_error,
                                          ABCD.i128[0],
                                          rdigits,
                                          *rounded++);
      goto done;
    }
  }

float_float:
  a_value = multiply_helper_float(a_value, b_value, &error_this_time);
  if( error_this_time && on_size_error ) {
    *compute_error |= error_this_time;
    rounded++;
  } else {
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        a_value,
                                        *rounded++);
  }
done:
  return;
}

extern "C"
void __gg__multiplyf2(cbl_arith_format_t,
                      size_t,
                      size_t,
                      size_t nC,
                      cbl_round_t *rounded,
                      int on_error_flag,
                      int *compute_error) {
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;
  cblc_field_t **B = __gg__treeplet_2f;
  size_t *B_o = __gg__treeplet_2o;
  size_t *B_s = __gg__treeplet_2s;
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

  bool got_float = false;
  _Float128 product_float;
  int256 product_fix;
  int product_fix_digits;

  if( A[0]->type == FldFloat || B[0]->type == FldFloat ) {
    _Float128 a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
    _Float128 b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
    product_float = multiply_helper_float(a_value, b_value, compute_error);
    got_float = true;
  } else {
    int a_rdigits;
    int b_rdigits;
    __int128 a_value = __gg__binary_value_from_qualified_field(&a_rdigits,
                                                               A[0], A_o[0], A_s[0]);
    __int128 b_value = __gg__binary_value_from_qualified_field(&b_rdigits,
                                                               B[0], B_o[0], B_s[0]);
    product_fix_digits = a_rdigits + b_rdigits;
    multiply_int128_by_int128(product_fix, a_value, b_value);
    int overflow = squeeze_int256(product_fix, product_fix_digits);
    if( overflow ) {
      *compute_error |= compute_error_overflow;
    }
  }
  // Assign the product to each of the nC targets:
  for(size_t i=0; i<nC; i++) {
    if( got_float ) {
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          product_float,
                                          *rounded++);
    } else {
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          product_fix.i128[0],
                                          product_fix_digits,
                                          *rounded++);
    }
  }
}

static void shift_in_place128(uint8_t *arr, size_t nbytes, int bits) {
  // Left-shift the nbytes/16 128-bit words at arr by the given number of
  // bits, carrying the bits shifted out of each word into the next word up.
  uint128 *as128 = (uint128 *)arr;
  uint128 overflow = 0;
  for(size_t i=0; i<nbytes/16; i++) {
    uint128 temp = as128[i] >> (128 - bits);
    as128[i] <<= bits;
    as128[i] += overflow;
    overflow = temp;
  }
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static char * int256_as_decimal(int256 val) {
  char ach[120];
  memset(ach, 0, sizeof(ach));
  strcpy(ach, "0");
  int index = 0;
  while( val.i128[0] || val.i128[1] ) {
    int256 before;
    int256 after;
    before = val;
    after  = val;
    divide_int256_by_int64(after, 10);
    multiply_int256_by_int64(after, 10);
    uint64_t digit = before.i64[0] - after.i64[0];
    ach[index++] = digit + '0';
    divide_int256_by_int64(val, 10);
  }
  if( !index ) {
    index = 1;
  }
  index -= 1;
  int r = 0;
  static char retval[120];
  while( index >= 0 ) {
    retval[r++] = ach[index--];
  }
  retval[r++] = '\0';
  return retval;
}
#pragma GCC diagnostic pop

static void divide_int128_by_int128(int256   &quotient,
                                    int      &quotient_rdigits,
                                    __int128  dividend,
                                    int       dividend_rdigits,
                                    __int128  divisor,
                                    int       divisor_rdigits,
                                    int      *compute_error) {
  if( divisor == 0 ) {
    // Can't divide by zero
    *compute_error |= compute_error_divide_by_zero;
    memset(&quotient, 0, sizeof(quotient));
    memcpy(&quotient, &dividend, 8);
    quotient_rdigits = dividend_rdigits;
    return;
  }
  bool is_negative = false;
  if( dividend < 0 ) {
    is_negative = !is_negative;
    dividend = -dividend;
  }
  if( divisor < 0 ) {
    is_negative = !is_negative;
    divisor = -divisor;
  }
  // We are going to be referencing the 64-bit pieces of the 128-bit divisor:
  uint64_t *divisor64 = (uint64_t *)&divisor;
  quotient.i128[1] = 0;
  quotient.i128[0] = dividend;
  // In order to get 0.3333333....
  // from 1 / 3, we are going to scale up the numerator so that it has 37
  // rdigits:
  int scale = MAX_FIXED_POINT_DIGITS;
  scale_int256_by_digits(quotient, scale);
  quotient_rdigits = scale + dividend_rdigits - divisor_rdigits;

  // Now, let's see if we can do a simple divide-by-single-place calculation:
  if( divisor64[1] == 0 ) {
    // Yes!  The divisor fits into 64 bits:
    divide_int256_by_int64(quotient, (uint64_t)divisor);
  } else {
    // We have to do long division, and that means Knuth's Algorithm D.  Let's
    // review where we are.
    //
    // We are using 64-bit places to divide one 128-bit number by another.  We
    // know that both are positive.  So, we are dividing
    //
    //      AB by CD, where we know C is non-zero.
    //
    // Because we want fractional digits to the right, we multiplied by 10**37,
    // which is smaller than 2**64, so we have one additional place:
    //
    //      ABx by CD
    //
    // Algorithm D requires that we left-shift ABx and CD by enough bits to
    // turn on the high-order bit of CD.  This will be a value between 1 and
    // 63 shifts, resulting in
    //
    //      ABxy by CD.
    //
    // We can know that the quotient will have at most two places.  We can see
    // this in the decimal analogy.  The worst case scenario is dividing
    //
    //      99 by 10
    //
    // We multiply the top by 10 to give us one fractional decimal place in
    // the result:
    //
    //      990 by 10
    //
    // To satisfy Algorithm D's requirement that C be >= b/2, we multiply both
    // dividend and divisor by 5:
    //
    //      4950 by 50
    //
    // And then we do the work that gives us the two-place answer of 99.

    // Here is our four-place 256-bit "numerator"
    int256 numerator;
    // Copy the three-place ABx value into the numerator area
    memcpy(&numerator, &quotient, sizeof(quotient));

    // Calculate how many bits we need to shift CD to make the high-order bit
    // turn on:
    int bits_to_shift = 0;
    int i = 15;
    while( ((uint8_t *)(&divisor))[i] == 0 ) {
      i -= 1;
      bits_to_shift += 8;
    }
    uint8_t tail = ((uint8_t *)(&divisor))[i];
    while( !(tail & 0x80) ) {
      bits_to_shift += 1;
      tail <<= 1;
    }
    // Shift both the numerator and the divisor that number of bits
    shift_in_place128((uint8_t *)&numerator, sizeof(numerator), bits_to_shift);
    shift_in_place128((uint8_t *)&divisor,   sizeof(divisor),   bits_to_shift);

    // We are now ready to do the guess-multiply-subtract loop.  We know that
    // the result will have two places, so we know we are going to go through
    // that loop two times.  We will build the quotient from the high-order
    // place down:
    int q_place = 1;
    memset(&quotient, 0, sizeof(quotient));
    while( q_place >= 0 ) {
      // We develop our guess for a quotient by dividing the top two places of
      // the numerator area by C
      uint128 temp;
      uint64_t *temp64 = (uint64_t *)&temp;
      temp64[1] = numerator.i64[q_place+2];
      temp64[0] = numerator.i64[q_place+1];
      quotient.i64[q_place] = temp / divisor64[1];
      // Now we multiply our 64-bit guess by the 128-bit CD to get the
      // three-place value we are going to subtract from the numerator area.
      uint64_t subber[3];
      subber[2] = 0;
      // Start with the bottom 128 bits of the "subber"
      *(uint128 *)subber = (uint128)divisor64[0] * quotient.i64[q_place];
      // Get the next 128 bits of subber
      temp = (uint128)divisor64[1] * quotient.i64[q_place];
      // Add the top of the first product to the bottom of the second:
      subber[1] += temp64[0];
      // See if there was an overflow:
      if( subber[1] < temp64[0] ) {
        // There was an overflow
        subber[2] = 1;
      }
      // And now put the top of the second product into place:
      subber[2] += temp64[1];
      // "subber" is now the three-place product of the two-place divisor
      // times the one-place guess

      // We now subtract the three-place subber from the appropriate place of
      // the numerator:
      uint64_t borrow = 0;
      for(size_t i=0; i<3; i++) {
        if( numerator.i64[q_place + i] == 0 && borrow ) {
          // We are subtracting from zero and we have a borrow.  Leave the
          // borrow on and just do the subtraction:
          numerator.i64[q_place + i] -= subber[i];
        } else {
          uint64_t stash = numerator.i64[q_place + i];
          numerator.i64[q_place + i] -= borrow;
          numerator.i64[q_place + i] -= subber[i];
          if( numerator.i64[q_place + i] > stash ) {
            // After subtracting, the value got bigger, which means we have
            // to borrow from the next value to the left
            borrow = 1;
          } else {
            borrow = 0;
          }
        }
      }
      // The three-place subber has been removed from the numerator.

      // Now Algorithm D comes back into play.  Knuth has proved that the
      // guess is usually correct, but sometimes can be one too large, which
      // means that the numerator area goes negative.  On rare occasions, the
      // guess can be two too large.  So, we have to make sure that the
      // numerator area is actually positive by adding subber back in.
      while( numerator.i64[q_place+2] & 0x8000000000000000UL ) {
        // We need to add subber back into the numerator area
        uint64_t carry = 0;
        for(size_t i=0; i<3; i++) {
          if( numerator.i64[q_place + i] == 0xFFFFFFFFFFFFFFFFUL && carry ) {
            // We are at the top and have a carry.  Just leave the carry on
            // and do the addition:
            numerator.i64[q_place + i] += subber[i];
          } else {
            // We are not at the top.
            uint64_t stash = numerator.i64[q_place + i];
            numerator.i64[q_place + i] += carry;
            numerator.i64[q_place + i] += subber[i];
            if( numerator.i64[q_place + i] < stash ) {
              // The addition caused the result to get smaller, meaning that
              // we wrapped around:
              carry = 1;
            } else {
              carry = 0;
            }
          }
        }
      }
      q_place -= 1;
    }
  }
  if( is_negative ) {
    negate_int256(quotient);
  }
}

extern "C"
void __gg__dividef1_phase2(cbl_arith_format_t,
                           size_t,
                           size_t,
                           size_t,
                           cbl_round_t *rounded,
                           int on_error_flag,
                           int *compute_error) {
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
  int error_this_time = 0;
  _Float128 a_value;
  _Float128 b_value;

  if( multiply_intermediate_is_float ) {
    a_value = multiply_intermediate_float;
    if( C[0]->type == FldFloat ) {
      b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
      goto float_float;
    } else {
      // float divisor, fixed-point target; handle it in floating point
      b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
      goto float_float;
    }
  } else {
    if( C[0]->type == FldFloat ) {
      // fixed divisor, floating-point target
      a_value = (_Float128)multiply_intermediate_int128;
      if( multiply_intermediate_rdigits ) {
        a_value /= (_Float128)__gg__power_of_ten(multiply_intermediate_rdigits);
      }
      b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
      goto float_float;
    } else {
      // fixed divided by fixed
      // We have two 128-bit numbers.  Call them AB and CD, where A, B, C, D
      // are 64-bit "digits".
      // We need to divide them to create a 256-bit result
      int dividend_rdigits;
      __int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits,
                                                                  C[0],
                                                                  C_o[0],
                                                                  C_s[0]);
      int quotient_rdigits;
      int256 quotient;
      divide_int128_by_int128(quotient,
                              quotient_rdigits,
                              dividend,
                              dividend_rdigits,
                              multiply_intermediate_int128,
                              multiply_intermediate_rdigits,
                              compute_error);
      int overflow = squeeze_int256(quotient, quotient_rdigits);
      if( overflow ) {
        *compute_error |= compute_error_overflow;
      }
      // At this point, we assign the quotient to *C.
      *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                          on_size_error,
                                          quotient.i128[0],
                                          quotient_rdigits,
                                          *rounded++);
      goto done;
    }
  }

float_float:
  b_value = divide_helper_float(b_value, a_value, &error_this_time);
  *compute_error |= error_this_time;
  if( error_this_time && on_size_error ) {
    rounded++;
  } else {
    *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                        on_size_error,
                                        b_value,
                                        *rounded++);
  }
done:
  return;
}

extern "C"
void __gg__dividef23(cbl_arith_format_t,
                     size_t,
                     size_t,
                     size_t nC,
                     cbl_round_t *rounded,
                     int on_error_flag,
                     int *compute_error) {
  cblc_field_t **A = __gg__treeplet_1f;
  size_t *A_o = __gg__treeplet_1o;
  size_t *A_s = __gg__treeplet_1s;
  cblc_field_t **B = __gg__treeplet_2f;
  size_t *B_o = __gg__treeplet_2o;
  size_t *B_s = __gg__treeplet_2s;
  cblc_field_t **C = __gg__treeplet_3f;
  size_t *C_o = __gg__treeplet_3o;
  size_t *C_s = __gg__treeplet_3s;
  bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
  int error_this_time = 0;

  if( A[0]->type == FldFloat || B[0]->type == FldFloat ) {
    _Float128 a_value;
    _Float128 b_value;
    _Float128 c_value;
    a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
    b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
    c_value = divide_helper_float(a_value, b_value, &error_this_time);
    *compute_error |= error_this_time;
    if( !error_this_time ) {
      // Assign the quotient to each of the nC targets:
      for(size_t i=0; i<nC; i++) {
        *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                            on_size_error,
                                            c_value,
                                            *rounded++);
      }
    }
  } else {
    // fixed divided by fixed
    int dividend_rdigits;
    __int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits,
                                                                A[0], A_o[0], A_s[0]);
    int divisor_rdigits;
    __int128 divisor = __gg__binary_value_from_qualified_field(&divisor_rdigits,
                                                               B[0], B_o[0], B_s[0]);
    int quotient_rdigits;
    int256 quotient;
    divide_int128_by_int128(quotient, quotient_rdigits,
                            dividend, dividend_rdigits,
                            divisor, divisor_rdigits,
                            compute_error);
    int overflow = squeeze_int256(quotient, quotient_rdigits);
    if( overflow ) {
      *compute_error |= compute_error_overflow;
    }
    for(size_t i=0; i<nC; i++) {
      *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                          on_size_error,
                                          quotient.i128[0],
                                          quotient_rdigits,
                                          *rounded++);
    }
  }
}

// (The opening of the DIVIDE ... REMAINDER entry point did not survive; its
//  body resumes below.  It uses the same treeplet registers and a rounding
//  pointer named rounded_p.)

  if( A[0]->type == FldFloat || B[0]->type == FldFloat ) {
    _Float128 a_value;
    _Float128 b_value;
    _Float128 c_value;
    a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
    b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
    c_value = divide_helper_float(a_value, b_value, &error_this_time);
    *compute_error |= error_this_time;
    if( !error_this_time ) {
      *compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
                                          on_size_error,
                                          c_value,
                                          *rounded_p++);
      // This is floating point, and there is a remainder, and we don't know
      // what that means.
      // Set the remainder to zero
      if( !*compute_error ) {
        c_value = 0;
        *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                            on_size_error,
                                            c_value,
                                            *rounded_p++);
      }
    }
  } else {
    // fixed divided by fixed
    int dividend_rdigits;
    __int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits,
                                                                A[0], A_o[0], A_s[0]);
    int divisor_rdigits;
    __int128 divisor = __gg__binary_value_from_qualified_field(&divisor_rdigits,
                                                               B[0], B_o[0], B_s[0]);
    int quotient_rdigits;
    int256 quotient;
    divide_int128_by_int128(quotient,
                            quotient_rdigits,
                            dividend,
                            dividend_rdigits,
                            divisor,
                            divisor_rdigits,
                            compute_error);
    *compute_error |= squeeze_int256(quotient, quotient_rdigits);
    if( !*compute_error ) {
      // We are going to need the unrounded quotient to calculate the
      // remainder
      __int128 unrounded_quotient = 0;
      int unrounded_quotient_digits;
      rounded_p += 1;   // Skip the rounded value for the remainder
      cbl_round_t rounded = *rounded_p;
      switch(rounded) {
        case truncation_e: {
          *compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
                                              on_size_error,
                                              quotient.i128[0],
                                              quotient_rdigits,
                                              *rounded_p++);
          unrounded_quotient = __gg__binary_value_from_qualified_field(
                                   &unrounded_quotient_digits,
                                   C[1], C_o[1], C_s[1]);
          break;
        }
        default: {
          conditional_stash(C[1], C_o[1], C_s[1],
                            false,
                            quotient.i128[0],
                            quotient_rdigits,
                            truncation_e);
          unrounded_quotient = __gg__binary_value_from_qualified_field(
                                   &unrounded_quotient_digits,
                                   C[1], C_o[1], C_s[1]);
          // At this point, we assign the rounded quotient to *C.
          *compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
                                              on_size_error,
                                              quotient.i128[0],
                                              quotient_rdigits,
                                              *rounded_p++);
          break;
        }
      }
      if( !*compute_error ) {
        // We need to calculate the remainder

        // Remainders in COBOL are seriously weird.  The NIST suite
        // has an example where 174 is divided by 16.  The quotient
        // is a 999.9, and the remainder is a 9999.
        // So, here goes:  174 by 16 is 10.875.  The unrounded
        // assignment to Q is thus 10.8
        // You then multiply 10.8 by 16, giving 172.8
        // That gets subtracted from 174, giving 1.2
        // That gets assigned to the 9999 remainder, which is thus 1
        // Any mathematician would walk away, slowly, shaking their head.

        // We need to multiply the unrounded quotient by the divisor.
        int256 temp;
        int temp_rdigits;
        // Step 1:  Multiply the unrounded quotient by the divisor
        multiply_int128_by_int128(temp, unrounded_quotient, divisor);
        temp_rdigits = unrounded_quotient_digits + divisor_rdigits;

        int256 odividend = {};
        memcpy(&odividend, &dividend, sizeof(dividend));
        // We need to line up the rdigits so that we can subtract temp from
        // odividend:
        if( temp_rdigits < dividend_rdigits ) {
          scale_int256_by_digits(temp, dividend_rdigits - temp_rdigits);
          temp_rdigits = dividend_rdigits;
        } else if( temp_rdigits > dividend_rdigits ) {
          scale_int256_by_digits(odividend, temp_rdigits - dividend_rdigits);
        }
        subtract_int256_from_int256(odividend, temp);
        *compute_error |= squeeze_int256(odividend, temp_rdigits);
        if( !*compute_error ) {
          *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                              on_size_error,
                                              odividend.i128[0],
                                              temp_rdigits,
                                              truncation_e);
        }
      }
    }
  }
}
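/* Illustration (not part of the library): the remainder rule described in the
   comments above, applied to the NIST example of 174 divided by 16 with a
   one-decimal-place quotient, using plain integer arithmetic scaled the same
   way the int256/rdigits pairs are scaled. */
#if 0
static void remainder_example(void) {
  // All values are held in tenths (rdigits == 1).
  long dividend  = 1740;                        // 174.0
  long divisor   = 160;                         //  16.0
  long quotient  = (dividend * 10) / divisor;   // 108  -> 10.8, truncated
  long product   = quotient * divisor / 10;     // 1728 -> 172.8
  long remainder = dividend - product;          // 12   ->   1.2
  // Assigning 1.2 to an integer remainder field truncates it to 1.
  (void)remainder;
}
#endif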