author    | Georg-Johann Lay <avr@gjlay.de> | 2020-01-08 09:31:07 +0000
committer | Georg-Johann Lay <gjl@gcc.gnu.org> | 2020-01-08 09:31:07 +0000
commit    | f30dd607669212de135dec1f1d8a93b8954c327c (patch)
tree      | 497248e433c158b74b956765c29800219a4f94e2 /libgcc/config/avr
parent    | d5bc18085c8b0344e7b53febc3cd3cc681a98ea3 (diff)
Implement 64-bit double functions.
gcc/
PR target/92055
* config.gcc (tm_defines) [target=avr]: Support --with-libf7,
--with-double-comparison.
* doc/install.texi: Document them.
* config/avr/avr-c.c (avr_cpu_cpp_builtins)
<WITH_LIBF7_LIBGCC, WITH_LIBF7_MATH, WITH_LIBF7_MATH_SYMBOLS>
<WITH_DOUBLE_COMPARISON>: New built-in defines.
* doc/invoke.texi (AVR Built-in Macros): Document them.
* config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New.
* config/avr/avr.c (avr_float_lib_compare_returns_bool): New function.
* config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro.
libgcc/
PR target/92055
* config.host (tmake_file) [target=avr]: Add t-libf7,
t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=.
* config/avr/t-avrlibc: Don't copy libgcc.a if there are modules
depending on sizeof (double) or sizeof (long double).
* config/avr/libf7: New folder.
libgcc/config/avr/libf7/
PR target/92055
* t-libf7: New file.
* t-libf7-math: New file.
* t-libf7-math-symbols: New file.
* libf7-common.mk: New file.
* libf7-asm-object.mk: New file.
* libf7-c-object.mk: New file.
* asm-defs.h: New file.
* libf7.h: New file.
* libf7.c: New file.
* libf7-asm.sx: New file.
* libf7-array.def: New file.
* libf7-const.def: New file.
* libf7-constdef.h: New file.
* f7renames.sh: New script.
* f7wraps.sh: New script.
* f7-renames.h: New generated file.
* f7-wraps.h: New generated file.
From-SVN: r279994
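
For illustration, a minimal sketch (editorial, not part of the commit) of how user code might test the new built-in defines. The macro names come from the avr-c.c change above; the semantics assumed in the comments follow the three --with-libf7 flavours suggested by t-libf7, t-libf7-math and t-libf7-math-symbols, and are an assumption here:

    /* Hypothetical sketch: detect how the toolchain was configured
       with respect to LIBF7.  */
    #if defined (WITH_LIBF7_MATH_SYMBOLS)
    /* Standard math names (sin, sinl, ...) presumably resolve to
       LIBF7 wrappers.  */
    #elif defined (WITH_LIBF7_MATH)
    /* Math functions presumably exist only under __-prefixed names.  */
    #elif defined (WITH_LIBF7_LIBGCC)
    /* Only the basic 64-bit double arithmetic lives in libgcc.  */
    #endif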
Diffstat (limited to 'libgcc/config/avr')
-rw-r--r-- | libgcc/config/avr/libf7/ChangeLog | 22
-rw-r--r-- | libgcc/config/avr/libf7/asm-defs.h | 237
-rw-r--r-- | libgcc/config/avr/libf7/f7-renames.h | 234
-rw-r--r-- | libgcc/config/avr/libf7/f7-wraps.h | 638
-rwxr-xr-x | libgcc/config/avr/libf7/f7renames.sh | 80
-rwxr-xr-x | libgcc/config/avr/libf7/f7wraps.sh | 244
-rw-r--r-- | libgcc/config/avr/libf7/libf7-array.def | 45
-rw-r--r-- | libgcc/config/avr/libf7/libf7-asm-object.mk | 20
-rw-r--r-- | libgcc/config/avr/libf7/libf7-asm.sx | 1664
-rw-r--r-- | libgcc/config/avr/libf7/libf7-c-object.mk | 20
-rw-r--r-- | libgcc/config/avr/libf7/libf7-common.mk | 102
-rw-r--r-- | libgcc/config/avr/libf7/libf7-const.def | 201
-rw-r--r-- | libgcc/config/avr/libf7/libf7-constdef.h | 43
-rw-r--r-- | libgcc/config/avr/libf7/libf7.c | 2501
-rw-r--r-- | libgcc/config/avr/libf7/libf7.h | 687
-rw-r--r-- | libgcc/config/avr/libf7/t-libf7 | 159
-rw-r--r-- | libgcc/config/avr/libf7/t-libf7-math | 21
-rw-r--r-- | libgcc/config/avr/libf7/t-libf7-math-symbols | 11
-rw-r--r-- | libgcc/config/avr/t-avrlibc | 7
19 files changed, 6936 insertions, 0 deletions
diff --git a/libgcc/config/avr/libf7/ChangeLog b/libgcc/config/avr/libf7/ChangeLog new file mode 100644 index 0000000..1ec4735 --- /dev/null +++ b/libgcc/config/avr/libf7/ChangeLog @@ -0,0 +1,22 @@ +2020-01-08 Georg-Johann Lay <avr@gjlay.de> + + Implement 64-bit double functions. + + PR target/92055 + * t-libf7: New file. + * t-libf7-math: New file. + * t-libf7-math-symbols: New file. + * libf7-common.mk: New file. + * libf7-asm-object.mk: New file. + * libf7-c-object.mk: New file. + * asm-defs.h: New file. + * libf7.h: New file. + * libf7.c: New file. + * libf7-asm.sx: New file. + * libf7-array.def: New file. + * libf7-const.def: New file. + * libf7-constdef.h: New file. + * f7renames.sh: New script. + * f7wraps.sh: New script. + * f7-renames.h: New generated file. + * f7-wraps.h: New generated file. diff --git a/libgcc/config/avr/libf7/asm-defs.h b/libgcc/config/avr/libf7/asm-defs.h new file mode 100644 index 0000000..aea245a --- /dev/null +++ b/libgcc/config/avr/libf7/asm-defs.h @@ -0,0 +1,237 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of LIBF7, which is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef ASM_DEFS_H +#define ASM_DEFS_H + +#ifdef __AVR__ +#ifdef __ASSEMBLER__ +/*****************************************************************/ +/* Stuff for Assembler-only */ +/*****************************************************************/ + +#if defined (__AVR_TINY__) + #define __tmp_reg__ 16 + #define __zero_reg__ 17 +#else + #define __tmp_reg__ 0 + #define __zero_reg__ 1 +#endif /* AVR_TINY */ + +#define __SREG__ 0x3f +#define __SP_L__ 0x3d +#if defined (__AVR_HAVE_SPH__) +#define __SP_H__ 0x3e +#endif + +#if !defined ASM_DEFS_HAVE_DEFUN +.macro DEFUN name + .global \name + .func \name + \name: +.endm + +.macro ENDF name + .size \name, .-\name + .endfunc +.endm + +.macro LABEL name + .global \name + \name: +.endm +#endif /* HAVE_DEFUN */ + + +#if defined (__AVR_HAVE_JMP_CALL__) + #define XCALL call + #define XJMP jmp +#else + #define XCALL rcall + #define XJMP rjmp +#endif + +#if defined (__AVR_HAVE_EIJMP_EICALL__) + #define XICALL eicall + #define XIJMP eijmp + #define PC_SIZE 3 +#else + #define XICALL icall + #define XIJMP ijmp + #define PC_SIZE 2 +#endif + +.macro skipnext + cpse r16, r16 +.endm + +/* + Factor out support of MOVW. Usage is like + + wmov 30, 24 + + to move R25:R24 to R31:R30, i.e. plain register numbers + are required and no register prefix 'R'. 
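+
+ A further worked example (editorial note, assuming the same
+ convention as above):
+
+ wmov 26, 30
+
+ copies Z (R31:R30) to X (R27:R26).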
+*/ + +#if defined (__AVR_HAVE_MOVW__) +#define wmov movw +#else + .macro wmov dst src + ..dst = \dst + ..src = \src + ..regno = 0 + .irp reg, \ + r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, \ + r10, r11, r12, r13, r14, r15, r16, r17, r18, r19, \ + r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, \ + r30, r31 + .ifc \reg,\dst + ..dst = ..regno + .endif + .ifc \reg,\src + ..src = ..regno + .endif + ..regno = ..regno + 1 + .endr + + ..regno = 0 + + .irp reg, \ + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, \ + R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, \ + R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, \ + R30, R31 + .ifc \reg,\dst + ..dst = ..regno + .endif + .ifc \reg,\src + ..src = ..regno + .endif + ..regno = ..regno + 1 + .endr + + ..regno = 0 + + .irp reg, \ + X, x, XL, xl, Xl, xL, x, x \ + Y, y, YL, yl, Yl, yL, y, y, \ + Z, z, ZL, zl, Zl, zL, z, z + .ifc \reg,\dst + ..dst = (..regno / 8) + 26 + .endif + .ifc \reg,\src + ..src = (..regno / 8) + 26 + .endif + ..regno = ..regno + 1 + .endr + + mov ..dst+0, ..src+0 + mov ..dst+1, ..src+1 + .endm +#endif /* MOVW */ + + +#if !defined (__AVR_TINY__) +/* + Convenience macro for easy use of __prologue_saves__ from libgcc. + Push the N_PUSHED callee-saved registers Y, R17, R16, R15, ... + with 0 <= N_PUSHED <= 18. The frame pointer (Y) is set up according + to a frame size of N_FRAME. Clobbers TMP_REG. + For the code of __prologue_saves__ from libgcc see + http://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/avr/lib1funcs.S?revision=267494&view=markup#l2159 +*/ + +.macro do_prologue_saves n_pushed n_frame=0 + ldi r26, lo8(\n_frame) + ldi r27, hi8(\n_frame) + ldi r30, lo8(gs(.L_prologue_saves.\@)) + ldi r31, hi8(gs(.L_prologue_saves.\@)) + XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2) +.L_prologue_saves.\@: +.endm + +/* + Convenience macro for easy use of __epilogue_restores__ from libgcc. + Undo the effect of __prologue_saves__. Clobbers TMP_REG. + For the code of __epilogue_restores__ from libgcc see + http://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/avr/lib1funcs.S?revision=267494&view=markup#l2216 +*/ + +.macro do_epilogue_restores n_pushed n_frame=0 + in r28, __SP_L__ +#ifdef __AVR_HAVE_SPH__ + in r29, __SP_H__ +.if \n_frame > 63 + subi r28, lo8(-\n_frame) + sbci r29, hi8(-\n_frame) +.elseif \n_frame > 0 + adiw r28, \n_frame +.endif +#else + clr r29 +.if \n_frame > 0 + subi r28, lo8(-\n_frame) +.endif +#endif /* HAVE SPH */ + ldi r30, \n_pushed + XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2) +.endm + +#endif /* AVR_TINY */ + +#else /* Assembler */ +/*****************************************************************/ +/* Space for C/C++ only Stuff */ +/*****************************************************************/ +#endif /* Assembler */ + +/*****************************************************************/ +/* Space for Generic Stuff (Assembler, C, C++) */ +/*****************************************************************/ + +#ifdef __AVR_PM_BASE_ADDRESS__ + /* + Devices with a linear address space: Flash memory is seen in the + RAM address space at an offset of __AVR_PM_BASE_ADDRESS__ and can + be accessed by LD*. This is the case for devices like ATtiny40 + (avrtiny) or ATtiny1616 and ATmega4808 (avrxmega3). The default + linker script locates .rodata in the .text output section and + at the required offset. + */ + #define RODATA_SECTION .rodata.asm + #define USE_LD 1 + #define USE_LPM 0 +#else /* PM_BASE_ADDRESS */ + /* + No linear address space. 
As .rodata is located in RAM, we have to + use .progmem.data (located in flash) and LPM to read the data. + This will also work for devices from avrxmega3. + */ + #define RODATA_SECTION .progmem.data.asm + #define USE_LD 0 + #define USE_LPM 1 +#endif /* PM_BASE_ADDRESS */ + +#endif /* target AVR */ +#endif /* ASM_DEFS_H */ diff --git a/libgcc/config/avr/libf7/f7-renames.h b/libgcc/config/avr/libf7/f7-renames.h new file mode 100644 index 0000000..e4c0c0d --- /dev/null +++ b/libgcc/config/avr/libf7/f7-renames.h @@ -0,0 +1,234 @@ +/* + Auto-generated file, do not change by hand. + + Generated by: f7renames.sh. + Generated using: F7_PREFIX = __f7_ from t-libf7. + F7F, F7F_cst, F7F_asm from libf7-common.mk. + Included by: libf7.h. + Used by: libf7.c, libf7.h, libf7-asm.sx, f7-wraps.h. +*/ + +#ifndef F7_RENAMES_H +#define F7_RENAMES_H + +#define F7_(name) __f7_##name +#define F7P __f7_ + +/* Renames for libf7.c, libf7.h. */ + +#define f7_fabs __f7_fabs +#define f7_neg __f7_neg +#define f7_add __f7_add +#define f7_sub __f7_sub +#define f7_addsub __f7_addsub +#define f7_div __f7_div +#define f7_div1 __f7_div1 +#define f7_divx __f7_divx +#define f7_fmod __f7_fmod +#define f7_sqrt __f7_sqrt +#define f7_cbrt __f7_cbrt +#define f7_square __f7_square +#define f7_mul __f7_mul +#define f7_mulx __f7_mulx +#define f7_madd_msub __f7_madd_msub +#define f7_madd __f7_madd +#define f7_msub __f7_msub +#define f7_hypot __f7_hypot +#define f7_Ineg __f7_Ineg +#define f7_Iadd __f7_Iadd +#define f7_Isub __f7_Isub +#define f7_Imul __f7_Imul +#define f7_Idiv __f7_Idiv +#define f7_IRsub __f7_IRsub +#define f7_Isquare __f7_Isquare +#define f7_Ildexp __f7_Ildexp +#define f7_Isqrt __f7_Isqrt +#define f7_le __f7_le +#define f7_lt __f7_lt +#define f7_gt __f7_gt +#define f7_ge __f7_ge +#define f7_ne __f7_ne +#define f7_eq __f7_eq +#define f7_cmp __f7_cmp +#define f7_cmp_abs __f7_cmp_abs +#define f7_ordered __f7_ordered +#define f7_unordered __f7_unordered +#define f7_cmp_unordered __f7_cmp_unordered +#define f7_lt_impl __f7_lt_impl +#define f7_gt_impl __f7_gt_impl +#define f7_le_impl __f7_le_impl +#define f7_ge_impl __f7_ge_impl +#define f7_eq_impl __f7_eq_impl +#define f7_ne_impl __f7_ne_impl +#define f7_unord_impl __f7_unord_impl +#define f7_lrint __f7_lrint +#define f7_ldexp __f7_ldexp +#define f7_frexp __f7_frexp +#define f7_exp __f7_exp +#define f7_logx __f7_logx +#define f7_log __f7_log +#define f7_log10 __f7_log10 +#define f7_log2 __f7_log2 +#define f7_minmax __f7_minmax +#define f7_fmax __f7_fmax +#define f7_fmin __f7_fmin +#define f7_floor __f7_floor +#define f7_ceil __f7_ceil +#define f7_round __f7_round +#define f7_lround __f7_lround +#define f7_trunc __f7_trunc +#define f7_truncx __f7_truncx +#define f7_horner __f7_horner +#define f7_pow10 __f7_pow10 +#define f7_exp10 __f7_exp10 +#define f7_pow __f7_pow +#define f7_powi __f7_powi +#define f7_sin __f7_sin +#define f7_cos __f7_cos +#define f7_tan __f7_tan +#define f7_cotan __f7_cotan +#define f7_sincos __f7_sincos +#define f7_sinh __f7_sinh +#define f7_cosh __f7_cosh +#define f7_tanh __f7_tanh +#define f7_sinhcosh __f7_sinhcosh +#define f7_asinacos __f7_asinacos +#define f7_asin __f7_asin +#define f7_acos __f7_acos +#define f7_atan __f7_atan +#define f7_atan2 __f7_atan2 +#define f7_mul_noround __f7_mul_noround +#define f7_sqrt16_round __f7_sqrt16_round +#define f7_sqrt16_floor __f7_sqrt16_floor +#define f7_clr_mant_lsbs __f7_clr_mant_lsbs +#define f7_abscmp_msb_ge __f7_abscmp_msb_ge +#define f7_lshrdi3 __f7_lshrdi3 +#define f7_ashldi3 __f7_ashldi3 +#define f7_assert 
__f7_assert +#define f7_classify __f7_classify +#define f7_class_inf __f7_class_inf +#define f7_class_nan __f7_class_nan +#define f7_class_number __f7_class_number +#define f7_class_zero __f7_class_zero +#define f7_class_nonzero __f7_class_nonzero +#define f7_class_sign __f7_class_sign +#define f7_signbit __f7_signbit +#define f7_set_sign __f7_set_sign +#define f7_set_nan __f7_set_nan +#define f7_set_inf __f7_set_inf +#define f7_is_inf __f7_is_inf +#define f7_is_nan __f7_is_nan +#define f7_is_number __f7_is_number +#define f7_is_zero __f7_is_zero +#define f7_is_nonzero __f7_is_nonzero +#define f7_clr __f7_clr +#define f7_copy __f7_copy +#define f7_copy_P __f7_copy_P +#define f7_copy_mant __f7_copy_mant +#define f7_msbit __f7_msbit +#define f7_is0 __f7_is0 +#define f7_cmp_mant __f7_cmp_mant +#define f7_store_expo __f7_store_expo +#define f7_abs __f7_abs +#define f7_set_s64 __f7_set_s64 +#define f7_set_s32 __f7_set_s32 +#define f7_set_s16 __f7_set_s16 +#define f7_set_s16_impl __f7_set_s16_impl +#define f7_set_u16_worker __f7_set_u16_worker +#define f7_set_u64 __f7_set_u64 +#define f7_set_u32 __f7_set_u32 +#define f7_set_u16 __f7_set_u16 +#define f7_set_u16_impl __f7_set_u16_impl +#define f7_set_float __f7_set_float +#define f7_set_pdouble __f7_set_pdouble +#define f7_set_double_impl __f7_set_double_impl +#define f7_set_double __f7_set_double +#define f7_init_impl __f7_init_impl +#define f7_init __f7_init +#define f7_get_s16 __f7_get_s16 +#define f7_get_s32 __f7_get_s32 +#define f7_get_s64 __f7_get_s64 +#define f7_get_float __f7_get_float +#define f7_get_u16 __f7_get_u16 +#define f7_get_u32 __f7_get_u32 +#define f7_get_u64 __f7_get_u64 +#define f7_get_double __f7_get_double +#define f7_set_eps __f7_set_eps +#define f7_set_1pow2 __f7_set_1pow2 +#define f7_min __f7_min +#define f7_max __f7_max +#define f7_exp10 __f7_exp10 +#define f7_floatunsidf __f7_floatunsidf +#define f7_floatsidf __f7_floatsidf +#define f7_extendsfdf2 __f7_extendsfdf2 +#define f7_fixdfsi __f7_fixdfsi +#define f7_fixdfdi __f7_fixdfdi +#define f7_fixunsdfdi __f7_fixunsdfdi +#define f7_fixunsdfsi __f7_fixunsdfsi +#define f7_truncdfsf2 __f7_truncdfsf2 +#define f7_le_impl __f7_le_impl +#define f7_lt_impl __f7_lt_impl +#define f7_gt_impl __f7_gt_impl +#define f7_ge_impl __f7_ge_impl +#define f7_ne_impl __f7_ne_impl +#define f7_eq_impl __f7_eq_impl +#define f7_unord_impl __f7_unord_impl + +/* Renames for libf7.c, libf7.h. */ + +#define f7_const_1 __f7_const_1 +#define f7_const_1_P __f7_const_1_P +#define f7_const_2 __f7_const_2 +#define f7_const_2_P __f7_const_2_P +#define f7_const_1_2 __f7_const_1_2 +#define f7_const_1_2_P __f7_const_1_2_P +#define f7_const_1_3 __f7_const_1_3 +#define f7_const_1_3_P __f7_const_1_3_P +#define f7_const_m1 __f7_const_m1 +#define f7_const_m1_P __f7_const_m1_P +#define f7_const_pi __f7_const_pi +#define f7_const_pi_P __f7_const_pi_P +#define f7_const_ln2 __f7_const_ln2 +#define f7_const_ln2_P __f7_const_ln2_P +#define f7_const_ln10 __f7_const_ln10 +#define f7_const_ln10_P __f7_const_ln10_P +#define f7_const_1_ln2 __f7_const_1_ln2 +#define f7_const_1_ln2_P __f7_const_1_ln2_P +#define f7_const_1_ln10 __f7_const_1_ln10 +#define f7_const_1_ln10_P __f7_const_1_ln10_P +#define f7_const_sqrt2 __f7_const_sqrt2 +#define f7_const_sqrt2_P __f7_const_sqrt2_P + +/* Renames for libf7-asm.sx, f7-wraps.h. 
*/ + +#define f7_classify_asm __f7_classify_asm +#define f7_store_expo_asm __f7_store_expo_asm +#define f7_clr_asm __f7_clr_asm +#define f7_copy_asm __f7_copy_asm +#define f7_copy_P_asm __f7_copy_P_asm +#define f7_copy_mant_asm __f7_copy_mant_asm +#define f7_cmp_mant_asm __f7_cmp_mant_asm +#define f7_normalize_asm __f7_normalize_asm +#define f7_store_expo_asm __f7_store_expo_asm +#define f7_set_u64_asm __f7_set_u64_asm +#define f7_set_s64_asm __f7_set_s64_asm +#define f7_addsub_mant_scaled_asm __f7_addsub_mant_scaled_asm +#define f7_mul_mant_asm __f7_mul_mant_asm +#define f7_to_integer_asm __f7_to_integer_asm +#define f7_to_unsigned_asm __f7_to_unsigned_asm +#define f7_clr_mant_lsbs_asm __f7_clr_mant_lsbs_asm +#define f7_div_asm __f7_div_asm +#define f7_sqrt_approx_asm __f7_sqrt_approx_asm +#define f7_sqrt16_round_asm __f7_sqrt16_round_asm +#define f7_sqrt16_floor_asm __f7_sqrt16_floor_asm +#define f7_lshrdi3_asm __f7_lshrdi3_asm +#define f7_ashldi3_asm __f7_ashldi3_asm +#define f7_class_D_asm __f7_class_D_asm +#define f7_call_ddd_asm __f7_call_ddd_asm +#define f7_call_xdd_asm __f7_call_xdd_asm +#define f7_call_ddx_asm __f7_call_ddx_asm +#define f7_call_dd_asm __f7_call_dd_asm +#define f7_call_xd_asm __f7_call_xd_asm +#define f7_call_dx_asm __f7_call_dx_asm + +#endif /* F7_RENAMES_H */ diff --git a/libgcc/config/avr/libf7/f7-wraps.h b/libgcc/config/avr/libf7/f7-wraps.h new file mode 100644 index 0000000..9de5267 --- /dev/null +++ b/libgcc/config/avr/libf7/f7-wraps.h @@ -0,0 +1,638 @@ +;; Copyright (C) 2019-2020 Free Software Foundation, Inc. +;; +;; This file is part of LIBF7, which is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; Under Section 7 of GPL version 3, you are granted additional +;; permissions described in the GCC Runtime Library Exception, version +;; 3.1, as published by the Free Software Foundation. +;; +;; You should have received a copy of the GNU General Public License and +;; a copy of the GCC Runtime Library Exception along with this program; +;; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +;; <http://www.gnu.org/licenses/>. */ + +;; Auto-generated file, do not change by hand. +;; +;; Wrappers for double and long double functions to use functions that +;; operate on f7_t, and get f7_t* and const f7_t*. 
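+;;
+;; (Editorial note, inferring the helper semantics from the names:)
+;; e.g. __adddf3 below loads Z with gs(__f7_add) and jumps to the
+;; call_ddd helper in libf7-asm.sx, which presumably converts its two
+;; double arguments to f7_t objects, calls __f7_add, and hands the
+;; f7_t result back as a double.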
+;; +;; Generated by: f7wraps.sh +;; Included by : libf7-asm.sx +;; WITH_LIBF7_MATH_FUNCTIONS=1 +;; WITH_LIBF7_MATH_SYMBOLS=1 + +;; Functions that usually live in libgcc: __<name>df3 for <name> in: +;; add sub mul div + +;; double __adddf3 (double, double) ; add +#ifdef F7MOD_D_add_ +_DEFUN __adddf3 + ALIAS __add + .global F7_NAME(add) + ldi ZH, hi8(gs(F7_NAME(add))) + ldi ZL, lo8(gs(F7_NAME(add))) + F7jmp call_ddd +_ENDF __adddf3 +#endif /* F7MOD_D_add_ */ + +;; double __subdf3 (double, double) ; sub +#ifdef F7MOD_D_sub_ +_DEFUN __subdf3 + ALIAS __sub + .global F7_NAME(sub) + ldi ZH, hi8(gs(F7_NAME(sub))) + ldi ZL, lo8(gs(F7_NAME(sub))) + F7jmp call_ddd +_ENDF __subdf3 +#endif /* F7MOD_D_sub_ */ + +;; double __muldf3 (double, double) ; mul +#ifdef F7MOD_D_mul_ +_DEFUN __muldf3 + ALIAS __mul + .global F7_NAME(mul) + ldi ZH, hi8(gs(F7_NAME(mul))) + ldi ZL, lo8(gs(F7_NAME(mul))) + F7jmp call_ddd +_ENDF __muldf3 +#endif /* F7MOD_D_mul_ */ + +;; double __divdf3 (double, double) ; div +#ifdef F7MOD_D_div_ +_DEFUN __divdf3 + ALIAS __div + .global F7_NAME(div) + ldi ZH, hi8(gs(F7_NAME(div))) + ldi ZL, lo8(gs(F7_NAME(div))) + F7jmp call_ddd +_ENDF __divdf3 +#endif /* F7MOD_D_div_ */ + +;; Functions that usually live in libgcc: __<name>df2 for <name> in: +;; le lt ge gt ne eq unord + +;; bool __ledf2 (double, double) ; le +#ifdef F7MOD_D_le_ +_DEFUN __ledf2 + .global F7_NAME(le_impl) + ldi ZH, hi8(gs(F7_NAME(le_impl))) + ldi ZL, lo8(gs(F7_NAME(le_impl))) + F7jmp call_xdd +_ENDF __ledf2 +#endif /* F7MOD_D_le_ */ + +;; bool __ltdf2 (double, double) ; lt +#ifdef F7MOD_D_lt_ +_DEFUN __ltdf2 + .global F7_NAME(lt_impl) + ldi ZH, hi8(gs(F7_NAME(lt_impl))) + ldi ZL, lo8(gs(F7_NAME(lt_impl))) + F7jmp call_xdd +_ENDF __ltdf2 +#endif /* F7MOD_D_lt_ */ + +;; bool __gedf2 (double, double) ; ge +#ifdef F7MOD_D_ge_ +_DEFUN __gedf2 + .global F7_NAME(ge_impl) + ldi ZH, hi8(gs(F7_NAME(ge_impl))) + ldi ZL, lo8(gs(F7_NAME(ge_impl))) + F7jmp call_xdd +_ENDF __gedf2 +#endif /* F7MOD_D_ge_ */ + +;; bool __gtdf2 (double, double) ; gt +#ifdef F7MOD_D_gt_ +_DEFUN __gtdf2 + .global F7_NAME(gt_impl) + ldi ZH, hi8(gs(F7_NAME(gt_impl))) + ldi ZL, lo8(gs(F7_NAME(gt_impl))) + F7jmp call_xdd +_ENDF __gtdf2 +#endif /* F7MOD_D_gt_ */ + +;; bool __nedf2 (double, double) ; ne +#ifdef F7MOD_D_ne_ +_DEFUN __nedf2 + .global F7_NAME(ne_impl) + ldi ZH, hi8(gs(F7_NAME(ne_impl))) + ldi ZL, lo8(gs(F7_NAME(ne_impl))) + F7jmp call_xdd +_ENDF __nedf2 +#endif /* F7MOD_D_ne_ */ + +;; bool __eqdf2 (double, double) ; eq +#ifdef F7MOD_D_eq_ +_DEFUN __eqdf2 + .global F7_NAME(eq_impl) + ldi ZH, hi8(gs(F7_NAME(eq_impl))) + ldi ZL, lo8(gs(F7_NAME(eq_impl))) + F7jmp call_xdd +_ENDF __eqdf2 +#endif /* F7MOD_D_eq_ */ + +;; bool __unorddf2 (double, double) ; unord +#ifdef F7MOD_D_unord_ +_DEFUN __unorddf2 + .global F7_NAME(unord_impl) + ldi ZH, hi8(gs(F7_NAME(unord_impl))) + ldi ZL, lo8(gs(F7_NAME(unord_impl))) + F7jmp call_xdd +_ENDF __unorddf2 +#endif /* F7MOD_D_unord_ */ + +;; Functions that usually live in libgcc: __<name> for <name> in: +;; fixdfsi fixdfdi fixunsdfdi fixunsdfsi truncdfsf2 + +;; type_t __fixdfsi (double) ; fixdfsi +#ifdef F7MOD_D_fixdfsi_ +_DEFUN __fixdfsi + .global F7_NAME(fixdfsi) + ldi ZH, hi8(gs(F7_NAME(fixdfsi))) + ldi ZL, lo8(gs(F7_NAME(fixdfsi))) + F7jmp call_xd +_ENDF __fixdfsi +#endif /* F7MOD_D_fixdfsi_ */ + +;; type_t __fixdfdi (double) ; fixdfdi +#ifdef F7MOD_D_fixdfdi_ +_DEFUN __fixdfdi + .global F7_NAME(fixdfdi) + ldi ZH, hi8(gs(F7_NAME(fixdfdi))) + ldi ZL, lo8(gs(F7_NAME(fixdfdi))) + F7jmp call_xd +_ENDF __fixdfdi +#endif 
/* F7MOD_D_fixdfdi_ */ + +;; type_t __fixunsdfdi (double) ; fixunsdfdi +#ifdef F7MOD_D_fixunsdfdi_ +_DEFUN __fixunsdfdi + .global F7_NAME(fixunsdfdi) + ldi ZH, hi8(gs(F7_NAME(fixunsdfdi))) + ldi ZL, lo8(gs(F7_NAME(fixunsdfdi))) + F7jmp call_xd +_ENDF __fixunsdfdi +#endif /* F7MOD_D_fixunsdfdi_ */ + +;; type_t __fixunsdfsi (double) ; fixunsdfsi +#ifdef F7MOD_D_fixunsdfsi_ +_DEFUN __fixunsdfsi + .global F7_NAME(fixunsdfsi) + ldi ZH, hi8(gs(F7_NAME(fixunsdfsi))) + ldi ZL, lo8(gs(F7_NAME(fixunsdfsi))) + F7jmp call_xd +_ENDF __fixunsdfsi +#endif /* F7MOD_D_fixunsdfsi_ */ + +;; type_t __truncdfsf2 (double) ; truncdfsf2 +#ifdef F7MOD_D_truncdfsf2_ +_DEFUN __truncdfsf2 + .global F7_NAME(truncdfsf2) + ldi ZH, hi8(gs(F7_NAME(truncdfsf2))) + ldi ZL, lo8(gs(F7_NAME(truncdfsf2))) + F7jmp call_xd +_ENDF __truncdfsf2 +#endif /* F7MOD_D_truncdfsf2_ */ + +;; Functions that usually live in libgcc: __<name> for <name> in: +;; floatunsidf floatsidf extendsfdf2 + +;; double __floatunsidf (type_t) ; floatunsidf +#ifdef F7MOD_D_floatunsidf_ +_DEFUN __floatunsidf + .global F7_NAME(floatunsidf) + ldi ZH, hi8(gs(F7_NAME(floatunsidf))) + ldi ZL, lo8(gs(F7_NAME(floatunsidf))) + F7jmp call_dx +_ENDF __floatunsidf +#endif /* F7MOD_D_floatunsidf_ */ + +;; double __floatsidf (type_t) ; floatsidf +#ifdef F7MOD_D_floatsidf_ +_DEFUN __floatsidf + .global F7_NAME(floatsidf) + ldi ZH, hi8(gs(F7_NAME(floatsidf))) + ldi ZL, lo8(gs(F7_NAME(floatsidf))) + F7jmp call_dx +_ENDF __floatsidf +#endif /* F7MOD_D_floatsidf_ */ + +;; double __extendsfdf2 (type_t) ; extendsfdf2 +#ifdef F7MOD_D_extendsfdf2_ +_DEFUN __extendsfdf2 + .global F7_NAME(extendsfdf2) + ldi ZH, hi8(gs(F7_NAME(extendsfdf2))) + ldi ZL, lo8(gs(F7_NAME(extendsfdf2))) + F7jmp call_dx +_ENDF __extendsfdf2 +#endif /* F7MOD_D_extendsfdf2_ */ + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; pow fmin fmax fmod hypot atan2 + +;; double __pow (double, double) +#ifdef F7MOD_D_pow_ +_DEFUN __pow + DALIAS pow + LALIAS powl + .global F7_NAME(pow) + ldi ZH, hi8(gs(F7_NAME(pow))) + ldi ZL, lo8(gs(F7_NAME(pow))) + F7jmp call_ddd +_ENDF __pow +#endif /* F7MOD_D_pow_ */ + +;; double __fmin (double, double) +#ifdef F7MOD_D_fmin_ +_DEFUN __fmin + DALIAS fmin + LALIAS fminl + .global F7_NAME(fmin) + ldi ZH, hi8(gs(F7_NAME(fmin))) + ldi ZL, lo8(gs(F7_NAME(fmin))) + F7jmp call_ddd +_ENDF __fmin +#endif /* F7MOD_D_fmin_ */ + +;; double __fmax (double, double) +#ifdef F7MOD_D_fmax_ +_DEFUN __fmax + DALIAS fmax + LALIAS fmaxl + .global F7_NAME(fmax) + ldi ZH, hi8(gs(F7_NAME(fmax))) + ldi ZL, lo8(gs(F7_NAME(fmax))) + F7jmp call_ddd +_ENDF __fmax +#endif /* F7MOD_D_fmax_ */ + +;; double __fmod (double, double) +#ifdef F7MOD_D_fmod_ +_DEFUN __fmod + DALIAS fmod + LALIAS fmodl + .global F7_NAME(fmod) + ldi ZH, hi8(gs(F7_NAME(fmod))) + ldi ZL, lo8(gs(F7_NAME(fmod))) + F7jmp call_ddd +_ENDF __fmod +#endif /* F7MOD_D_fmod_ */ + +;; double __hypot (double, double) +#ifdef F7MOD_D_hypot_ +_DEFUN __hypot + DALIAS hypot + LALIAS hypotl + .global F7_NAME(hypot) + ldi ZH, hi8(gs(F7_NAME(hypot))) + ldi ZL, lo8(gs(F7_NAME(hypot))) + F7jmp call_ddd +_ENDF __hypot +#endif /* F7MOD_D_hypot_ */ + +;; double __atan2 (double, double) +#ifdef F7MOD_D_atan2_ +_DEFUN __atan2 + DALIAS atan2 + LALIAS atan2l + .global F7_NAME(atan2) + ldi ZH, hi8(gs(F7_NAME(atan2))) + ldi ZL, lo8(gs(F7_NAME(atan2))) + F7jmp call_ddd +_ENDF __atan2 +#endif /* F7MOD_D_atan2_ */ + +;; Functions that usually live in libm: Depending on 
[long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; ldexp frexp + +;; double __ldexp (double, word_t) +#ifdef F7MOD_D_ldexp_ +_DEFUN __ldexp + DALIAS ldexp + LALIAS ldexpl + .global F7_NAME(ldexp) + ldi ZH, hi8(gs(F7_NAME(ldexp))) + ldi ZL, lo8(gs(F7_NAME(ldexp))) + F7jmp call_ddx +_ENDF __ldexp +#endif /* F7MOD_D_ldexp_ */ + +;; double __frexp (double, word_t) +#ifdef F7MOD_D_frexp_ +_DEFUN __frexp + DALIAS frexp + LALIAS frexpl + .global F7_NAME(frexp) + ldi ZH, hi8(gs(F7_NAME(frexp))) + ldi ZL, lo8(gs(F7_NAME(frexp))) + F7jmp call_ddx +_ENDF __frexp +#endif /* F7MOD_D_frexp_ */ + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; sqrt cbrt exp exp10 pow10 log log10 log2 sin cos tan cotan asin acos atan ceil floor trunc round sinh cosh tanh + +;; double __sqrt (double) +#ifdef F7MOD_D_sqrt_ +_DEFUN __sqrt + DALIAS sqrt + LALIAS sqrtl + .global F7_NAME(sqrt) + ldi ZH, hi8(gs(F7_NAME(sqrt))) + ldi ZL, lo8(gs(F7_NAME(sqrt))) + F7jmp call_dd +_ENDF __sqrt +#endif /* F7MOD_D_sqrt_ */ + +;; double __cbrt (double) +#ifdef F7MOD_D_cbrt_ +_DEFUN __cbrt + DALIAS cbrt + LALIAS cbrtl + .global F7_NAME(cbrt) + ldi ZH, hi8(gs(F7_NAME(cbrt))) + ldi ZL, lo8(gs(F7_NAME(cbrt))) + F7jmp call_dd +_ENDF __cbrt +#endif /* F7MOD_D_cbrt_ */ + +;; double __exp (double) +#ifdef F7MOD_D_exp_ +_DEFUN __exp + DALIAS exp + LALIAS expl + .global F7_NAME(exp) + ldi ZH, hi8(gs(F7_NAME(exp))) + ldi ZL, lo8(gs(F7_NAME(exp))) + F7jmp call_dd +_ENDF __exp +#endif /* F7MOD_D_exp_ */ + +;; double __exp10 (double) +#ifdef F7MOD_D_exp10_ +_DEFUN __exp10 + DALIAS exp10 + LALIAS exp10l + .global F7_NAME(exp10) + ldi ZH, hi8(gs(F7_NAME(exp10))) + ldi ZL, lo8(gs(F7_NAME(exp10))) + F7jmp call_dd +_ENDF __exp10 +#endif /* F7MOD_D_exp10_ */ + +;; double __pow10 (double) +#ifdef F7MOD_D_pow10_ +_DEFUN __pow10 + DALIAS pow10 + LALIAS pow10l + .global F7_NAME(pow10) + ldi ZH, hi8(gs(F7_NAME(pow10))) + ldi ZL, lo8(gs(F7_NAME(pow10))) + F7jmp call_dd +_ENDF __pow10 +#endif /* F7MOD_D_pow10_ */ + +;; double __log (double) +#ifdef F7MOD_D_log_ +_DEFUN __log + DALIAS log + LALIAS logl + .global F7_NAME(log) + ldi ZH, hi8(gs(F7_NAME(log))) + ldi ZL, lo8(gs(F7_NAME(log))) + F7jmp call_dd +_ENDF __log +#endif /* F7MOD_D_log_ */ + +;; double __log10 (double) +#ifdef F7MOD_D_log10_ +_DEFUN __log10 + DALIAS log10 + LALIAS log10l + .global F7_NAME(log10) + ldi ZH, hi8(gs(F7_NAME(log10))) + ldi ZL, lo8(gs(F7_NAME(log10))) + F7jmp call_dd +_ENDF __log10 +#endif /* F7MOD_D_log10_ */ + +;; double __log2 (double) +#ifdef F7MOD_D_log2_ +_DEFUN __log2 + DALIAS log2 + LALIAS log2l + .global F7_NAME(log2) + ldi ZH, hi8(gs(F7_NAME(log2))) + ldi ZL, lo8(gs(F7_NAME(log2))) + F7jmp call_dd +_ENDF __log2 +#endif /* F7MOD_D_log2_ */ + +;; double __sin (double) +#ifdef F7MOD_D_sin_ +_DEFUN __sin + DALIAS sin + LALIAS sinl + .global F7_NAME(sin) + ldi ZH, hi8(gs(F7_NAME(sin))) + ldi ZL, lo8(gs(F7_NAME(sin))) + F7jmp call_dd +_ENDF __sin +#endif /* F7MOD_D_sin_ */ + +;; double __cos (double) +#ifdef F7MOD_D_cos_ +_DEFUN __cos + DALIAS cos + LALIAS cosl + .global F7_NAME(cos) + ldi ZH, hi8(gs(F7_NAME(cos))) + ldi ZL, lo8(gs(F7_NAME(cos))) + F7jmp call_dd +_ENDF __cos +#endif /* F7MOD_D_cos_ */ + +;; double __tan (double) +#ifdef F7MOD_D_tan_ +_DEFUN __tan + DALIAS tan + LALIAS tanl + .global F7_NAME(tan) + ldi ZH, hi8(gs(F7_NAME(tan))) + ldi ZL, lo8(gs(F7_NAME(tan))) + F7jmp call_dd +_ENDF __tan +#endif /* 
F7MOD_D_tan_ */ + +;; double __cotan (double) +#ifdef F7MOD_D_cotan_ +_DEFUN __cotan + DALIAS cotan + LALIAS cotanl + .global F7_NAME(cotan) + ldi ZH, hi8(gs(F7_NAME(cotan))) + ldi ZL, lo8(gs(F7_NAME(cotan))) + F7jmp call_dd +_ENDF __cotan +#endif /* F7MOD_D_cotan_ */ + +;; double __asin (double) +#ifdef F7MOD_D_asin_ +_DEFUN __asin + DALIAS asin + LALIAS asinl + .global F7_NAME(asin) + ldi ZH, hi8(gs(F7_NAME(asin))) + ldi ZL, lo8(gs(F7_NAME(asin))) + F7jmp call_dd +_ENDF __asin +#endif /* F7MOD_D_asin_ */ + +;; double __acos (double) +#ifdef F7MOD_D_acos_ +_DEFUN __acos + DALIAS acos + LALIAS acosl + .global F7_NAME(acos) + ldi ZH, hi8(gs(F7_NAME(acos))) + ldi ZL, lo8(gs(F7_NAME(acos))) + F7jmp call_dd +_ENDF __acos +#endif /* F7MOD_D_acos_ */ + +;; double __atan (double) +#ifdef F7MOD_D_atan_ +_DEFUN __atan + DALIAS atan + LALIAS atanl + .global F7_NAME(atan) + ldi ZH, hi8(gs(F7_NAME(atan))) + ldi ZL, lo8(gs(F7_NAME(atan))) + F7jmp call_dd +_ENDF __atan +#endif /* F7MOD_D_atan_ */ + +;; double __ceil (double) +#ifdef F7MOD_D_ceil_ +_DEFUN __ceil + DALIAS ceil + LALIAS ceill + .global F7_NAME(ceil) + ldi ZH, hi8(gs(F7_NAME(ceil))) + ldi ZL, lo8(gs(F7_NAME(ceil))) + F7jmp call_dd +_ENDF __ceil +#endif /* F7MOD_D_ceil_ */ + +;; double __floor (double) +#ifdef F7MOD_D_floor_ +_DEFUN __floor + DALIAS floor + LALIAS floorl + .global F7_NAME(floor) + ldi ZH, hi8(gs(F7_NAME(floor))) + ldi ZL, lo8(gs(F7_NAME(floor))) + F7jmp call_dd +_ENDF __floor +#endif /* F7MOD_D_floor_ */ + +;; double __trunc (double) +#ifdef F7MOD_D_trunc_ +_DEFUN __trunc + DALIAS trunc + LALIAS truncl + .global F7_NAME(trunc) + ldi ZH, hi8(gs(F7_NAME(trunc))) + ldi ZL, lo8(gs(F7_NAME(trunc))) + F7jmp call_dd +_ENDF __trunc +#endif /* F7MOD_D_trunc_ */ + +;; double __round (double) +#ifdef F7MOD_D_round_ +_DEFUN __round + DALIAS round + LALIAS roundl + .global F7_NAME(round) + ldi ZH, hi8(gs(F7_NAME(round))) + ldi ZL, lo8(gs(F7_NAME(round))) + F7jmp call_dd +_ENDF __round +#endif /* F7MOD_D_round_ */ + +;; double __sinh (double) +#ifdef F7MOD_D_sinh_ +_DEFUN __sinh + DALIAS sinh + LALIAS sinhl + .global F7_NAME(sinh) + ldi ZH, hi8(gs(F7_NAME(sinh))) + ldi ZL, lo8(gs(F7_NAME(sinh))) + F7jmp call_dd +_ENDF __sinh +#endif /* F7MOD_D_sinh_ */ + +;; double __cosh (double) +#ifdef F7MOD_D_cosh_ +_DEFUN __cosh + DALIAS cosh + LALIAS coshl + .global F7_NAME(cosh) + ldi ZH, hi8(gs(F7_NAME(cosh))) + ldi ZL, lo8(gs(F7_NAME(cosh))) + F7jmp call_dd +_ENDF __cosh +#endif /* F7MOD_D_cosh_ */ + +;; double __tanh (double) +#ifdef F7MOD_D_tanh_ +_DEFUN __tanh + DALIAS tanh + LALIAS tanhl + .global F7_NAME(tanh) + ldi ZH, hi8(gs(F7_NAME(tanh))) + ldi ZL, lo8(gs(F7_NAME(tanh))) + F7jmp call_dd +_ENDF __tanh +#endif /* F7MOD_D_tanh_ */ + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; lrint lround + +;; type_t __lrint (double) +#ifdef F7MOD_D_lrint_ +_DEFUN __lrint + DALIAS lrint + LALIAS lrintl + .global F7_NAME(lrint) + ldi ZH, hi8(gs(F7_NAME(lrint))) + ldi ZL, lo8(gs(F7_NAME(lrint))) + F7jmp call_xd +_ENDF __lrint +#endif /* F7MOD_D_lrint_ */ + +;; type_t __lround (double) +#ifdef F7MOD_D_lround_ +_DEFUN __lround + DALIAS lround + LALIAS lroundl + .global F7_NAME(lround) + ldi ZH, hi8(gs(F7_NAME(lround))) + ldi ZL, lo8(gs(F7_NAME(lround))) + F7jmp call_xd +_ENDF __lround +#endif /* F7MOD_D_lround_ */ diff --git a/libgcc/config/avr/libf7/f7renames.sh b/libgcc/config/avr/libf7/f7renames.sh new file mode 100755 index 0000000..7ef251e --- 
/dev/null +++ b/libgcc/config/avr/libf7/f7renames.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env sh + +# The first command argument tells us which flavour to generate for +# the rest of the command line arguments. + +what=$1 +shift + +# The second command argument is the prefix to prepend to all functions. +# It is defined by F7_PREFIX in $2. + +PRE=$1 +shift + +case ${what} in + head) + cat << EOF +/* + Auto-generated file, do not change by hand. + + Generated by: `basename $0`. + Generated using: F7_PREFIX = ${PRE} from $1. + F7F, F7F_cst, F7F_asm from libf7-common.mk. + Included by: libf7.h. + Used by: libf7.c, libf7.h, libf7-asm.sx, f7-wraps.h. +*/ + +#ifndef F7_RENAMES_H +#define F7_RENAMES_H + +#define F7_(name) ${PRE}##name +#define F7P ${PRE} +EOF + ;; + + c) + if [ x${PRE} != xf7_ ]; then + echo " " + echo "/* Renames for libf7.c, libf7.h. */" + echo " " + for x in $*; do + echo "#define f7_$x ${PRE}$x" + done + fi + ;; + + cst) + if [ x${PRE} != xf7_ ]; then + echo " " + echo "/* Renames for libf7.c, libf7.h. */" + echo " " + for x in $*; do + echo "#define f7_const_${x} ${PRE}const_${x}" + echo "#define f7_const_${x}_P ${PRE}const_${x}_P" + done + fi + ;; + + asm) + if [ x${PRE} != xf7_ ]; then + echo " " + echo "/* Renames for libf7-asm.sx, f7-wraps.h. */" + echo " " + for x in $*; do + echo "#define f7_${x}_asm ${PRE}${x}_asm" + done + fi + ;; + + tail) + cat << EOF + +#endif /* F7_RENAMES_H */ +EOF + ;; + + *) + exit 1 + ;; +esac diff --git a/libgcc/config/avr/libf7/f7wraps.sh b/libgcc/config/avr/libf7/f7wraps.sh new file mode 100755 index 0000000..8536010 --- /dev/null +++ b/libgcc/config/avr/libf7/f7wraps.sh @@ -0,0 +1,244 @@ +#!/usr/bin/env sh + +# The first command argument $1 tells us which flavour to generate for +# the rest of the command line arguments. + +what=$1 +shift + +if [ "x$*" = "x" ]; then + none="(none)" +fi + +case ${what} in + header) + cat << EOF +;; Copyright (C) 2019-2020 Free Software Foundation, Inc. +;; +;; This file is part of LIBF7, which is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; Under Section 7 of GPL version 3, you are granted additional +;; permissions described in the GCC Runtime Library Exception, version +;; 3.1, as published by the Free Software Foundation. +;; +;; You should have received a copy of the GNU General Public License and +;; a copy of the GCC Runtime Library Exception along with this program; +;; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +;; <http://www.gnu.org/licenses/>. */ + +;; Auto-generated file, do not change by hand. +;; +;; Wrappers for double and long double functions to use functions that +;; operate on f7_t, and get f7_t* and const f7_t*. 
+;; +;; Generated by: `basename $0` +;; Included by : libf7-asm.sx +EOF + for n in $*; do + echo ";; $n" + done + ;; + + xd_libgcc) + cat << EOF + +;; Functions that usually live in libgcc: __<name> for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; type_t __${n} (double) ; $n +#ifdef F7MOD_D_${n}_ +_DEFUN __${n} + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, lo8(gs(F7_NAME($n))) + F7jmp call_xd +_ENDF __${n} +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + dx_libgcc) + cat << EOF + +;; Functions that usually live in libgcc: __<name> for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; double __${n} (type_t) ; $n +#ifdef F7MOD_D_${n}_ +_DEFUN __${n} + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, lo8(gs(F7_NAME($n))) + F7jmp call_dx +_ENDF __${n} +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + ddd_libgcc) + cat << EOF + +;; Functions that usually live in libgcc: __<name>df3 for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; double __${n}df3 (double, double) ; $n +#ifdef F7MOD_D_${n}_ +_DEFUN __${n}df3 + ALIAS __$n + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, lo8(gs(F7_NAME($n))) + F7jmp call_ddd +_ENDF __${n}df3 +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + xdd_libgcc_cmp) + cat << EOF + +;; Functions that usually live in libgcc: __<name>df2 for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; bool __${n}df2 (double, double) ; $n +#ifdef F7MOD_D_${n}_ +_DEFUN __${n}df2 + .global F7_NAME(${n}_impl) + ldi ZH, hi8(gs(F7_NAME(${n}_impl))) + ldi ZL, lo8(gs(F7_NAME(${n}_impl))) + F7jmp call_xdd +_ENDF __${n}df2 +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + dd_math) + cat << EOF + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; double __${n} (double) +#ifdef F7MOD_D_${n}_ +_DEFUN __${n} + DALIAS $n + LALIAS ${n}l + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, lo8(gs(F7_NAME($n))) + F7jmp call_dd +_ENDF __${n} +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + xd_math) + cat << EOF + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; type_t __${n} (double) +#ifdef F7MOD_D_${n}_ +_DEFUN __${n} + DALIAS $n + LALIAS ${n}l + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, lo8(gs(F7_NAME($n))) + F7jmp call_xd +_ENDF __${n} +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + ddd_math) + cat << EOF + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; double __${n} (double, double) +#ifdef F7MOD_D_${n}_ +_DEFUN __${n} + DALIAS $n + LALIAS ${n}l + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, lo8(gs(F7_NAME($n))) + F7jmp call_ddd +_ENDF __${n} +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + ddx_math) + cat << EOF + +;; Functions that usually live in libm: Depending on [long] double layout, +;; define <name> and <name>l as weak alias(es) of __<name> for <name> in: +;; $*${none} +EOF + for n in $*; do + cat << EOF + +;; double __${n} (double, word_t) +#ifdef F7MOD_D_${n}_ +_DEFUN __${n} + DALIAS $n + LALIAS ${n}l + .global F7_NAME($n) + ldi ZH, hi8(gs(F7_NAME($n))) + ldi ZL, 
lo8(gs(F7_NAME($n))) + F7jmp call_ddx +_ENDF __${n} +#endif /* F7MOD_D_${n}_ */ +EOF + done + ;; + + *) + exit 1 + ;; +esac diff --git a/libgcc/config/avr/libf7/libf7-array.def b/libgcc/config/avr/libf7/libf7-array.def new file mode 100644 index 0000000..7f4eeac --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-array.def @@ -0,0 +1,45 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of LIBF7, which is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +static const F7_PGMSPACE f7_t ARRAY_NAME[] = +{ + #define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \ + { .flags = FLAGS, .mant = { M0, M1, M2, M3, M4, M5, M6 }, .expo = EXPO }, + #include "libf7-const.def" + #undef F7_CONST_DEF +}; + +// static const uint8_t n_ARRAY_NAME = <Entries in ARRAY_NAME[]>. + +#define F7_n_NAME2(X) n_##X +#define F7_n_NAME1(X) F7_n_NAME2(X) + +F7_UNUSED static const uint8_t F7_n_NAME1 (ARRAY_NAME) = + #define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \ + + 1 + #include "libf7-const.def" + #undef F7_CONST_DEF +; + +#undef F7_n_NAME1 +#undef F7_n_NAME2 diff --git a/libgcc/config/avr/libf7/libf7-asm-object.mk b/libgcc/config/avr/libf7/libf7-asm-object.mk new file mode 100644 index 0000000..c577512 --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-asm-object.mk @@ -0,0 +1,20 @@ +# This file is included several times in a row, once for each element of +# $(iter-items). On each inclusion, we advance $o to the next element. +# $(iter-labels) is also advanced. +# This works similar to $(srcdir)/siditi-object.mk. + +o := $(firstword $(iter-items)) +iter-items := $(filter-out $o,$(iter-items)) + +$o-label := $(firstword $(iter-labels)) +iter-labels := $(wordlist 2,$(words $(iter-labels)),$(iter-labels)) + +f7_asm_$o$(objext): f7_asm_%$(objext): $(libf7)/libf7-asm.sx + $(gcc_compile) -DF7MOD_$($*-label)_ $(F7_ASM_FLAGS) \ + -c $< + +ifeq ($(enable_shared),yes) +f7_asm_$(o)_s$(objext): f7_asm_%_s$(objext): $(libf7)/libf7-asm.sx + $(gcc_s_compile) -DF7MOD_$($*-label)_ $(F7_ASM_FLAGS) \ + -c $< +endif diff --git a/libgcc/config/avr/libf7/libf7-asm.sx b/libgcc/config/avr/libf7/libf7-asm.sx new file mode 100644 index 0000000..7972557 --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-asm.sx @@ -0,0 +1,1664 @@ +;; Copyright (C) 2019-2020 Free Software Foundation, Inc. +;; +;; This file is part of LIBF7, which is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. 
+;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; Under Section 7 of GPL version 3, you are granted additional +;; permissions described in the GCC Runtime Library Exception, version +;; 3.1, as published by the Free Software Foundation. +;; +;; You should have received a copy of the GNU General Public License and +;; a copy of the GCC Runtime Library Exception along with this program; +;; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +;; <http://www.gnu.org/licenses/>. */ + +#ifndef __AVR_TINY__ + +#define ASM_DEFS_HAVE_DEFUN + +#include "asm-defs.h" +#include "libf7.h" + +#define ZERO __zero_reg__ +#define TMP __tmp_reg__ + +#define F7(name) F7_(name##_asm) + +.macro F7call name + .global F7(\name\()) + XCALL F7(\name\()) +.endm + +.macro F7jmp name + .global F7(\name\()) + XJMP F7(\name\()) +.endm + +;; Just for visibility in disassembly. +.macro LLL name + .global LLL.\name + LLL.\name: + nop +.endm + +.macro DEFUN name + .section .text.libf7.asm.\name, "ax", @progbits + .global F7(\name\()) + .func F7(\name\()) + F7(\name\()) : +.endm + +.macro ENDF name + .size F7(\name\()), . - F7(\name\()) + .endfunc +.endm + +.macro LABEL name + .global F7(\name\()) + F7(\name\()) : +.endm + +.macro _DEFUN name + .section .text.libf7.asm.\name, "ax", @progbits + .weak \name + .type \name, @function + \name : +.endm + +.macro _ENDF name + .size \name, . - \name +.endm + +.macro _LABEL name + .weak \name + .type \name, @function + \name : +.endm + +#define F7_NAME(X) F7_(X) + +;; Make a weak alias. +.macro ALIAS sym + .weak \sym + .type \sym, @function + \sym: +.endm + +;; Make a weak alias if double is 64 bits wide. +.macro DALIAS sym +#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_DOUBLE__ == 8 +ALIAS \sym +#endif +.endm + +;; Make a weak alias if long double is 64 bits wide. +.macro LALIAS sym +#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_LONG_DOUBLE__ == 8 +ALIAS \sym +#endif +.endm + +#define Off 1 +#define Expo (Off + F7_MANT_BYTES) + +#ifdef F7MOD_classify_ +;; r24 = classify (*Z) +;; NaN -> F7_FLAG_nan +;; INF -> F7_FLAG_inf [ | F7_FLAG_sign ] +;; ==0 -> F7_FLAG_zero +;; ... -> 0 [ | F7_FLAG_sign ] + +;; Clobbers: None (no TMP, no T). +DEFUN classify + + ld r24, Z + lsr r24 + brne .Lnan_or_inf + + ldd r24, Z+6+Off + tst r24 + brpl 0f + sbc r24, r24 + andi r24, F7_FLAG_sign + ret + +0: ldi r24, F7_FLAG_zero + ret + +.Lnan_or_inf: + rol r24 + ret + +ENDF classify +#endif /* F7MOD_classify_ */ + +#ifdef F7MOD_clr_ +DEFUN clr + std Z+0, ZERO + std Z+0+Off, ZERO + std Z+1+Off, ZERO + std Z+2+Off, ZERO + std Z+3+Off, ZERO + std Z+4+Off, ZERO + std Z+5+Off, ZERO + std Z+6+Off, ZERO + std Z+0+Expo, ZERO + std Z+1+Expo, ZERO + ret +ENDF clr + +#endif /* F7MOD_clr_ */ + +#ifdef F7MOD_clz_ +;; The libcc CLZ implementations like __clzsi2 aka. __builtin_clzl are +;; not very well suited for out purpose, so implement our own. + +#define ZBITS r26 +.macro .test.byte reg + or ZERO, \reg + brne .Loop_bit + subi ZBITS, -8 +.endm + +;; R26 = CLZ (uint64_t R18); CLZ (0) = 64. +;; Unchanged: T +DEFUN clzdi2 + clr ZBITS + ;; Catch the common case of normalized .mant for speed-up. 
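+ ;; (Editorial comment:) A normalized mantissa has its MSbit set, so
+ ;; the R25 test below answers the common case with CLZ = 0 at once.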
+ tst r25 + brmi 9f + .test.byte r25 + .test.byte r24 + .test.byte r23 + .test.byte r22 + .test.byte r21 + .test.byte r20 + .test.byte r19 + .test.byte r18 +.Ldone: + clr ZERO +9: ret + +.Loop_bit: + lsl ZERO + brcs .Ldone + inc ZBITS + rjmp .Loop_bit + +ENDF clzdi2 +#undef ZBITS +#endif /* F7MOD_clz_ */ + +#ifdef F7MOD_cmp_mant_ +DEFUN cmp_mant + + adiw X, 6 + Off + ld r24, X $ ldd TMP, Z+6+Off $ SUB r24, TMP + brne .Lunequal + + sbiw X, 6 + ld r24, X+ $ ldd TMP, Z+0+Off $ SUB r24, TMP + ld r24, X+ $ ldd TMP, Z+1+Off $ sbc r24, TMP + ld r24, X+ $ ldd TMP, Z+2+Off $ sbc r24, TMP + ld r24, X+ $ ldd TMP, Z+3+Off $ sbc r24, TMP + ld r24, X+ $ ldd TMP, Z+4+Off $ sbc r24, TMP + ld r24, X+ $ ldd TMP, Z+5+Off $ sbc r24, TMP + ;; MSBs are already known to be equal + breq 9f +.Lunequal: + sbc r24, r24 + sbci r24, -1 +9: sbiw X, 6 + Off + ret +ENDF cmp_mant +#endif /* F7MOD_cmp_mant_ */ + +#define CA 18 +#define C0 CA+1 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 +#define C4 C0+4 +#define C5 C0+5 +#define C6 C0+6 +#define Carry r16 +#define Flags 18 + +#ifdef F7MOD_store_ +;; Z->flags = CA. +;; Z->mant = C[7]. +DEFUN store_mant.with_flags + st Z, CA + +;; Z->mant = C[7]. +LABEL store_mant + std Z+0+Off, C0 + std Z+1+Off, C1 + std Z+2+Off, C2 + std Z+3+Off, C3 + std Z+4+Off, C4 + std Z+5+Off, C5 + std Z+6+Off, C6 + ret +ENDF store_mant.with_flags +#endif /* F7MOD_store_ */ + +#ifdef F7MOD_load_ +;; CA = Z->flags +;; C[7] = Z->mant +DEFUN load_mant.with_flags + ld CA, Z + skipnext + +;; CA = 0 +;; C[7] = Z->mant +LABEL load_mant.clr_CA +LABEL load_mant.clr_flags + clr CA ; May be skipped + +;; C[7] = Z->mant +LABEL load_mant + ldd C0, Z+0+Off + ldd C1, Z+1+Off + ldd C2, Z+2+Off + ldd C3, Z+3+Off + ldd C4, Z+4+Off + ldd C5, Z+5+Off + ldd C6, Z+6+Off + ret +ENDF load_mant.with_flags +#endif /* F7MOD_load_ */ + +#ifdef F7MOD_copy_ +DEFUN copy + cp XL, ZL + cpc XH, ZH + breq 9f + adiw XL, 10 + adiw ZL, 10 + set + bld ZERO, 1 + bld ZERO, 3 ; ZERO = 0b1010 = 10. +.Loop: + ld TMP, -X + st -Z, TMP + dec ZERO + brne .Loop +9: ret +ENDF copy +#endif /* F7MOD_copy_ */ + +#ifdef F7MOD_copy_P_ +DEFUN copy_P + set + bld ZERO, 1 + bld ZERO, 3 ; ZERO = 0b1010 = 10. +.Loop: +#ifdef __AVR_HAVE_LPMX__ + lpm TMP, Z+ +#else + lpm + adiw Z, 1 +#endif /* Have LPMx */ + st X+, TMP + dec ZERO + brne .Loop + sbiw X, 10 + sbiw Z, 10 + ret +ENDF copy_P +#endif /* F7MOD_copy_P_ */ + +#ifdef F7MOD_copy_mant_ +DEFUN copy_mant + cp XL, ZL + cpc XH, ZH + breq 9f + adiw XL, 1 + adiw ZL, 1 + set + bld ZERO, 3 + dec ZERO ; ZERO = 7 +.Loop: + ld TMP, X+ + st Z+, TMP + dec ZERO + brne .Loop + sbiw XL, 8 + sbiw ZL, 8 +9: ret +ENDF copy_mant +#endif /* F7MOD_copy_mant_ */ + + +#ifdef F7MOD_clr_mant_lsbs_ +DEFUN clr_mant_lsbs + push r16 + mov r16, r20 + wmov XL, r24 + + wmov ZL, r22 + F7call load_mant + + F7call lshrdi3 + + clr CA + + F7call ashldi3 + + pop r16 + + wmov ZL, XL + F7jmp store_mant + +ENDF clr_mant_lsbs +#endif /* F7MOD_clr_mant_lsbs_ */ + + +#ifdef F7MOD_normalize_with_carry_ +;; Z = &f7_t +;; C[] = .mant may be not normalized +;; Carry === r16 = Addend to Z->expo in [-64, 128). +;; Normalize C[], set Flags, and adjust Z->expo. +;; Return CA (after normalization) in TMP. 
+;; Unchanged: T +#define Addend r17 +#define Zbits r26 +#define expL r26 +#define expH r27 +DEFUN normalize_with_carry + mov Addend, Carry + tst C6 + brmi .Lshift.0 + ;; r26 = CLZ (uint64_t R18) + F7call clzdi2 + cpi Zbits, 64 + breq .Lclr + sub Addend, Zbits + mov r16, Zbits + + F7call ashldi3 + ;; Assert (R25.7 == 1) +.Lshift.0: + mov TMP, CA + ld Flags, Z + + ;; .expo += Addend + ldd expL, Z+0+Expo + ldd expH, Z+1+Expo + ;; Sign-extend Addend + clr r16 + sbrc Addend, 7 + com r16 + + ;; exp += (int8_t) Addend, i.e. sign-extend Addend. + add expL, Addend + adc expH, r16 + brvc .Lnormal + tst r16 + brmi .Lclr + ;; Overflow +#if F7_HAVE_Inf == 1 + ori Flags, F7_FLAG_inf +#else + ldi Flags, F7_FLAG_nan +#endif /* Have Inf */ + ret + +.Lnormal: + std Z+0+Expo, expL + std Z+1+Expo, expH + ret + +.Lclr: + ;; Underflow or Zero. + clr TMP + .global __clr_8 + XJMP __clr_8 + +LABEL normalize.store_with_flags + ;; no rounding + set + skipnext +LABEL normalize.round.store_with_flags + ;; with rounding + clt ; skipped ? +LABEL normalize.maybe_round.store_with_flags + F7call normalize_with_carry + ;; We have: + ;; Z = &f7_t + ;; X = .expo + ;; C[] = .mant + ;; R18 = .flags + ;; TMP = byte below .mant after normalization + ;; T = 1 => no rounding. + brts .Lstore + lsl TMP + adc C0, ZERO + brcc .Lstore + adc C1, ZERO + adc C2, ZERO + adc C3, ZERO + adc C4, ZERO + adc C5, ZERO + adc C6, ZERO + brcc .Lstore + ;; We only come here if C6 overflowed, i.e. C[] is 0 now. + ;; .mant = 1.0 by restoring the MSbit. + ror C6 + ;; .expo += 1 and override the .expo stored during normalize. + adiw expL, 1 + std Z+0+Expo, expL + std Z+1+Expo, expH + +.Lstore: + F7call store_mant.with_flags + + ;; Return the byte below .mant after normalization. + ;; This is only useful without rounding; the caller will know. + mov R24, TMP + ret +ENDF normalize_with_carry +#endif /* F7MOD_normalize_with_carry_ */ + + +#ifdef F7MOD_normalize_ +;; Using above functionality from C. +;; f7_t* normalize (f7_t *cc) +;; Adjusts cc->expo +;; Clears cc->flags +DEFUN normalize + push r17 + push r16 + wmov ZL, r24 + F7call load_mant.clr_CA + clr Carry + st Z, ZERO + F7call normalize.store_with_flags + wmov r24, Z + pop r16 + pop r17 + ret +ENDF normalize +#endif /* F7MOD_normalize_ */ + + +#ifdef F7MOD_store_expo_ +#define Done r24 +#define expLO r24 +#define expHI r25 +;; expo == INT16_MAX => *Z = Inf, return Done = true. +;; expo == INT16_MIN => *Z = 0x0, return Done = true. +;; else => Z->expo = expo, return Done = false. +DEFUN store_expo + cpi expHI, 0x80 + cpc expLO, ZERO + breq .Ltiny + adiw expLO, 1 + brvs .Lhuge + sbiw expLO, 1 + std Z+0+Expo, expLO + std Z+1+Expo, expHI + ldi Done, 0 + ret + +.Lhuge: +#if F7_HAVE_Inf == 1 + ld Done, Z + andi Done, F7_FLAG_sign + ori Done, F7_FLAG_inf +#else + ldi Done, F7_FLAG_nan +#endif /* Have Inf */ + st Z, Done + ldi Done, 1 + ret + +.Ltiny: + ldi Done, 1 + F7jmp clr +ENDF store_expo +#endif /* F7MOD_store_expo_ */ + + +#ifdef F7MOD_set_u64_ +DEFUN set_s64 + set + skipnext + ;; ... +LABEL set_u64 + clt ; Skipped? + wmov Zl, r16 + ;; TMP holds .flags. + clr TMP + brtc .Lnot.negative + + bst C6, 7 + brtc .Lnot.negative + bld TMP, F7_FLAGNO_sign + .global __negdi2 + XCALL __negdi2 + +.Lnot.negative: + st Z, TMP + std Z+0+Expo, ZERO + std Z+1+Expo, ZERO + ldi Carry, 63 + F7call normalize.round.store_with_flags + wmov r24, Z + wmov r16, Z ; Unclobber r16. 
+ ret +ENDF set_s64 +#endif /* F7MOD_set_u64_ */ + + +#ifdef F7MOD_to_integer_ +#define Mask r26 +DEFUN to_integer + wmov ZL, r24 + mov Mask, r22 + + F7call load_mant.with_flags + + sbrc Flags, F7_FLAGNO_nan + rjmp .Lset_0x8000 + + sbrc Flags, F7_FLAGNO_inf + rjmp .Lsaturate + + sbrs C6, 7 + rjmp .Lset_0x0000 + + bst Flags, F7_FLAGNO_sign + ldd r27, Z+0+Expo + ;; Does .expo have bits outside Mask? ... + mov TMP, Mask + com TMP + and TMP, r27 + ldd r27, Z+1+Expo + tst r27 + brmi .Lset_0x0000 ; ...yes: .expo is < 0 => return 0 + or TMP, r27 + brne .Lsaturate.T ; ...yes: .expo > Mask => saturate + + ;; ...no: Shift right to meet .expo = 0. + PUSH r16 + ldd r16, Z+0+Expo + eor r16, Mask + and r16, Mask + clr CA + F7call lshrdi3 + POP r16 + tst C6 + brmi .Lsaturate.T ; > INTxx_MAX => saturate + + rcall .Lround + brmi .Lsaturate.T ; > INTxx_MAX => saturate + + brtc 9f ; >= 0 => return + sbrc Mask, 5 + .global __negdi2 + XJMP __negdi2 + sbrc Mask, 4 + .global __negsi2 + XJMP __negsi2 + neg C6 + neg C5 + sbci C6, 0 +9: ret + +.Lsaturate: + bst Flags, F7_FLAGNO_sign +.Lsaturate.T: + +#if F7_HAVE_Inf + brtc .Lset_0x7fff + ;; -Inf => return 1 + INTxx_MIN + mov ZL, Flags + .global __clr_8 + XCALL __clr_8 + ldi C6, 0x80 + + ldi CA+0, 0x01 + + sbrs Mask, 5 + ldi CA+4, 0x01 + + sbrs Mask, 4 + ldi CA+6, 0x01 + ret + +.Lset_0x7fff: + ;; +Inf => return INTxx_MAX + sec + .global __sbc_8 + XCALL __sbc_8 + ldi C6, 0x7f + ret +#endif /* F7_HAVE_Inf */ + +.Lset_0x8000: + ;; NaN => return INTxx_MIN + .global __clr_8 + XCALL __clr_8 + ldi C6, 0x80 + ret + +.Lset_0x0000: + ;; Small value => return 0x0 + .global __clr_8 + XJMP __clr_8 + +.Lround: + ;; C6.7 is known to be 0 here. + ;; Return N = 1 iff we have to saturate. + cpi Mask, 0xf + breq .Lround16 + cpi Mask, 0x1f + breq .Lround32 + + ;; For now, no rounding in the 64-bit case. This rounding + ;; would have to be integrated into the right-shift. + cln + ret + +.Lround32: + rol C2 + adc C3, ZERO + adc C4, ZERO + rjmp 2f + +.Lround16: + rol C4 +2: adc C5, ZERO + adc C6, ZERO + ret +ENDF to_integer +#endif /* F7MOD_to_integer_ */ + + +#ifdef F7MOD_to_unsigned_ +#define Mask r26 +DEFUN to_unsigned + wmov ZL, r24 + mov Mask, r22 + + F7call load_mant.with_flags + + sbrc Flags, F7_FLAGNO_nan + rjmp .Lset_0xffff + + sbrc Flags, F7_FLAGNO_sign + rjmp .Lset_0x0000 + + sbrc Flags, F7_FLAGNO_inf + rjmp .Lset_0xffff + + sbrs C6, 7 + rjmp .Lset_0x0000 + + ldd r27, Z+0+Expo + ;; Does .expo have bits outside Mask? ... + mov TMP, Mask + com TMP + and TMP, r27 + ldd r27, Z+1+Expo + tst r27 + brmi .Lset_0x0000 ; ...yes: .expo is < 0 => return 0 + or TMP, r27 + brne .Lset_0xffff ; ...yes: .expo > Mask => saturate + + ;; ...no: Shift right to meet .expo = 0. + PUSH r16 + ldd r16, Z+0+Expo + eor r16, Mask + and r16, Mask + clr CA + F7call lshrdi3 + POP r16 + + ;; Rounding + ;; ??? C6.7 is known to be 0 here. + cpi Mask, 0xf + breq .Lround16 + cpi Mask, 0x1f + breq .Lround32 + + ;; For now, no rounding in the 64-bit case. This rounding + ;; would have to be integrated into the right-shift. 
+ ret
+
+.Lround32:
+ rol C2
+ adc C3, ZERO
+ adc C4, ZERO
+ rjmp 2f
+
+.Lround16:
+ rol C4
+2: adc C5, ZERO
+ adc C6, ZERO
+ brcs .Lset_0xffff ; Rounding overflow => saturate
+ ret
+
+.Lset_0xffff:
+ ;; return UINTxx_MAX
+ sec
+ .global __sbc_8
+ XJMP __sbc_8
+
+.Lset_0x0000:
+ ;; Small value => return 0x0
+ .global __clr_8
+ XJMP __clr_8
+
+ENDF to_unsigned
+#endif /* F7MOD_to_unsigned_ */
+
+
+#ifdef F7MOD_addsub_mant_scaled_
+;; int8_t f7_addsub_mant_scaled_asm (f7_t *r24, const f7_t *r22, const f7_t *r20,
+;; uint8_t r18);
+;; R18.0 = 1 : ADD
+;; R18.0 = 0 : SUB
+;; R18[7..1] : Scale
+;; Compute *R24 = *R22 + *R20 >> R18[7..1].
+
+#define BA 10
+#define B0 BA+1
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+
+DEFUN addsub_mant_scaled
+ do_prologue_saves 10
+
+ bst r18, 0 ;; ADD ?
+ lsr r18
+ mov r16, r18
+
+ wmov ZL, r20
+ wmov YL, r22
+ ;; C[] = bb >> shift
+ wmov XL, r24
+
+ F7call load_mant.clr_CA
+ F7call lshrdi3
+
+ wmov BA, CA
+ wmov B1, C1
+ wmov B3, C3
+ wmov B5, C5
+ wmov ZL, YL
+ F7call load_mant.clr_CA
+
+ wmov ZL, XL
+
+ brts .Ladd
+
+ .global __subdi3
+ XCALL __subdi3
+
+ breq .Lzero
+ brcc .Lround
+ ;; C = 1: Can underflow happen at all ?
+.Lzero:
+ F7call clr
+ rjmp .Lepilogue
+
+.Ladd:
+ .global __adddi3
+ XCALL __adddi3
+ brcc .Lround
+ ldi Carry, 1
+ .global __lshrdi3
+ XCALL __lshrdi3
+ ori C6, 1 << 7
+ skipnext
+.Lround:
+ clr Carry ; skipped?
+ F7call normalize.round.store_with_flags
+
+.Lepilogue:
+ do_epilogue_restores 10
+
+ENDF addsub_mant_scaled
+
+#if !defined (__AVR_HAVE_MOVW__) || !defined (__AVR_HAVE_JMP_CALL__)
+DEFUN lshrdi3
+ .global __lshrdi3
+ XJMP __lshrdi3
+ENDF lshrdi3
+DEFUN ashldi3
+ .global __ashldi3
+ XJMP __ashldi3
+ENDF ashldi3
+#else
+
+# Basically just a wrapper around libgcc's __lshrdi3.
+DEFUN lshrdi3
+ ;; Handle bit 5 of shift offset.
+ sbrs r16, 5
+ rjmp 4f
+ wmov CA, C3
+ wmov C1, C5
+ clr C6 $ clr C5 $ wmov C3, C5
+4:
+ ;; Handle bit 4 of shift offset.
+ sbrs r16, 4
+ rjmp 3f
+ wmov CA, C1
+ wmov C1, C3
+ wmov C3, C5
+ clr C6 $ clr C5
+3:
+ ;; Handle bits 3...0 of shift offset.
+ push r16
+ andi r16, 0xf
+ breq 0f
+
+ .global __lshrdi3
+ XCALL __lshrdi3
+0:
+ pop r16
+ ret
+ENDF lshrdi3
+
+# Basically just a wrapper around libgcc's __ashldi3.
+DEFUN ashldi3
+ ;; Handle bit 5 of shift offset.
+ sbrs r16, 5
+ rjmp 4f
+ wmov C5, C1
+ wmov C3, CA
+ clr C2 $ clr C1 $ wmov CA, C1
+4:
+ ;; Handle bit 4 of shift offset.
+ sbrs r16, 4
+ rjmp 3f
+ wmov C5, C3
+ wmov C3, C1
+ wmov C1, CA
+ clr CA $ clr C0
+3:
+ ;; Handle bits 3...0 of shift offset.
+ push r16
+ andi r16, 0xf
+ breq 0f
+
+ .global __ashldi3
+ XCALL __ashldi3
+0:
+ pop r16
+ ret
+ENDF ashldi3
+#endif /* Small device */
+
+#endif /* F7MOD_addsub_mant_scaled_ */
+
+#if defined F7MOD_mul_mant_ && defined (__AVR_HAVE_MUL__)
+ #define A0 11
+ #define A1 A0+1
+ #define A2 A0+2
+ #define A3 A0+3
+ #define A4 A0+4
+ #define A5 A0+5
+ #define A6 A0+6
+
+ #define TT0 26
+ #define TT1 TT0+1
+ #define TT2 28
+ #define TT3 TT2+1
+
+ #define BB 10
+
+;; R18.0 = 1: No rounding.
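+;; (The routine accumulates the upper bytes of the 56 x 56 bit mantissa
+;; product in C[], with CA as the byte just below the mantissa.
+;; f7_mulx in libf7.c passes R18.0 = 1 so that f7_madd_msub can reuse
+;; the returned low byte for some extra bits of precision.)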
+ +DEFUN mul_mant + do_prologue_saves 10 + bst r18, 0 + push r25 + push r24 + movw ZL, r22 + LDD A0, Z+0+Off + LDD A1, Z+1+Off + LDD A2, Z+2+Off + LDD A3, Z+3+Off + LDD A4, Z+4+Off + LDD A5, Z+5+Off + LDD A6, Z+6+Off + movw ZL, r20 + + ;; 6 * 6 -> 6:5 + ;; 4 * 6 -> 4:3 + ;; 2 * 6 -> 2:1 + ;; 0 * 6 -> 0:a + ldd BB, Z+6+Off + mul A6, BB $ movw C5, r0 + mul A4, BB $ movw C3, r0 + mul A2, BB $ movw C1, r0 + mul A0, BB $ movw CA, r0 + + ;; 5 * 6 -> 5:4 + ;; 3 * 6 -> 3:2 + ;; 1 * 6 -> 1:0 + mul A5, BB $ movw TT2, r0 + mul A3, BB $ movw TT0, r0 + mul A1, BB + ADD C0, r0 $ adc C1, r1 + adc C2, TT0 $ adc C3, TT1 + adc C4, TT2 $ adc C5, TT3 $ clr ZERO + adc C6, ZERO + ;; Done B6 + + ;; 3 * 3 -> 0:a + ;; 4 * 4 -> 2:1 + ;; 5 * 5 -> 4:3 + ldd BB, Z+3+Off $ mul A3, BB $ movw TT0, r0 + ldd BB, Z+4+Off $ mul A4, BB $ movw TT2, r0 + ldd BB, Z+5+Off $ mul A5, BB + + ADD CA, TT0 $ adc C0, TT1 + adc C1, TT2 $ adc C2, TT3 + adc C3, r0 $ adc C4, r1 + brcc .+2 + adiw C5, 1 + + ;; 6 * 5 -> 5:4 + ;; 4 * 5 -> 3:2 + ;; 2 * 5 -> 1:0 + ;; 0 * 5 -> a:- + mul A0, BB + ;; A0 done +#define Atmp A0 + + mov Atmp, r1 + mul A6, BB $ movw TT2, r0 + mul A4, BB $ movw TT0, r0 + mul A2, BB + + ADD CA, Atmp + adc C0, r0 $ adc C1, r1 + adc C2, TT0 $ adc C3, TT1 + adc C4, TT2 $ adc C5, TT3 $ clr ZERO + adc C6, ZERO + + ;; 1 * 5 -> 0:a + ;; 3 * 5 -> 2:1 + ;; 6 * 4 -> 4:3 + mul A1, BB $ movw TT0, r0 + mul A3, BB $ movw TT2, r0 + ldd BB, Z+4+Off + mul A6, BB + + ADD CA, TT0 $ adc C0, TT1 + adc C1, TT2 $ adc C2, TT3 + adc C3, r0 $ adc C4, r1 $ clr ZERO + adc C5, ZERO $ adc C6, ZERO + ;; B5 done + + ;; 6 * 3 -> 3:2 + ;; 6 * 1 -> 1:0 + ;; 4 * 1 -> a:- + mov TT0, A6 $ ldd TMP, Z+3+Off + mov BB, A4 $ ldd Atmp, Z+1+Off + rcall .Lmul.help.3 + + ;; 5 * 4 -> 3:2 + ;; 5 * 2 -> 1:0 + ;; 3 * 2 -> a:- + mov TT0, A5 $ ldd TMP, Z+4+Off + mov BB, A3 $ ldd Atmp, Z+2+Off + rcall .Lmul.help.3 + + ;; 4 * -> 3:2 (=0) + ;; 4 * 3 -> 1:0 + ;; 2 * 3 -> a:- + mov TT0, A4 $ clr TMP + mov BB, A2 $ ldd Atmp, Z+3+Off + rcall .Lmul.help.3 + + ;; 3 * . -> 3:2 (=0) + ;; 3 * 4 -> 1:0 + ;; 1 * 4 -> a:- + mov TT0, A3 $ clr TMP + mov BB, A1 $ ldd Atmp, Z+4+Off + rcall .Lmul.help.3 + + ;; . * ? -> 3:2 (=0) + ;; . * 0 -> 1:0 (=0) + ;; 5 * 0 -> a:- + clr TT0 + mov BB, A5 $ ldd Atmp, Z+0+Off + rcall .Lmul.help.3 + + clr TT3 ;; Asserted by .Lmul.help.2 + ;; 6 * 2 -> 2:1 + ;; 6 * 0 -> 0:a + $ ldd TMP, Z+2+Off + mov BB, A6 ;$ ldd Atmp, Z+0+Off + rcall .Lmul.help.2 + + ;; 5 * 3 -> 2:1 + ;; 5 * 1 -> 0:a + $ ldd TMP, Z+3+Off + mov BB, A5 $ ldd Atmp, Z+1+Off + rcall .Lmul.help.2 + + ;; 4 * . -> 2:1 (=0) + ;; 4 * 2 -> 0:a + $ clr TMP + mov BB, A4 $ ldd Atmp, Z+2+Off + rcall .Lmul.help.2 + + ;; 2 * . -> 2:1 (=0) + ;; 2 * 4 -> 0:a + $ clr TMP + mov BB, A2 $ ldd Atmp, Z+4+Off + rcall .Lmul.help.2 + + ;; Finally... + + pop ZL + pop ZH + ;; The high byte is at least 0x40 and at most 0xfe. + ;; The result has to be left-shifted by one in order to scale it + ;; correctly. + + ldi Carry, 1 + F7call normalize.maybe_round.store_with_flags + + do_epilogue_restores 10 + +;; TT0 * Tmp -> 3:2 +;; TT0 * Atmp -> 1:0 +;; BB * Atmp -> a:- +;; +;; Clobbers : TMP, TT0...TT3. +;; Sets : ZERO = 0. +.Lmul.help.3: + mul TT0, TMP $ movw TT2, r0 + mul TT0, Atmp $ movw TT0, r0 + mul BB, Atmp + + ADD CA, r1 + adc C0, TT0 $ adc C1, TT1 + adc C2, TT2 +.Lmul.help.3.C3: $ adc C3, TT3 $ clr ZERO + adc C4, ZERO $ adc C5, ZERO + adc C6, ZERO + ret + +;; BB * TMP -> 2:1 +;; BB * Atmp -> 0:a +;; +;; Asserts : TT3 = 0 +;; Clobbers : TMP, TT0, TT1. +;; Sets : ZERO = 0. 
+.Lmul.help.2:
+ mul BB, TMP $ movw TT0, r0
+ mul BB, Atmp
+ ADD CA, r0 $ adc C0, r1
+ adc C1, TT0 $ adc C2, TT1
+ rjmp .Lmul.help.3.C3
+
+ENDF mul_mant
+#endif /* F7MOD_mul_mant_ && MUL */
+
+
+#if defined (F7MOD_div_)
+
+;; Dividend is C[]
+
+;; Divisor
+#define A0 9
+#define A1 10
+#define A2 11
+#define A3 12
+#define A4 13
+#define A5 14
+#define A6 15
+
+;; Quotient
+#define Q0 0 /* === TMP */
+#define Q1 Q0+1 /* === ZERO */
+#define Q2 26
+#define Q3 Q2+1
+#define Q4 28
+#define Q5 Q4+1
+#define Q6 16
+#define Q7 Q6+1
+
+#define Cnt CA
+#define QBits r8
+
+DEFUN div
+ do_prologue_saves 12
+
+ ;; Number of bits requested for the quotient.
+ ;; This is usually 2 + F7_MANT_BITS.
+ mov QBits, r20
+ wmov ZL, r22
+ LDD A0, Z+0+Off
+ LDD A1, Z+1+Off
+ LDD A2, Z+2+Off
+ LDD A3, Z+3+Off
+ LDD A4, Z+4+Off
+ LDD A5, Z+5+Off
+ LDD A6, Z+6+Off
+ wmov ZL, r24
+ F7call load_mant
+
+ ;; Clear quotient Q[].
+ clr Q0 ; === TMP
+ ;clr Q1 ; === ZERO
+ wmov Q2, Q0
+ wmov Q4, Q0
+ wmov Q6, Q0
+
+ ;; C[] and A[] are valid mantissae, i.e. their MSBit is set. Therefore,
+ ;; quotient Q[] will be in [0x0.ff..., 0x0.40...] and to adjust Q[] we
+ ;; need at most 1 left-shift. Compute F7_MANT_BITS + 2 bits of the
+ ;; quotient: One bit is used for rounding, and one bit might be consumed
+ ;; by the mentioned left-shift.
+ mov Cnt, QBits
+ rjmp .Loop_start
+
+.Loop:
+ ;; Shift dividend.
+ LSL C0
+ rol C1
+ rol C2
+ rol C3
+ rol C4
+ rol C5
+ rol C6
+ brcs .Lfits
+ ;; Compare dividend against divisor.
+.Loop_start:
+ CP C0, A0
+ cpc C1, A1
+ cpc C2, A2
+ cpc C3, A3
+ cpc C4, A4
+ cpc C5, A5
+ cpc C6, A6
+ ;; Shift 0 into quotient.
+ brlo 1f
+.Lfits:
+ ;; Divisor fits into dividend.
+ SUB C0, A0
+ sbc C1, A1
+ sbc C2, A2
+ sbc C3, A3
+ sbc C4, A4
+ sbc C5, A5
+ sbc C6, A6
+ ;; Shift 1 into quotient.
+ sec
+ rol Q0
+ skipnext
+1: lsl Q0
+ rol Q1
+ rol Q2
+ rol Q3
+ rol Q4
+ rol Q5
+ rol Q6
+ rol Q7
+ dec Cnt
+ brne .Loop
+
+ wmov CA, Q0
+ wmov C1, Q2
+ wmov C3, Q4
+ wmov C5, Q6
+ clr ZERO
+
+ ldi Carry, 64
+ sub Carry, QBits
+ F7call normalize.round.store_with_flags
+
+ do_epilogue_restores 12
+ENDF div
+
+#endif /* F7MOD_div_ */
+
+
+#if defined (F7MOD_sqrt16_) && defined (__AVR_HAVE_MUL__)
+
+#define Mask C6
+#define Q0 C3 /* = R22 */
+#define Q1 C4 /* = R23 */
+
+;; uint16_t R24 = sqrt16_XXX (uint16_t R24);
+;; Clobbers: R22, R23, TMP.
+;;
+;; XXX = floor: Return integral part of square-root of R25:R24 with R25 = 0.
+;; Error is in [0, -1 LSB).
+;; XXX = round: Return square-root of R25:R24 rounded to nearest integer.
+;; R25 = (Q[] >= 65281) = (Q > 0xff00), i.e. if Q[] is not
+;; bigger than 0xff00, then the result fits in 8 bits.
+;; Return C = 0 if the result is the same as for XXX = floor,
+;; error in [0, -1/2 LSB)
+;; Return C = 1 if the result is one higher than for XXX = floor,
+;; error in [1/2 LSB, 0).
+DEFUN sqrt16_round
+ set
+ skipnext
+ ;; ...
+LABEL sqrt16_floor
+ clt ; Skipped?
+ movw Q0, r24
+ clr C5
+ ldi Mask, 1 << 7
+
+.Loop_mask:
+ add C5, Mask
+ mul C5, C5
+ cp Q0, R0
+ cpc Q1, R1
+ brsh 1f
+ sub C5, Mask
+1: lsr Mask
+ brne .Loop_mask
+
+ brtc .Ldone ; No rounding => C6 will be 0.
+
+ ;; Rounding: (X + 1/2)^2 = X^2 + X + 1/4, thus probing
+ ;; for bit -1 is testing Q[] against C5^2 + C5.
+ mul C5, C5
+ add R0, C5
+ adc R1, C6 ; Exploit C6 === Mask = 0.
+ cp R0, Q0
+ cpc R1, Q1
+ brcc .Ldone
+ ;; If C5^2 + C5 + 1/4 fits into Q[], then round up and C = 1.
+ adiw C5, 1 ; Exploit C6 === Mask = 0.
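+ ;; (The floor part above, as a plain C sketch, with q the 16-bit input:
+ ;;
+ ;; uint8_t r = 0;
+ ;; for (uint8_t m = 0x80; m; m >>= 1)
+ ;; if ((uint16_t) (r + m) * (r + m) <= q)
+ ;; r += m; // keep the probed result bit
+ ;;
+ ;; i.e. the result bits are probed from MSB down to LSB.)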
+ sec + +.Ldone: + clr __zero_reg__ + ret +ENDF sqrt16_round +#undef Mask +#undef Q0 +#undef Q1 +#endif /* F7MOD_sqrt16_ && MUL */ + +#ifdef F7MOD_sqrt_approx_ +DEFUN sqrt_approx + push r17 + push r16 + wmov XL, r24 + wmov ZL, r22 + + ;; C[] = 0. + .global __clr_8 + XCALL __clr_8 + + ldd C5, Z+5+Off + ldd C6, Z+6+Off + + ldd Carry, Z+0+Expo + ldd TMP, Z+1+Expo + wmov ZL, XL + + st Z, ZERO + + asr TMP + ror Carry + std Z+1+Expo, TMP + std Z+0+Expo, Carry + + ;; Re-interpreting our Q-format 1.xx mantissa as Q2.yy, we have to shift + ;; the mantissa to the right by 1. As we need an even exponent, multiply + ;; the mantissa by 2 for odd exponents, i.e. only right-shift if .expo + ;; is even. + + brcs 1f + lsr C6 + ror C5 + +1: + F7call sqrt16_round + + ;; sqrt16_round() returns: C = 0: error in [0, -1/2 LSB). + ;; C = 1: error in [1/2 LSB, 0) + + brcc 2f + ;; Undo the round-up from sqrt16_round(); this will transform to + ;; error in [-1/2 LSB, -1 LSB). + sbiw C5, 1 + ;; Together with the correct bit C4.7, the error is in [0, -1/2 LSB). + ori C4, 1 << 7 + +2: ;; Setting C4.6 adds 1/4 LSB and the error is now in [1/4 LSB, -1/4 LSB) + ;; in either case. + ori C4, 1 << 6 + + ;; ???????????? + ;; sqrt16_round() runs on integers which means that it computes the + ;; square root of mant * 2^14 if we regard mant as Q-format 2.yy, + ;; i.e. 2 integral bits. The result is sqrt(mant) * 2^7, + ;; and in order to get the same scaling like the input, .expo has to + ;; be adjusted by 7. ??????????????? + + ldi Carry, 8 + F7call normalize.store_with_flags + + pop r16 + pop r17 + ret + +ENDF sqrt_approx +#endif /* F7MOD_sqrt_approx_ */ + + +#undef CA +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef Carry + + +#ifdef F7MOD_D_fabs_ +_DEFUN __fabs + DALIAS fabs + LALIAS fabsl + andi R25, 0b01111111 + ret +_ENDF __fabs +#endif /* F7MOD_D_fabs_ */ + + +#ifdef F7MOD_D_neg_ +_DEFUN __neg +_LABEL __negdf2 + subi R25, 0b10000000 + ret +_ENDF __neg +#endif /* F7MOD_D_neg_ */ + + +#ifdef F7MOD_D_signbit_ +_DEFUN __signbit + DALIAS signbit + LALIAS signbitl + bst R25, 7 + clr R25 + clr R24 + bld R24, 0 + ret +_ENDF __signbit +#endif /* F7MOD_D_signbit_ */ + + +#ifdef F7MOD_D_copysign_ +_DEFUN __copysign + DALIAS copysign + LALIAS copysignl + bst R17, 7 + bld R25, 7 + ret +_ENDF __copysign +#endif /* F7MOD_D_copysign_ */ + + +#ifdef F7MOD_D_isinf_ +_DEFUN __isinf + DALIAS isinf + LALIAS isinfl + F7call class_D + ;; Inf: T = Z = 1. + brtc 0f + ldi R24, 1 + breq 1f +0: + clr R24 +1: + clr R25 + ret +_ENDF __isinf +#endif /* F7MOD_D_isinf_ */ + + +#ifdef F7MOD_D_isnan_ +_DEFUN __isnan + DALIAS isnan + LALIAS isnanl + F7call class_D + ;; NaN: T = 1, Z = 0. + brtc 0f + ldi R24, 1 + brne 1f +0: + clr R24 +1: + clr R25 + ret +_ENDF __isnan +#endif /* F7MOD_D_isnan_ */ + + +#ifdef F7MOD_D_isfinite_ +_DEFUN __isfinite + DALIAS isfinite + LALIAS isfinitel + F7call class_D + ;; Number <=> T = 0. + bld R24, 0 + com R24 + andi R24, 1 + clr R25 + ret +_ENDF __isfinite +#endif /* F7MOD_D_isfinite_ */ + + +#ifdef F7MOD_D_class_ +;; The encoded exponent has 11 Bits. +#define MAX_BIASED_EXPO 0b0111111111110000 + +;; Classify a double in R18[] +;; Number: T-Flag = 0. +;; +-Inf : T-Flag = 1, Z-Flag = 1. +;; NaN : T-Flag = 1, Z-Flag = 0. +DEFUN class_D + wmov R26, R24 + andi R26, lo8 (MAX_BIASED_EXPO) + andi R27, hi8 (MAX_BIASED_EXPO) + subi R26, lo8 (MAX_BIASED_EXPO) + sbci R27, hi8 (MAX_BIASED_EXPO) + clt + brne .L.number + set + ;; Set sign and expo to 0. 
+ clr R25 + andi R24, lo8 (~MAX_BIASED_EXPO) + ;; What remains is the mantissa. + ;; Mantissa == 0 => +/-Inf. + ;; Mantissa != 0 => NaN. + ;; Compare R18[] against sign_extend(R26) with R26 = 0. + .global __cmpdi2_s8 + XJMP __cmpdi2_s8 +.L.number: + ret + +ENDF class_D +#endif /* F7MOD_D_class_ */ + + +#ifdef F7MOD_call_dd_ + +;; Provide double wrappers for functions that operate on f7_t and get f7_t*. +;; +;; We set up a frame of sizeof(f7_t), convert the input double in R18[] to +;; f7_t in that frame location, then call *Z and finally convert the result f7_t +;; to double R18[] if that's requested. +;; +;; call_dd: double func (double A) +;; void (*Z) (f7_t *aa, const f7_t *aa) +;; +;; call_dx: double func (type_t A) , sizeof(type_t) <= 4 +;; void (*Z) (f7_t *aa, type_t) +;; +;; call_xd: type_t func (double A) +;; type_t (*Z) (const f7_t *aa) +;; +;; call_ddx: double func (double A, word_t) , sizeof (word_t) <= 2 +;; void (*Z) (f7_t *aa, const f7_t *aa, word_t) + +#define WHAT R13 + +DEFUN call_dd ; WHAT = R13 = 3 + inc ZERO +LABEL call_xd ; WHAT = R13 = 2 + inc ZERO +LABEL call_ddx ; WHAT = R13 = 1 + inc ZERO +LABEL call_dx ; WHAT = R13 = 0 + push WHAT + mov WHAT, ZERO + clr ZERO + ;; R14/R15 hold Z, the address of the f7_worker function, until we need it. + push r14 + push r15 + wmov r14, Z + +#define n_pushed 4 +#define n_frame 10 + + do_prologue_saves n_pushed, n_frame + ;; Y = FramePointer + 1 + adiw Y, 1 + dec WHAT + brmi .Ldx ; WHAT was initially 0. + ;; FP + 1 = (f7_t) arg1 + wmov r16, Y + ;; The double argument is in R18[]. + XCALL F7_NAME (set_double_impl) + tst WHAT + brne .Lno.ddx ; WHAT was initially != 1. + ;; call_ddx: Set R20/21 to the 2-byte scalar / pointer argument. + ;; Fetch it from where prologue_saves put it. + ldd r20, Y + n_frame + 3 ; Saved R16 + ldd r21, Y + n_frame + 2 ; Saved R17 +.Lno.ddx: + wmov r22, Y ; &arg1 (input) +.Ldo.dx: + wmov r24, Y ; &arg1 (output) + wmov Z, r14 + XICALL + dec WHAT + breq .Lepilogue ; WHAT was initially 2: Return non-double. + wmov r24, Y ; &arg1 + XCALL F7_NAME (get_double) +.Lepilogue: + ;; + 3 to account for R13...R15 pushed prior to do_prologue_saves. + do_epilogue_restores n_pushed + 3, n_frame + +.Ldx: + ;; call_dx: Copy the 4-byte input scalar from R22[4] to R20[4]. + wmov r20, r22 + wmov r22, r24 + rjmp .Ldo.dx + +ENDF call_dd +#endif /* F7MOD_call_dd_ */ + + +#ifdef F7MOD_call_ddd_ + +;; Provide double wrappers for functions that operate on f7_t and get f7_t*. +;; +;; We set up a frame of 2 * sizeof(f7_t), convert the input doubles in R18[] +;; and R10[] to f7_t in these frame locations, then call *Z and finally +;; convert the result f7_t to double R18[] if that's requested. +;; +;; call_ddd: double func (double A, double B) +;; void (*Z) (f7_t *aa, const f7_t *aa, const f7_t *bb) +;; +;; call_xdd: type_t func (double A, double B) +;; type_t (*Z) (const f7_t *aa, const f7_t *bb) + +DEFUN call_ddd + inc ZERO +LABEL call_xdd + ;; R8/R9 hold Z, the address of the f7_worker function, until we need it. + push r9 + push r8 + wmov r8, Z + ;; This is an argument to call.2 and will be accessed by the arg pointer. + push ZERO + clr ZERO + rcall call.2 + pop TMP + pop r8 + pop r9 + ret + +#define n_pushed 4 +#define n_frame 20 + +call.2: + do_prologue_saves n_pushed, n_frame + ;; Y = FramePointer + 1 + adiw Y, 1 + ;; FP + 1 = (f7_t) arg1 + wmov r16, Y + ;; First double argument is already in R18[]. 
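+ ;; (sizeof (f7_t) == 10, hence the 20-byte frame holds two f7_t:
+ ;; arg1 at FP + 1 and arg2 at FP + 11, cf. the offsets below.)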
+ XCALL F7_NAME (set_double_impl) + ;; FP + 11 = (f7_t) arg2 + wmov r16, Y + subi r16, lo8 (-10) + sbci r17, hi8 (-10) + ;; Move second double argument to R18[]. + wmov r18, r10 + wmov r20, r12 + wmov r22, r14 + ;; Get high word of arg2 from where prologue_saves put it. + ldd r24, Y + n_frame + 3 ; Saved R16 + ldd r25, Y + n_frame + 2 ; Saved R17 + XCALL F7_NAME (set_double_impl) + ;; Z (f7_t *arg1, const f7_t *arg1, const f7_t *arg2) + wmov Z, r8 + wmov r24, Y ; &arg1 + ;; WHAT == 0 => call_xdd + ;; WHAT != 0 => call_ddd + ldd TMP, Y + n_frame + n_pushed + PC_SIZE + tst TMP + breq .Lxdd + wmov r22, Y ; &arg1 + wmov r20, r16 ; &arg2 + XICALL + wmov r24, Y ; &arg1 + XCALL F7_NAME (get_double) +.Lepilogue: + do_epilogue_restores n_pushed, n_frame +.Lxdd: + wmov r22, r16 ; &arg2 + XICALL + rjmp .Lepilogue +ENDF call_ddd +#endif /* F7MOD_call_ddd_ */ + +#include "f7-wraps.h" + +#endif /* !AVR_TINY */ diff --git a/libgcc/config/avr/libf7/libf7-c-object.mk b/libgcc/config/avr/libf7/libf7-c-object.mk new file mode 100644 index 0000000..0424cba --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-c-object.mk @@ -0,0 +1,20 @@ +# This file is included several times in a row, once for each element of +# $(iter-items). On each inclusion, we advance $o to the next element. +# $(iter-labels) is also advanced. +# This works similar to $(srcdir)/siditi-object.mk. + +o := $(firstword $(iter-items)) +iter-items := $(filter-out $o,$(iter-items)) + +$o-label := $(firstword $(iter-labels)) +iter-labels := $(wordlist 2,$(words $(iter-labels)),$(iter-labels)) + +f7_c_$o$(objext): f7_c_%$(objext): $(libf7)/libf7.c + $(gcc_compile) -DF7MOD_$($*-label)_ $(F7_C_FLAGS) \ + -c $< + +ifeq ($(enable_shared),yes) +f7_c_$(o)_s$(objext): %_s$(objext): $(libf7)/libf7.c + $(gcc_s_compile) -DF7MOD_$($*-label)_ $(F7_C_FLAGS) \ + -c $< +endif diff --git a/libgcc/config/avr/libf7/libf7-common.mk b/libgcc/config/avr/libf7/libf7-common.mk new file mode 100644 index 0000000..28663b5 --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-common.mk @@ -0,0 +1,102 @@ +# f7_c_*.o modules from libf7.c. +F7_C_PARTS += set_s16 set_u16 set_s32 set_u32 init +F7_C_PARTS += get_s16 get_u16 get_s32 get_u32 get_s64 get_u64 +F7_C_PARTS += lrint ldexp frexp madd_msub madd msub hypot +F7_C_PARTS += addsub add sub mulx mul square divx div div1 fmod sqrt cbrt +F7_C_PARTS += Ineg Iadd Isub Imul Idiv IRsub Isquare Ildexp Isqrt +F7_C_PARTS += set_float get_float get_double set_double set_pdouble +F7_C_PARTS += fabs neg fmin fmax minmax truncx trunc floor ceil round lround +F7_C_PARTS += horner logx log log10 log2 exp pow10 pow powi +F7_C_PARTS += sin cos tan cotan sincos sinh cosh tanh sinhcosh +F7_C_PARTS += asinacos asin acos atan atan2 +F7_C_PARTS += abscmp_msb_ge cmp cmp_abs cmp_unordered + +F7_C_PARTS += const_1 const_1_2 const_1_3 +F7_C_PARTS += const_pi const_ln2 const_1_ln2 const_ln10 const_1_ln10 const_sqrt2 +F7_C_PARTS += # const_m1 const_2 const_sqrt2 + +# f7_asm_*.o modules from libf7-asm.sx. +F7_ASM_PARTS += classify clr mul_mant cmp_mant set_u64 +F7_ASM_PARTS += copy copy_P copy_mant clr_mant_lsbs +F7_ASM_PARTS += addsub_mant_scaled store load +F7_ASM_PARTS += to_integer to_unsigned clz normalize_with_carry normalize +F7_ASM_PARTS += store_expo sqrt16 sqrt_approx div + +F7_ASM_PARTS += D_class +F7_ASM_PARTS += D_isnan D_isinf D_isfinite D_signbit D_copysign D_neg D_fabs + +F7_ASM_PARTS += call_dd call_ddd + +# Stuff that will be wrapped in f7-wraps.h (included by libf7-asm.sx) +# and give f7_asm_D_*.o modules. 
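+# (Naming sketch: the letters after g_ resp. m_ give the signature,
+# d = double, x = non-double, matching the call_dd / call_ddd wrappers
+# in libf7-asm.sx. E.g. entries in g_ddd wrap libgcc functions of
+# signature double (double, double) such as __muldf3, and entries in
+# m_dd wrap unary <math.h> functions such as double sqrt (double).)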
+g_ddd += add sub mul div +g_xdd_cmp += le lt ge gt ne eq unord +g_dx += floatunsidf floatsidf extendsfdf2 +g_xd += fixdfsi fixdfdi fixunsdfdi fixunsdfsi truncdfsf2 + +m_ddd += pow fmin fmax fmod hypot atan2 +m_ddx += ldexp frexp +m_dd += sqrt cbrt exp exp10 pow10 log log10 log2 sin cos tan cotan asin acos atan +m_dd += ceil floor trunc round sinh cosh tanh +m_xd += lrint lround + +# -mcall-prologues +CALL_PROLOGUES += divx sqrt cbrt get_double set_double logx exp exp10 pow10 +CALL_PROLOGUES += put_C truncx round minmax sincos tan cotan pow powi fmod +CALL_PROLOGUES += atan asinacos madd_msub hypot init horner sinhcosh tanh + +# -mstrict-X +STRICT_X += log addsub truncx ldexp exp + +# Renames used when building f7-renames.h. +F7F += fabs neg add sub addsub div div1 divx fmod sqrt cbrt +F7F += square mul mulx madd_msub madd msub hypot +F7F += Ineg Iadd Isub Imul Idiv IRsub Isquare Ildexp Isqrt +F7F += le lt gt ge ne eq cmp cmp_abs ordered unordered cmp_unordered +F7F += lt_impl gt_impl le_impl ge_impl eq_impl ne_impl unord_impl + +F7F += lrint ldexp frexp exp logx log log10 log2 +F7F += minmax fmax fmin floor ceil round lround trunc truncx +F7F += horner pow10 exp10 pow powi +F7F += sin cos tan cotan sincos sinh cosh tanh sinhcosh +F7F += asinacos asin acos atan atan2 +F7F += mul_noround sqrt16_round sqrt16_floor +F7F += clr_mant_lsbs abscmp_msb_ge lshrdi3 ashldi3 +F7F += assert + +F7F += classify + +F7F += class_inf class_nan class_number class_zero class_nonzero class_sign +F7F += signbit set_sign set_nan set_inf +F7F += is_inf is_nan is_number is_zero is_nonzero +F7F += clr copy copy_P copy_mant msbit is0 cmp_mant store_expo +F7F += abs + +F7F += set_s64 set_s32 set_s16 set_s16_impl set_u16_worker +F7F += set_u64 set_u32 set_u16 set_u16_impl +F7F += set_float set_pdouble set_double_impl set_double init_impl init +F7F += get_s16 get_s32 get_s64 get_float +F7F += get_u16 get_u32 get_u64 get_double + +F7F += set_eps set_1pow2 + +# Renames for ALIASes without own module. +F7F += min max exp10 +F7F += floatunsidf floatsidf extendsfdf2 +F7F += fixdfsi fixdfdi fixunsdfdi fixunsdfsi truncdfsf2 + +# Renames for f7-const.def. +F7F_cst += 1 2 1_2 1_3 m1 pi ln2 ln10 1_ln2 1_ln10 sqrt2 + +F7F_asm += classify +F7F_asm += store_expo clr copy copy_P copy_mant +F7F_asm += cmp_mant normalize store_expo +F7F_asm += set_u64 set_s64 addsub_mant_scaled mul_mant +F7F_asm += to_integer to_unsigned clr_mant_lsbs +F7F_asm += div sqrt_approx sqrt16_round sqrt16_floor +F7F_asm += lshrdi3 ashldi3 + +F7F_asm += class_D + +F7F_asm += call_ddd call_xdd call_ddx +F7F_asm += call_dd call_xd call_dx diff --git a/libgcc/config/avr/libf7/libf7-const.def b/libgcc/config/avr/libf7/libf7-const.def new file mode 100644 index 0000000..ff07cdb --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-const.def @@ -0,0 +1,201 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of LIBF7, which is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef ONLY_CONST_WITH_ID +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_1_2_) +F7_CONST_DEF (1_2, 0, 0x80,0,0,0,0,0,0, -1) +#endif /* 1/2 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_1_) +F7_CONST_DEF (1, 0, 0x80,0,0,0,0,0,0, 0) +#endif /* 1 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_2_) +F7_CONST_DEF (2, 0, 0x80,0,0,0,0,0,0, 1) +#endif /* 2 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_1_3_) +F7_CONST_DEF (1_3, 0, 0xaa,0xaa,0xaa,0xaa,0xaa,0xaa,0xab, -2) +#endif /* 1/3 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_1_ln2_) +F7_CONST_DEF (1_ln2, 0, 0xb8,0xaa,0x3b,0x29,0x5c,0x17,0xf1, 0) +#endif /* 1 / ln2 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_1_ln10_) +F7_CONST_DEF (1_ln10, 0, 0xde,0x5b,0xd8,0xa9,0x37,0x28,0x72, -2) +#endif /* 1 / ln10 */ +#endif /* const with ID */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_m1_) +F7_CONST_DEF (m1, 1, 0x80,0,0,0,0,0,0, 0) +#endif /* -1 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_pi_) +F7_CONST_DEF (pi, 0, 0xc9,0x0f,0xda,0xa2,0x21,0x68,0xc2, 1) +#endif /* pi */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_ln2_) +F7_CONST_DEF (ln2, 0, 0xb1,0x72,0x17,0xf7,0xd1,0xcf,0x7a, -1) +#endif /* ln2 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_ln10_) +F7_CONST_DEF (ln10, 0, 0x93,0x5d,0x8d,0xdd,0xaa,0xa8,0xac, 1) +#endif /* ln10 */ + +#if defined IN_LIBF7_H || defined (F7MOD_const_) || defined (F7MOD_const_sqrt2_) +F7_CONST_DEF (sqrt2, 0, 0xb5,0x04,0xf3,0x33,0xf9,0xde,0x65, 0) +#endif /* sqrt2 */ + +#if !defined (IN_LIBF7_H) && !defined (F7MOD_const_) + +#if defined (F7MOD_logx_) +// 2.00000000000000000287938058543222037939 + 0.666666666666667653654896829047862109605x + 0.399999999996639180070480580779767357622x^2 + 0.285714286985476106491309914245597720749x^3 + 0.222222024077476110197597399876978069272x^4 + 0.181833876328594532366358057253631240618x^5 + 0.153181571233880298729095145342556944283x^6 + 0.147580071554569676223389696418304199218x^7 +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 1) +F7_CONST_DEF (X, 0, 0xaa,0xaa,0xaa,0xaa,0xaa,0xaa,0xf2, -1) +F7_CONST_DEF (X, 0, 0xcc,0xcc,0xcc,0xcc,0xc5,0x68,0xd4, -2) +F7_CONST_DEF (X, 0, 0x92,0x49,0x24,0x9d,0x34,0x85,0x25, -2) +F7_CONST_DEF (X, 0, 0xe3,0x8e,0x2b,0x97,0x74,0x69,0xb3, -3) +F7_CONST_DEF (X, 0, 0xba,0x32,0xa8,0xe0,0x8b,0x7d,0xcc, -3) +F7_CONST_DEF (X, 0, 0x9c,0xdb,0xa1,0x3b,0x32,0x98,0x9a, -3) +F7_CONST_DEF (X, 0, 0x97,0x1f,0x3a,0xf3,0x79,0x0d,0xc8, -3) + +#elif defined (F7MOD_exp_) +#ifdef ARRAY_NAME +// Intended for |X| < ln2 / 8 +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +// f(x) = (e^x - 1) / x and f -> 1 + x*f by means of const_1 above. 
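+// (Reading these entries: F7_CONST_DEF (X, s, m6, ..., m0, e) denotes
+// (-1)^s * 0.m6m5...m0 * 2^(e+1) with a normalized 56-bit mantissa,
+// i.e. m6.7 = 1; e.g. mantissa 0x80,0,...,0 with e = 0 encodes 1.0.
+// The coefficients are listed with the constant term first.)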
+// 0.99999999999999993104781692731238623644199 + 0.50000000000000018681311888335564572558345x + 0.16666666666696003026683736262180526042594x^2 + 0.041666666666489887893617892155331720764329x^3 + 0.0083333331383434741356242595738508060753717x^4 + 0.0013888889242082668539796357369837122917246x^5 + 0.00019845416752769938401628433087258224774248x^6 + 0.000024801569801689274979195020335136136418918x^7 +F7_CONST_DEF (X, 0, 0xff,0xff,0xff,0xff,0xff,0xff,0xfb, -1) +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x0d, -1) +F7_CONST_DEF (X, 0, 0xaa,0xaa,0xaa,0xaa,0xab,0xf4,0xf7, -3) +F7_CONST_DEF (X, 0, 0xaa,0xaa,0xaa,0xaa,0xa7,0x8e,0x87, -5) +F7_CONST_DEF (X, 0, 0x88,0x88,0x88,0x52,0xef,0x57,0x81, -7) +F7_CONST_DEF (X, 0, 0xb6,0x0b,0x61,0x03,0xb6,0x6b,0x82, -10) +F7_CONST_DEF (X, 0, 0xd0,0x18,0x22,0x8c,0x6f,0xde,0x72, -13) +F7_CONST_DEF (X, 0, 0xd0,0x0c,0xf7,0x31,0x28,0xba,0xb5, -16) +#else +// Negative because f7_const_ln2 is ln2 rounded up. +F7_CONST_DEF (ln2_low, 1, 0xa8,0x6c,0x38,0x98,0xcf,0xf8,0x1a, -58) +#endif + +#elif defined (F7MOD_atan_) + +#if defined (MINIMAX_6_6_IN_0_1) +// https://www.mikrocontroller.net/topic/480840#6003520 +#if defined (FOR_NUMERATOR) +// 1209.7470017580907217240715 + 3031.0745956115083044212807x + 2761.7198246138834959053784x^2 + 1114.1290728455183546172942x^3 + 192.5792014481559613474286x^4 + 11.3221594116764655236245x^5 + 0.09762721591717633036983x^6 +// p = Poly ([Decimal('1209.7470017580907217240715'), Decimal('3031.0745956115083044212807'), Decimal('2761.7198246138834959053784'), Decimal('1114.1290728455183546172942'), Decimal('192.5792014481559613474286'), Decimal('11.3221594116764655236245'), Decimal('0.09762721591717633036983')]) +F7_CONST_DEF (X, 0, 0x97,0x37,0xe7,0x70,0x3b,0x21,0xbc, 10) +F7_CONST_DEF (X, 0, 0xbd,0x71,0x31,0x8b,0x2a,0xfd,0xa7, 11) +F7_CONST_DEF (X, 0, 0xac,0x9b,0x84,0x66,0xd0,0x77,0xc4, 11) +F7_CONST_DEF (X, 0, 0x8b,0x44,0x21,0x5d,0x60,0x49,0xb2, 10) +F7_CONST_DEF (X, 0, 0xc0,0x94,0x46,0x8b,0xcd,0xa0,0x2d, 7) +F7_CONST_DEF (X, 0, 0xb5,0x27,0x90,0xa0,0x93,0xfb,0xfc, 3) +F7_CONST_DEF (X, 0, 0xc7,0xf0,0xc7,0x1c,0x82,0xab,0x23, -4) +#elif defined (FOR_DENOMINATOR) +// 1209.7470017580907287514197 + 3434.3235961975351716547069x + 3664.5449563283749893504796x^2 + 1821.6003392918464941509225x^3 + 423.0716464809047804524206x^4 + 39.9178842486537981501999x^5 + x^6 +// q = Poly ([Decimal('1209.7470017580907287514197'), Decimal('3434.3235961975351716547069'), Decimal('3664.5449563283749893504796'), Decimal('1821.6003392918464941509225'), Decimal('423.0716464809047804524206'), Decimal('39.9178842486537981501999'), Decimal('1.0')]) +F7_CONST_DEF (X, 0, 0x97,0x37,0xe7,0x70,0x3b,0x21,0xbc, 10) +F7_CONST_DEF (X, 0, 0xd6,0xa5,0x2d,0x73,0x34,0xd8,0x60, 11) +F7_CONST_DEF (X, 0, 0xe5,0x08,0xb8,0x24,0x20,0x81,0xe7, 11) +F7_CONST_DEF (X, 0, 0xe3,0xb3,0x35,0xfa,0xbf,0x1f,0x81, 10) +F7_CONST_DEF (X, 0, 0xd3,0x89,0x2b,0xb6,0x3e,0x2e,0x05, 8) +F7_CONST_DEF (X, 0, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5) +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +#endif + +#elif defined (SWIFT_3_4) +// My [3,4] MiniMax +#if defined (FOR_NUMERATOR) +// 0.999999999999999998080178351225003952632 + 1.51597040589722809277543885223133087789x + 0.636928974763539784860899545005247736093x^2 + 0.0638944455799886571709448345524306512048x^3 +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 0, 0xc2,0x0b,0x51,0x79,0x84,0x27,0x00, 0) +F7_CONST_DEF (X, 0, 0xa3,0x0d,0xc6,0xfc,0x7b,0xf5,0x5d, -1) +F7_CONST_DEF (X, 0, 0x82,0xdb,0x17,0x51,0x4b,0xfc,0xad, 
-4) +#elif defined (FOR_DENOMINATOR) +// 1 + 1.84930373923056200945453682584178320362x + 1.05336355450697082895016644607427716580x^2 + 0.188012025422931152047197803304030906006x^3 + 0.00507310235929401206762490897042543192106x^4 +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 0, 0xec,0xb5,0xfc,0x24,0x2e,0xd1,0xc0, 0) +F7_CONST_DEF (X, 0, 0x86,0xd4,0x9d,0xf0,0xb3,0xef,0xb8, 0) +F7_CONST_DEF (X, 0, 0xc0,0x86,0x39,0x71,0xc8,0xeb,0x3d, -3) +F7_CONST_DEF (X, 0, 0xa6,0x3c,0x44,0x5c,0x78,0x87,0x2d, -8) +#else +F7_CONST_DEF (1_sqrt3, 0, 0x93,0xCD,0x3A,0x2C,0x81,0x98,0xE2, -1) +F7_CONST_DEF (pi_6, 0, 0x86,0x0a,0x91,0xc1,0x6b,0x9b,0x2c, -1) +#endif +#endif // which MiniMax + +#elif defined (F7MOD_asinacos_) +// Relative error < 5.6E-18, quality = 1.00000037 (ideal = 1). +#if defined (FOR_NUMERATOR) +// 0.99999999999999999442491073135027586203 - 1.035234033892197627842731209x + 0.35290206232981519813422591897720574012x^2 - 0.04333483170641685705612351801x^3 + 0.0012557428614630796315205218507940285622x^4 + 0.0000084705471128435769021718764878041684288x^5 +// p = Poly ([Decimal('0.99999999999999999442491073135027586203'), Decimal('-1.0352340338921976278427312087167692142'), Decimal('0.35290206232981519813422591897720574012'), Decimal('-0.043334831706416857056123518013656946650'), Decimal('0.0012557428614630796315205218507940285622'), Decimal('0.0000084705471128435769021718764878041684288')]) +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 1, 0x84,0x82,0x8c,0x7f,0xa2,0xf6,0x65, 0) +F7_CONST_DEF (X, 0, 0xb4,0xaf,0x94,0x40,0xcb,0x86,0x69, -2) +F7_CONST_DEF (X, 1, 0xb1,0x7f,0xdd,0x4f,0x4e,0xbe,0x1d, -5) +F7_CONST_DEF (X, 0, 0xa4,0x97,0xbd,0x0b,0x59,0xc9,0x25, -10) +F7_CONST_DEF (X, 0, 0x8e,0x1c,0xb9,0x0b,0x50,0x6c,0xce, -17) +#elif defined (FOR_DENOMINATOR) +// 1 - 1.118567367225532923662371649x + 0.42736600959872448854098334016758333519x^2 - 0.06355588484963171659942148390x^3 + 0.0028820878185134035637440105959294542908x^4 +// q = Poly ([Decimal('1'), Decimal('-1.1185673672255329236623716486696411533'), Decimal('0.42736600959872448854098334016758333519'), Decimal('-0.063555884849631716599421483898013782858'), Decimal('0.0028820878185134035637440105959294542908')]) +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 1, 0x8f,0x2d,0x37,0x2a,0x4d,0xa1,0x57, 0) +F7_CONST_DEF (X, 0, 0xda,0xcf,0xb7,0xb5,0x4c,0x0d,0xee, -2) +F7_CONST_DEF (X, 1, 0x82,0x29,0x96,0x77,0x2e,0x19,0xc7, -4) +F7_CONST_DEF (X, 0, 0xbc,0xe1,0x68,0xec,0xba,0x20,0x29, -9) +#endif + +#elif defined (F7MOD_sincos_) +#if defined (FOR_SIN) +// sin(sqrt(x)) / sqrt(x) in [0, pi^2/16] ~ [0, 0.6169]. +// Error < 3.313E-18, quality@quot ~ 1. 
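+// (Thus P approximates sin(sqrt(y))/sqrt(y); the caller obtains sin(x)
+// as x * P(x^2) for the reduced argument x.)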
+// p = Poly ([Decimal('0.9999999999999999966879566228039012'), Decimal('-0.1666666666666661475365354492822562'), Decimal('0.008333333333320002444252560522892007'), Decimal('-0.0001984126982840212992698250499618594'), Decimal('0.000002755731329901509333692668814721451'), Decimal('-2.505070584638448064973439248538648E-8'), Decimal('1.589413637225924008237178592214358E-10')]) +// 0.9999999999999999966879566228039012 - 0.1666666666666661475365354492822562x + 0.008333333333320002444252560522892007x^2 - 0.0001984126982840212992698250499618594x^3 + 0.000002755731329901509333692668814721451x^4 - 2.505070584638448064973439248538648E-8x^5 + 1.589413637225924008237178592214358E-10x^6 +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 1, 0xaa,0xaa,0xaa,0xaa,0xaa,0xaa,0x15, -3) +F7_CONST_DEF (X, 0, 0x88,0x88,0x88,0x88,0x87,0x98,0x63, -7) +F7_CONST_DEF (X, 1, 0xd0,0x0d,0x00,0xcd,0xc9,0x7e,0x34, -13) +F7_CONST_DEF (X, 0, 0xb8,0xef,0x1a,0x8f,0x9e,0x91,0xa2, -19) +F7_CONST_DEF (X, 1, 0xd7,0x2f,0x15,0xb0,0x7c,0x11,0x7a, -26) +F7_CONST_DEF (X, 0, 0xae,0xc2,0x04,0x43,0x51,0x29,0xe8, -33) +#elif defined (FOR_COS) +// cos(sqrt(x)) in [0, pi^2/16] ~ [0, 0.6169]. +// Error < 3.6E-20, quality@quot ~ 1. +// p = Poly ([Decimal('0.999999999999999999964159204335255989833'), Decimal('-0.499999999999999992843598630174036090975'), Decimal('0.0416666666666664302574165741342831192783'), Decimal('-0.00138888888888589604372397206213200172560'), Decimal('0.0000248015872828994642415389301648196400167'), Decimal('-2.75573128656963697259081300852869272237E-7'), Decimal('2.08755551457127352241183195408105664857E-9'), Decimal('-1.13521232075810109334454849167561526185E-11')]) +// 0.999999999999999999964159204335255989833 - 0.499999999999999992843598630174036090975x + 0.0416666666666664302574165741342831192783x^2 - 0.00138888888888589604372397206213200172560x^3 + 0.0000248015872828994642415389301648196400167x^4 - 2.75573128656963697259081300852869272237E-7x^5 + 2.08755551457127352241183195408105664857E-9x^6 - 1.13521232075810109334454849167561526185E-11x^7 +F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 1, 0xff,0xff,0xff,0xff,0xff,0xff,0xff, -2) +F7_CONST_DEF (X, 0, 0xaa,0xaa,0xaa,0xaa,0xaa,0xa9,0x9a, -5) +F7_CONST_DEF (X, 1, 0xb6,0x0b,0x60,0xb6,0x09,0xb1,0x66, -10) +F7_CONST_DEF (X, 0, 0xd0,0x0d,0x00,0xcd,0x6b,0xb3,0xf0, -16) +F7_CONST_DEF (X, 1, 0x93,0xf2,0x7b,0x7f,0x10,0xce,0x3d, -22) +F7_CONST_DEF (X, 0, 0x8f,0x74,0xaa,0x3c,0xcf,0x51,0x3d, -29) +F7_CONST_DEF (X, 1, 0xc7,0xb5,0x6a,0xf8,0x0e,0x32,0x07, -37) +#else +F7_CONST_DEF (pi_low,0, 0xd3,0x13,0x19,0x8a,0x2e,0x03,0x70, 1 - F7_MANT_BITS-2) +#endif + +#endif +#endif /* ! IN_LIBF7_H && ! F7MOD_const_ */ diff --git a/libgcc/config/avr/libf7/libf7-constdef.h b/libgcc/config/avr/libf7/libf7-constdef.h new file mode 100644 index 0000000..8419f3b --- /dev/null +++ b/libgcc/config/avr/libf7/libf7-constdef.h @@ -0,0 +1,43 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of LIBF7, which is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined STATIC +#define STATIC /* empty */ +#endif + +#if USE_LPM +#define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \ + STATIC const __attribute__((__progmem__)) \ + f7_t F7_(const_ ## NAME ## _P) = \ + { .flags = FLAGS, .mant = { M0, M1, M2, M3, M4, M5, M6 }, .expo = EXPO }; + #include "libf7-const.def" +#undef F7_CONST_DEF +#else +#define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \ + STATIC const f7_t F7_(const_ ## NAME) = \ + { .flags = FLAGS, .mant = { M0, M1, M2, M3, M4, M5, M6 }, .expo = EXPO }; + #include "libf7-const.def" +#undef F7_CONST_DEF +#endif // USE_LPM + +#undef STATIC diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c new file mode 100644 index 0000000..d1e3348 --- /dev/null +++ b/libgcc/config/avr/libf7/libf7.c @@ -0,0 +1,2501 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of LIBF7, which is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libf7.h" + +#ifndef __AVR_TINY__ + +#define ALIAS(X, Y) \ + F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y; + +#define DALIAS(...) // empty +#define LALIAS(...) 
// empty
+
+#ifndef IN_LIBGCC2
+
+#include <stdio.h>
+#include <assert.h>
+
+#define in_libgcc false
+
+_Static_assert (sizeof (f7_t) == 10 && F7_MANT_BYTES == 7,
+ "libf7 will only work with 7-byte mantissa.");
+#else
+
+#define in_libgcc true
+
+#if __SIZEOF_DOUBLE__ == 8
+#undef DALIAS
+#define DALIAS(X,Y) \
+ F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
+#endif
+
+#if __SIZEOF_LONG_DOUBLE__ == 8
+#undef LALIAS
+#define LALIAS(X,Y) \
+ F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
+#endif
+
+#endif // in libgcc
+
+static F7_INLINE
+void f7_assert (bool x)
+{
+ if (!in_libgcc && !x)
+ __builtin_abort();
+}
+
+static F7_INLINE
+int16_t abs_ssat16 (int16_t a)
+{
+ _Sat _Fract sa = __builtin_avr_rbits (a);
+ return __builtin_avr_bitsr (__builtin_avr_absr (sa));
+}
+
+static F7_INLINE
+int16_t add_ssat16 (int16_t a, int16_t b)
+{
+ _Sat _Fract sa = __builtin_avr_rbits (a);
+ _Sat _Fract sb = __builtin_avr_rbits (b);
+ return __builtin_avr_bitsr (sa + sb);
+}
+
+static F7_INLINE
+int16_t sub_ssat16 (int16_t a, int16_t b)
+{
+ _Sat _Fract sa = __builtin_avr_rbits (a);
+ _Sat _Fract sb = __builtin_avr_rbits (b);
+ return __builtin_avr_bitsr (sa - sb);
+}
+
+static F7_INLINE
+int8_t ssat8_range (int16_t a, int8_t range)
+{
+ if (a >= range)
+ return range;
+ if (a <= -range)
+ return -range;
+ return a;
+}
+
+
+#define IN_LIBF7_H
+ #define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \
+ F7_UNUSED static const uint8_t F7_(const_##NAME##_msb) = M6; \
+ F7_UNUSED static const int16_t F7_(const_##NAME##_expo) = EXPO;
+ #include "libf7-const.def"
+ #undef F7_CONST_DEF
+#undef IN_LIBF7_H
+
+
+/*
+ libgcc naming conventions for conversions:
+
+ __float<fmode><fmode> : Convert float modes.
+ __floatun<imode><fmode>: Convert unsigned integral to float.
+ __fix<fmode><imode> : Convert float to signed integral.
+ __fixuns<fmode><imode> : Convert float to unsigned integral.
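+
+ Examples from this file (modulo the f7_ prefix, which f7-renames.h
+ remaps): f7_floatsidf (int32_t -> double), f7_floatunsidf
+ (uint32_t -> double), f7_extendsfdf2 (float -> double), f7_truncdfsf2
+ (double -> float), f7_fixdfsi (double -> int32_t), f7_fixunsdfdi
+ (double -> uint64_t).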
+*/ + + +#ifdef F7MOD_floatundidf_ +F7_WEAK +f7_double_t __floatundidf (uint64_t x) +{ + f7_t xx; + f7_set_u64 (&xx, x); + return f7_get_double (&xx); +} +#endif // F7MOD_floatundidf_ + + +#ifdef F7MOD_floatdidf_ +F7_WEAK +f7_double_t __floatdidf (int64_t x) +{ + f7_t xx; + f7_set_s64 (&xx, x); + return f7_get_double (&xx); +} +#endif // F7MOD_floatdidf_ + + +#ifdef F7MOD_init_ +f7_t* f7_init_impl (uint64_t mant, uint8_t flags, f7_t *cc, int16_t expo) +{ + flags &= F7_FLAGS; + if (f7_class_number (flags)) + { + uint8_t msb; + while ((__builtin_memcpy (&msb, (uint8_t*) &mant + 7, 1), msb)) + { + mant >>= 1; + expo = add_ssat16 (expo, 1); + } + *(uint64_t*) cc->mant = mant; + cc->expo = add_ssat16 (expo, F7_MANT_BITS-1); + + cc = f7_normalize_asm (cc); + } + + cc->flags = flags; + + return cc; +} +#endif // F7MOD_init_ + + +#ifdef F7MOD_set_s16_ +f7_t* f7_set_s16_impl (f7_t *cc, int16_t i16) +{ + uint16_t u16 = (uint16_t) i16; + uint8_t flags = 0; + if (i16 < 0) + { + u16 = -u16; + flags = F7_FLAG_sign; + } + f7_set_u16_impl (cc, u16); + cc->flags = flags; + return cc; +} +#endif // F7MOD_set_s16_ + + +#ifdef F7MOD_set_u16_ +f7_t* f7_set_u16_impl (f7_t *cc, uint16_t u16) +{ + f7_clr (cc); + F7_MANT_HI2 (cc) = u16; + cc->expo = 15; + return f7_normalize_asm (cc); +} +#endif // F7MOD_set_u16_ + + +#ifdef F7MOD_set_s32_ +f7_t* f7_set_s32 (f7_t *cc, int32_t i32) +{ + uint32_t u32 = (uint32_t) i32; + uint8_t flags = 0; + if (i32 < 0) + { + u32 = -u32; + flags = F7_FLAG_sign; + } + cc = f7_set_u32 (cc, u32); + cc->flags = flags; + return cc; +} +ALIAS (f7_set_s32, f7_floatsidf) +#endif // F7MOD_set_s32_ + + +#ifdef F7MOD_set_u32_ +f7_t* f7_set_u32 (f7_t *cc, uint32_t u32) +{ + f7_clr (cc); + F7_MANT_HI4 (cc) = u32; + cc->expo = 31; + return f7_normalize_asm (cc); +} +ALIAS (f7_set_u32, f7_floatunsidf) +#endif // F7MOD_set_u32_ + + +// IEEE 754 single +// float = s bbbbbbbb mmmmmmmmmmmmmmmmmmmmmmm +// 31 +// s = sign +// b = biased exponent, bias = 127 +// m = mantissa + +// +0 = 0 0 0 +// -0 = 1 0 0 +// Inf = S B 0 = S * Inf, B = 0xff +// NaN = S B M, B = 0xff, M != 0 +// Sub-normal = S 0 M = S * 0.M * 2^{1 - bias}, M != 0 +// Normal = S B M = S * 1.M * 2^{B - bias}, B = 1 ... 0xfe + +#define FLT_DIG_EXP 8 +#define FLT_DIG_MANT (31 - FLT_DIG_EXP) +#define FLT_MAX_EXP ((1 << FLT_DIG_EXP) - 1) +#define FLT_EXP_BIAS (FLT_MAX_EXP >> 1) + +#ifdef F7MOD_set_float_ +F7_WEAK +void f7_set_float (f7_t *cc, float f) +{ + uint32_t val32; + + _Static_assert (__SIZEOF_FLOAT__ == 4, ""); + _Static_assert (__FLT_MANT_DIG__ == 24, ""); + __builtin_memcpy (&val32, &f, __SIZEOF_FLOAT__); + + uint16_t val16 = val32 >> 16; + val16 >>= FLT_DIG_MANT - 16; + + uint8_t expo_biased = val16 & FLT_MAX_EXP; + bool sign = val16 & (1u << FLT_DIG_EXP); + + f7_clr (cc); + + uint32_t mant = val32 & ((1ul << FLT_DIG_MANT) -1); + + if (mant == 0) + { + if (expo_biased == 0) + return; + if (expo_biased >= FLT_MAX_EXP) + return f7_set_inf (cc, sign); + } + + if (expo_biased == 0) + expo_biased = 1; // Sub-normal: biased expo of 1 was encoded as 0. 
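+ // (Worked example: 1.0f has biased expo 127 and zero mantissa bits.
+ // Below, the implicit 1 lands at bit 23 of the upper 4 mantissa
+ // bytes and .expo becomes 127 - 127 + 8 = 8; f7_normalize_asm then
+ // shifts the MSBit into place, leaving .expo = 0.)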
+ else if (expo_biased < FLT_MAX_EXP) + mant |= (1ul << FLT_DIG_MANT); + else + return f7_set_nan (cc); + + __builtin_memcpy (& F7_MANT_HI4 (cc), &mant, 4); + + cc->expo = expo_biased - FLT_EXP_BIAS + 31 - FLT_DIG_MANT; + f7_normalize_asm (cc); + f7_set_sign (cc, sign); +} +ALIAS (f7_set_float, f7_extendsfdf2) +#endif // F7MOD_set_float_ + + +#ifdef F7MOD_get_float_ +static F7_INLINE +float make_float (uint32_t x) +{ + float ff; + __builtin_memcpy (&ff, &x, 4); + return ff; + +} + +F7_WEAK +float f7_get_float (const f7_t *aa) +{ + uint8_t a_class = f7_classify (aa); + + if (f7_class_nan (a_class)) + return make_float (0xffc00000 /* NaN: Biased expo = 0xff, mant != 0 */); + + uint32_t mant; + __builtin_memcpy (&mant, &F7_MANT_CONST_HI4 (aa), 4); + + uint8_t expo8 = 0; + uint8_t mant_offset = FLT_DIG_EXP; + int16_t c_expo = add_ssat16 (aa->expo, FLT_EXP_BIAS); + + if (f7_class_zero (a_class) || c_expo <= -FLT_DIG_MANT) + { + // Zero or tiny. + return 0.0f; + } + else if (c_expo >= FLT_MAX_EXP || f7_class_inf (a_class)) + { + // Inf or overflow. + expo8 = FLT_MAX_EXP; + mant = 0; + } + else if (c_expo > 0) + { + // Normal. + expo8 = c_expo; + } + else + { + // Sub-normal: -DIG_MANT < c_expo <= 0. + // Encoding of 0 represents a biased exponent of 1. + // mant_offset in 9...31. + expo8 = 0; + mant_offset += 1 - c_expo; + } + + uint16_t expo16 = expo8 << (FLT_DIG_MANT - 16); + + if (f7_class_sign (a_class)) + expo16 |= 1u << (FLT_DIG_EXP + FLT_DIG_MANT - 16); + + mant >>= mant_offset; + + __asm ("cbr %T0%t2, 1 << (7 & %2)" "\n\t" + "or %C0, %A1" "\n\t" + "or %D0, %B1" + : "+d" (mant) + : "r" (expo16), "n" (FLT_DIG_MANT)); + + return make_float (mant); +} +F7_PURE ALIAS (f7_get_float, f7_truncdfsf2) +#endif // F7MOD_get_float_ + +#define DBL_DIG_EXP 11 +#define DBL_DIG_MANT (63 - DBL_DIG_EXP) +#define DBL_MAX_EXP ((1 << DBL_DIG_EXP) - 1) +#define DBL_EXP_BIAS (DBL_MAX_EXP >> 1) + + +#ifdef F7MOD_set_double_ +void f7_set_double_impl (f7_double_t val64, f7_t *cc) +{ + f7_clr (cc); + register uint64_t mant __asm ("r18") = val64 & ((1ull << DBL_DIG_MANT) -1); + + uint16_t val16 = 3[(uint16_t*) & val64]; + val16 >>= DBL_DIG_MANT - 48; + + uint16_t expo_biased = val16 & DBL_MAX_EXP; + bool sign = val16 & (1u << DBL_DIG_EXP); + + if (mant == 0) + { + if (expo_biased == 0) + return; + if (expo_biased >= DBL_MAX_EXP) + return f7_set_inf (cc, sign); + } + __asm ("" : "+r" (mant)); + + if (expo_biased == 0) + expo_biased = 1; // Sub-normal: biased expo of 1 was encoded as 0. 
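+ // (IEEE double: 1 sign bit, 11 exponent bits, 52 mantissa bits,
+ // bias 1023. Normal numbers get the implicit leading 1 at bit
+ // DBL_DIG_MANT = 52 below.)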
+ else if (expo_biased < DBL_MAX_EXP) + mant |= (1ull << DBL_DIG_MANT); + else + return f7_set_nan (cc); + + *(uint64_t*) & cc->mant = mant; + + cc->expo = expo_biased - DBL_EXP_BIAS + 63 - DBL_DIG_MANT - 8; + f7_normalize_asm (cc); + f7_set_sign (cc, sign); +} +#endif // F7MOD_set_double_ + + +#ifdef F7MOD_set_pdouble_ +void f7_set_pdouble (f7_t *cc, const f7_double_t *val64) +{ + f7_set_double (cc, *val64); +} +#endif // F7MOD_set_pdouble_ + + +#ifdef F7MOD_get_double_ +static F7_INLINE +uint64_t clr_r18 (void) +{ + extern void __clr_8 (void); + register uint64_t r18 __asm ("r18"); + __asm ("%~call %x[f]" : "=r" (r18) : [f] "i" (__clr_8)); + return r18; +} + +static F7_INLINE +f7_double_t make_double (uint64_t x) +{ + register f7_double_t r18 __asm ("r18") = x; + __asm ("" : "+r" (r18)); + return r18; +} + +F7_WEAK +f7_double_t f7_get_double (const f7_t *aa) +{ + uint8_t a_class = f7_classify (aa); + + if (f7_class_nan (a_class)) + { + uint64_t nan = clr_r18() | (0x7fffull << 48); + return make_double (nan); + } + + uint64_t mant; + __builtin_memcpy (&mant, & aa->mant, 8); + + mant &= 0x00ffffffffffffff; + + // FIXME: For subnormals, rounding is premature and should be + // done *after* the mantissa has been shifted into place + // (or the round value be shifted left accordingly). + // Round. + mant += 1u << (F7_MANT_BITS - (1 + DBL_DIG_MANT) - 1); + + uint8_t dex; + register uint64_t r18 __asm ("r18") = mant; + // dex = Overflow ? 1 : 0. + __asm ("bst %T[mant]%T[bitno]" "\n\t" + "clr %0" "\n\t" + "bld %0,0" + : "=r" (dex), [mant] "+r" (r18) + : [bitno] "n" (64 - 8)); + + mant = r18 >> dex; + + uint16_t expo16 = 0; + uint16_t mant_offset = DBL_DIG_EXP - 8; + int16_t c_expo = add_ssat16 (aa->expo, DBL_EXP_BIAS + dex); + + if (f7_class_zero (a_class) || c_expo <= -DBL_DIG_MANT) + { + // Zero or tiny. + return make_double (clr_r18()); + } + else if (c_expo >= DBL_MAX_EXP || f7_class_inf (a_class)) + { + // Inf or overflow. + expo16 = DBL_MAX_EXP; + mant = clr_r18(); + } + else if (c_expo > 0) + { + // Normal. + expo16 = c_expo; + } + else + { + // Sub-normal: -DIG_MANT < c_expo <= 0. + // Encoding expo of 0 represents a biased exponent of 1. + // mant_offset in 5...55 = 63-8. + mant_offset += 1 - c_expo; + } + + expo16 <<= (DBL_DIG_MANT - 48); + + if (f7_class_sign (a_class)) + expo16 |= 1u << (DBL_DIG_EXP + DBL_DIG_MANT - 48); + + // mant >>= mant_offset; + mant = f7_lshrdi3 (mant, mant_offset); + + r18 = mant; + __asm ("cbr %T0%t2, 1 << (7 & %2)" "\n\t" + "or %r0+6, %A1" "\n\t" + "or %r0+7, %B1" + : "+r" (r18) + : "r" (expo16), "n" (DBL_DIG_MANT)); + + return make_double (r18); +} +#endif // F7MOD_get_double_ + + +#ifdef F7MOD_fabs_ +F7_WEAK +void f7_fabs (f7_t *cc, const f7_t *aa) +{ + f7_abs (cc, aa); +} +#endif // F7MOD_fabs_ + + +#ifdef F7MOD_neg_ +F7_WEAK +f7_t* f7_neg (f7_t *cc, const f7_t *aa) +{ + f7_copy (cc, aa); + + uint8_t c_class = f7_classify (cc); + + if (! f7_class_zero (c_class)) + cc->sign = ! f7_class_sign (c_class); + + return cc; +} +#endif // F7MOD_neg_ + + +#ifdef F7MOD_frexp_ +F7_WEAK +void f7_frexp (f7_t *cc, const f7_t *aa, int *expo) +{ + uint8_t a_class = f7_classify (aa); + + if (f7_class_nan (a_class)) + return f7_set_nan (cc); + + if (f7_class_inf (a_class) || aa->expo == INT16_MAX) + return f7_set_inf (cc, f7_class_sign (a_class)); + + if (! 
f7_msbit (aa)) + { + *expo = 0; + return f7_clr (cc); + } + + *expo = 1 + aa->expo; + cc->flags = a_class & F7_FLAG_sign; + cc->expo = -1; + f7_copy_mant (cc, aa); +} +#endif // F7MOD_frexp_ + +#ifdef F7MOD_get_s16_ +F7_WEAK +int16_t f7_get_s16 (const f7_t *aa) +{ + extern int16_t to_s16 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); + return to_s16 (aa, 0xf); +} +#endif // F7MOD_get_s16_ + + +#ifdef F7MOD_get_s32_ +F7_WEAK +int32_t f7_get_s32 (const f7_t *aa) +{ + extern int32_t to_s32 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); + return to_s32 (aa, 0x1f); +} +F7_PURE ALIAS (f7_get_s32, f7_fixdfsi) +#endif // F7MOD_get_s32_ + + +#ifdef F7MOD_get_s64_ + F7_WEAK + int64_t f7_get_s64 (const f7_t *aa) +{ + extern int64_t to_s64 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); + return to_s64 (aa, 0x3f); +} +F7_PURE ALIAS (f7_get_s64, f7_fixdfdi) +#endif // F7MOD_get_s64_ + +#ifdef F7MOD_get_u16_ + F7_WEAK + uint16_t f7_get_u16 (const f7_t *aa) +{ + extern uint16_t to_u16 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); + return to_u16 (aa, 0xf); +} +#endif // F7MOD_get_u16_ + + +#ifdef F7MOD_get_u32_ +F7_WEAK +uint32_t f7_get_u32 (const f7_t *aa) +{ + extern uint32_t to_u32 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); + return to_u32 (aa, 0x1f); +} +F7_PURE ALIAS (f7_get_u32, f7_fixunsdfsi) +#endif // F7MOD_get_u32_ + + +#ifdef F7MOD_get_u64_ +F7_WEAK +uint64_t f7_get_u64 (const f7_t *aa) +{ + extern int64_t to_u64 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); + return to_u64 (aa, 0x3f); +} +F7_PURE ALIAS (f7_get_u64, f7_fixunsdfdi) +#endif // F7MOD_get_u64_ + + +#ifdef F7MOD_cmp_unordered_ +F7_NOINLINE +static int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a); + +F7_WEAK +int8_t f7_cmp_unordered (const f7_t *aa, const f7_t *bb, bool with_sign) +{ + uint8_t a_class = f7_classify (aa); + uint8_t b_class = f7_classify (bb); + + uint8_t a_sign = f7_class_sign (a_class) & with_sign; + uint8_t b_sign = f7_class_sign (b_class) & with_sign; + uint8_t ab_class = a_class | b_class; + ab_class &= with_sign - 2; + + if (f7_class_nan (ab_class)) + return INT8_MIN; + + if (a_sign != b_sign) + return b_sign - a_sign; + + if (f7_class_inf (ab_class)) + return cmp_u8 (a_class, b_class, a_sign); + + if (f7_class_zero (ab_class)) + return cmp_u8 (b_class, a_class, a_sign); + + if (aa->expo < bb->expo) + return a_sign ? 1 : -1; + + if (aa->expo > bb->expo) + return a_sign ? -1 : 1; + + return cmp_u8 (1 + f7_cmp_mant (aa, bb), 1, a_sign); +} + + +int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a) +{ + int8_t c; + __asm ("sub %[a], %[b]" "\n\t" + "breq 1f" "\n\t" + "sbc %[c], %[c]" "\n\t" + "sbci %[c], -1" "\n\t" + "sbrc %[s], 0" "\n\t" + "neg %[c]" "\n\t" + "1:" + : [c] "=d" (c) + : [a] "0" (a_class), [b] "r" (b_class), [s] "r" (sign_a)); + return c; +} +#endif // F7MOD_cmp_unordered_ + + +#ifdef F7MOD_cmp_abs_ +F7_WEAK +int8_t f7_cmp_abs (const f7_t *aa, const f7_t *bb) +{ + return f7_cmp_unordered (aa, bb, false /* no signs */); +} +#endif // F7MOD_cmp_abs_ + + +#ifdef F7MOD_cmp_ +F7_WEAK +int8_t f7_cmp (const f7_t *aa, const f7_t *bb) +{ + return f7_cmp_unordered (aa, bb, true /* with signs */); +} +#endif // F7MOD_cmp_ + + +#ifdef F7MOD_abscmp_msb_ge_ +// Compare absolute value of Number aa against a f7_t represented +// by msb and expo. +F7_WEAK +bool f7_abscmp_msb_ge (const f7_t *aa, uint8_t msb, int16_t expo) +{ + uint8_t a_msb = aa->mant[F7_MANT_BYTES - 1]; + + if (0 == (0x80 & a_msb)) + // 0 or subnormal. + return false; + + return aa->expo == expo + ? 
a_msb >= msb + : aa->expo > expo; +} +#endif // F7MOD_abscmp_msb_ge_ + +#ifdef F7MOD_lt_ +F7_WEAK +bool f7_lt_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_lt (aa, bb); +} +#endif // F7MOD_lt_ + +#ifdef F7MOD_le_ +F7_WEAK +bool f7_le_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_le (aa, bb); +} +#endif // F7MOD_le_ + +#ifdef F7MOD_gt_ +F7_WEAK +bool f7_gt_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_gt (aa, bb); +} +#endif // F7MOD_gt_ + +#ifdef F7MOD_ge_ +F7_WEAK +bool f7_ge_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_ge (aa, bb); +} +#endif // F7MOD_ge_ + +#ifdef F7MOD_ne_ +F7_WEAK +bool f7_ne_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_ne (aa, bb); +} +#endif // F7MOD_ne_ + +#ifdef F7MOD_eq_ +F7_WEAK +bool f7_eq_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_eq (aa, bb); +} +#endif // F7MOD_eq_ + + +#ifdef F7MOD_unord_ +F7_WEAK +bool f7_unord_impl (const f7_t *aa, const f7_t *bb) +{ + return f7_unordered (aa, bb); +} +#endif // F7MOD_unord_ + + +#ifdef F7MOD_minmax_ +F7_WEAK +f7_t* f7_minmax (f7_t *cc, const f7_t *aa, const f7_t *bb, bool do_min) +{ + int8_t cmp = f7_cmp_unordered (aa, bb, true /* with signs */); + + if (cmp == INT8_MIN) + return (f7_set_nan (cc), cc); + + if (do_min) + cmp = -cmp; + + return f7_copy (cc, cmp >= 0 ? aa : bb); +} +#endif // F7MOD_minmax_ + + +#ifdef F7MOD_fmax_ +F7_WEAK +f7_t* f7_fmax (f7_t *cc, const f7_t *aa, const f7_t *bb) +{ + return f7_minmax (cc, aa, bb, false); +} +ALIAS (f7_fmax, f7_max) +#endif // F7MOD_fmax_ + + +#ifdef F7MOD_fmin_ +F7_WEAK +f7_t* f7_fmin (f7_t *cc, const f7_t *aa, const f7_t *bb) +{ + return f7_minmax (cc, aa, bb, true); +} +ALIAS (f7_fmin, f7_min) +#endif // F7MOD_fmin_ + + +#ifdef F7MOD_mulx_ +F7_WEAK +uint8_t f7_mulx (f7_t *cc, const f7_t *aa, const f7_t *bb, bool no_rounding) +{ + uint8_t a_class = f7_classify (aa); + uint8_t b_class = f7_classify (bb); + // From this point on, no more access aa->flags or bb->flags + // to avoid early-clobber when writing cc->flags. + + uint8_t ab_class = a_class | b_class; + // If either value is NaN, return NaN. + if (f7_class_nan (ab_class) + // Any combination of Inf and 0. + || (f7_class_zero (ab_class) && f7_class_inf (ab_class))) + { + cc->flags = F7_FLAG_nan; + return 0; + } + // If either value is 0.0, return 0.0. + if (f7_class_zero (ab_class)) + { + f7_clr (cc); + return 0; + } + // We have 2 non-zero numbers-or-INF. + + uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign; + uint8_t c_inf = ab_class & F7_FLAG_inf; + cc->flags = c_sign | c_inf; + if (c_inf) + return 0; + + int16_t expo = add_ssat16 (aa->expo, bb->expo); + // Store expo and handle expo = INT16_MIN and INT16_MAX. 
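+ // (f7_store_expo saturates: expo = INT16_MAX turns *cc into Inf,
+ // expo = INT16_MIN into 0, and it returns true in exactly these
+ // cases; see store_expo in libf7-asm.sx.)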
+ if (f7_store_expo (cc, expo))
+ return 0;
+
+ return f7_mul_mant_asm (cc, aa, bb, no_rounding);
+}
+#endif // F7MOD_mulx_
+
+
+#ifdef F7MOD_square_
+F7_WEAK
+void f7_square (f7_t *cc, const f7_t *aa)
+{
+ f7_mul (cc, aa, aa);
+}
+#endif // F7MOD_square_
+
+
+#ifdef F7MOD_mul_
+F7_WEAK
+void f7_mul (f7_t *cc, const f7_t *aa, const f7_t *bb)
+{
+ f7_mulx (cc, aa, bb, false);
+}
+#endif // F7MOD_mul_
+
+
+#ifdef F7MOD_Iadd_
+F7_WEAK void f7_Iadd (f7_t *cc, const f7_t *aa) { f7_add (cc, cc, aa); }
+#endif
+
+#ifdef F7MOD_Isub_
+F7_WEAK void f7_Isub (f7_t *cc, const f7_t *aa) { f7_sub (cc, cc, aa); }
+#endif
+
+#ifdef F7MOD_Imul_
+F7_WEAK void f7_Imul (f7_t *cc, const f7_t *aa) { f7_mul (cc, cc, aa); }
+#endif
+
+#ifdef F7MOD_Idiv_
+F7_WEAK void f7_Idiv (f7_t *cc, const f7_t *aa) { f7_div (cc, cc, aa); }
+#endif
+
+#ifdef F7MOD_IRsub_
+F7_WEAK void f7_IRsub (f7_t *cc, const f7_t *aa) { f7_sub (cc, aa, cc); }
+#endif
+
+#ifdef F7MOD_Ineg_
+F7_WEAK void f7_Ineg (f7_t *cc) { f7_neg (cc, cc); }
+#endif
+
+#ifdef F7MOD_Isqrt_
+F7_WEAK void f7_Isqrt (f7_t *cc) { f7_sqrt (cc, cc); }
+#endif
+
+#ifdef F7MOD_Isquare_
+F7_WEAK void f7_Isquare (f7_t *cc) { f7_square (cc, cc); }
+#endif
+
+#ifdef F7MOD_Ildexp_
+F7_WEAK f7_t* f7_Ildexp (f7_t *cc, int ex) { return f7_ldexp (cc, cc, ex); }
+#endif
+
+
+#ifdef F7MOD_add_
+F7_WEAK
+void f7_add (f7_t *cc, const f7_t *aa, const f7_t *bb)
+{
+ f7_addsub (cc, aa, bb, false);
+}
+#endif // F7MOD_add_
+
+
+#ifdef F7MOD_sub_
+F7_WEAK
+void f7_sub (f7_t *cc, const f7_t *aa, const f7_t *bb)
+{
+ f7_addsub (cc, aa, bb, true);
+}
+#endif // F7MOD_sub_
+
+
+#ifdef F7MOD_addsub_
+static void return_with_sign (f7_t *cc, const f7_t *aa, int8_t c_sign)
+{
+ __asm (";;; return with sign");
+ f7_copy (cc, aa);
+ if (c_sign != -1)
+ f7_set_sign (cc, c_sign);
+}
+
+F7_WEAK
+void f7_addsub (f7_t *cc, const f7_t *aa, const f7_t *bb, bool neg_b)
+{
+ uint8_t a_class = f7_classify (aa);
+ uint8_t b_class = f7_classify (bb);
+ // From this point on, no more access aa->flags or bb->flags
+ // to avoid early-clobber when writing cc->flags.
+
+ // Handle NaNs.
+ if (f7_class_nan (a_class | b_class))
+ return f7_set_nan (cc);
+
+ bool a_sign = f7_class_sign (a_class);
+ bool b_sign = f7_class_sign (b_class) ^ neg_b;
+
+ // Add the mantissae?
+ bool do_add = a_sign == b_sign;
+
+ // Handle +Infs and -Infs.
+ bool a_inf = f7_class_inf (a_class);
+ bool b_inf = f7_class_inf (b_class);
+
+ if (a_inf && b_inf)
+ {
+ if (do_add)
+ return f7_set_inf (cc, a_sign);
+ else
+ return f7_set_nan (cc);
+ }
+ else if (a_inf)
+ return f7_set_inf (cc, a_sign);
+ else if (b_inf)
+ return f7_set_inf (cc, b_sign);
+
+ int16_t shift16 = sub_ssat16 (aa->expo, bb->expo);
+
+ // aa + 0 = aa.
+ // Also check MSBit to get rid of Subnormals and 0.
+ if (shift16 > F7_MANT_BITS || f7_is0 (bb))
+ return return_with_sign (cc, aa, -1);
+
+ // 0 + bb = bb.
+ // 0 - bb = -bb.
+ // Also check MSBit to get rid of Subnormals and 0.
+ if (shift16 < -F7_MANT_BITS || f7_is0 (aa))
+ return return_with_sign (cc, bb, b_sign);
+
+ // Now aa and bb are non-zero, non-NaN, non-Inf.
+ // shift > 0 ==> |a| > |b|
+ // shift < 0 ==> |a| < |b|
+ int8_t shift = (int8_t) shift16;
+ bool c_sign = a_sign;
+ if (shift < 0
+ || (shift == 0 && f7_cmp_mant (aa, bb) < 0))
+ {
+ const f7_t *p = aa; aa = bb; bb = p;
+ c_sign = b_sign;
+ shift = -shift;
+ }
+ uint8_t shift2 = (uint8_t) (shift << 1);
+
+ cc->expo = aa->expo;
+ // From this point on, no more access aa->expo or bb->expo
+ // to avoid early-clobber when writing cc->expo.
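+ // (shift2 | do_add packs the argument byte for the asm routine:
+ // bit 0 selects add vs. subtract, bits 7..1 hold the alignment
+ // shift; cf. addsub_mant_scaled in libf7-asm.sx.)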
+ + cc->flags = c_sign; _Static_assert (F7_FLAGNO_sign == 0, ""); + + // This function uses neither .expo nor .flags from either aa or bb, + // hence there is early-clobber for cc->expo and cc->flags. + f7_addsub_mant_scaled_asm (cc, aa, bb, shift2 | do_add); +} +#endif // F7MOD_addsub_ + + +#ifdef F7MOD_madd_msub_ +F7_WEAK +void f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd, + bool neg_d) +{ + f7_t xx7, *xx = &xx7; + uint8_t x_lsb = f7_mulx (xx, aa, bb, true /* no rounding */); + uint8_t x_sign = f7_signbit (xx); + int16_t x_expo = xx->expo; + f7_addsub (xx, xx, dd, neg_d); + // Now add LSB. If cancellation occurred in the add / sub, then we have the + // chance of extra 8 bits of precision. Turn LSByte into f7_t. + f7_clr (cc); + cc->expo = sub_ssat16 (x_expo, F7_MANT_BITS); + cc->mant[F7_MANT_BYTES - 1] = x_lsb; + cc = f7_normalize_asm (cc); + cc->flags = x_sign; + f7_Iadd (cc, xx); +} +#endif // F7MOD_madd_msub_ + +#ifdef F7MOD_madd_ +F7_WEAK +void f7_madd (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd) +{ + f7_madd_msub (cc, aa, bb, dd, false); +} +#endif // F7MOD_madd_ + +#ifdef F7MOD_msub_ +F7_WEAK +void f7_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd) +{ + f7_madd_msub (cc, aa, bb, dd, true); +} +#endif // F7MOD_msub_ + + +#ifdef F7MOD_ldexp_ +F7_WEAK +f7_t* f7_ldexp (f7_t *cc, const f7_t *aa, int delta) +{ + uint8_t a_class = f7_classify (aa); + + cc->flags = a_class; + + // Inf and NaN. + if (! f7_class_number (a_class)) + return cc; + + if (f7_msbit (aa) == 0) + return (f7_clr (cc), cc); + + int16_t expo = add_ssat16 (delta, aa->expo); + // Store expo and handle expo = INT16_MIN and INT16_MAX. + if (! f7_store_expo (cc, expo)) + f7_copy_mant (cc, aa); + + return cc; +} +#endif // F7MOD_ldexp_ + + +#if USE_LPM +#elif USE_LD +#else +#error need include "asm-defs.h" +#endif // USE_LPM + +/* + Handling constants: + + F7_PCONST (PVAR, X) + + Set f7_t [const] *PVAR to an LD address for one + of the f7_const_X[_P] constants. + PVAR might be set to point to a local auto that serves + as temporary storage for f7_const_X_P. PVAR is only + valid in the current block. + + const f7_t* F7_PCONST_U16 (PVAR, <ident> X) // USE_LD + f7_t* F7_PCONST_U16 (PVAR, uint16_t X) // USE_LPM + + Set f7_t [const] *PVAR to an LD address for one of the + f7_const_X[_P] constants. PVAR might be set to point to a + local auto that serves as temporary storage for X. PVAR is + only valid in the current block. + + F7_PCONST_VAR (PVAR, VAR) + + VAR is a pointer variable holding the address of some f7_const_X[_P] + constant. Set [const] f7_t *PVAR to a respective LD address. + PVAR might be set to point to a local auto that serves + as temporary storage for f7_const_X_P. PVAR is only + valid in the current block. + + F7_CONST_ADDR (<ident> CST, f7_t* PTMP) + + Return an LD address for some f7_const_X[_P] constant. + *PTMP might be needed to hold a copy of f7_const_X_P in RAM. + + f7_t* F7_U16_ADDR (uint16_t X, f7_t* PTMP) // USE_LPM + const f7_t* F7_U16_ADDR (<cst-ident> X, <unused>) // USE_LD + + Return an LD address to compile-time constant uint16_t X which is + also known as f7_const_X[_P]. *PTMP might be set to (f7_t) X. + + f7_t* f7_const (f7_t *PVAR, <cst-ident> X) + + Copy f7_const_X[_P] to *PVAR. + + f7_t* f7_copy_flash (f7_t *DST, const f7_t *SRC) + + Copy to *DST with LD (from .rodata in flash) if the address + space is linear, or with LPM (from .progmem.data) if the + address space is not linear.
+ + f7_t* f7_copy (f7_t *DST, const f7_t* SRC) + + Copy to RAM using LD. + + f7_t* f7_copy_P (f7_t *DST, const f7_t *SRC) + + Copy to RAM using LPM. +*/ + +#if USE_LPM + #define F7_RAW_CONST_ADDR(CST) \ + & F7_(const_##CST##_P) + + #define F7_PCONST(PVAR, CST) \ + f7_t _var_for_##CST; \ + f7_copy_P (& _var_for_##CST, & F7_(const_##CST##_P)); \ + PVAR = & _var_for_##CST + + #define F7_PCONST_U16(PVAR, CST) \ + f7_t _var_for_##CST; \ + PVAR = f7_set_u16 (& _var_for_##CST, CST) + + #define F7_PCONST_VAR(PVAR, VAR) \ + f7_t _var_for_##VAR; \ + f7_copy_P (& _var_for_##VAR, VAR); \ + PVAR = & _var_for_##VAR + + #define MAYBE_const // empty + + #define F7_CONST_ADDR(CST, PTMP) \ + f7_copy_P ((PTMP), & F7_(const_##CST##_P)) + + #define F7_U16_ADDR(CST, PTMP) \ + f7_set_u16 ((PTMP), CST) + +#elif USE_LD + #define F7_RAW_CONST_ADDR(CST) \ + & F7_(const_##CST) + + #define F7_PCONST(PVAR, CST) \ + PVAR = & F7_(const_##CST) + + #define F7_PCONST_U16(PVAR, CST) \ + PVAR = & F7_(const_##CST) + + #define F7_PCONST_VAR(PVAR, VAR) \ + PVAR = (VAR) + + #define F7_CONST_ADDR(CST, PTMP) \ + (& F7_(const_##CST)) + + #define F7_U16_ADDR(CST, PTMP) \ + (& F7_(const_##CST)) + + #define MAYBE_const const +#endif + + + +#define DD(str, X) \ + do { \ + LOG_PSTR (PSTR (str)); \ + f7_dump (X); \ + } while (0) + +#undef DD +#define DD(...) (void) 0 + + +#ifdef F7MOD_sqrt_ +static void sqrt_worker (f7_t *cc, const f7_t *rr) +{ + f7_t tmp7, *tmp = &tmp7; + f7_t aa7, *aa = &aa7; + + // aa in [1/2, 2) => aa->expo in { -1, 0 }. + int16_t a_expo = -(rr->expo & 1); + int16_t c_expo = (rr->expo - a_expo) >> 1; // FIXME: r_expo = INT_MAX??? + + __asm ("" : "+r" (aa)); + + f7_copy (aa, rr); + aa->expo = a_expo; + + // No use of rr or *cc past this point: We may use cc as temporary. + // Approximate square-root of A by X <-- (X + A / X) / 2. + + f7_sqrt_approx_asm (cc, aa); + + // Iterate X <-- (X + A / X) / 2. + // 3 Iterations with 16, 32, 58 bits of precision for the quotient. + + for (uint8_t prec = 16; (prec & 0x80) == 0; prec <<= 1) + { + f7_divx (tmp, aa, cc, (prec & 64) ? 2 + F7_MANT_BITS : prec); + f7_Iadd (cc, tmp); + // This will never underflow because |c_expo| is small. + cc->expo--; + } + + // Similar: |c_expo| is small, hence no ldexp needed. 
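+ // Worked example: R = 8 has r_expo = 3 (odd), so a_expo = -1 and + // c_expo = (3 - (-1)) / 2 = 2; the iteration approximates sqrt (0.5), + // and the scaling below yields sqrt (0.5) * 2^2 = sqrt (8).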
+ cc->expo += c_expo; +} + +F7_WEAK +void f7_sqrt (f7_t *cc, const f7_t *aa) +{ + uint8_t a_class = f7_classify (aa); + + if (f7_class_nan (a_class) || f7_class_sign (a_class)) + return f7_set_nan (cc); + + if (f7_class_inf (a_class)) + return f7_set_inf (cc, 0); + + if (f7_class_zero (a_class)) + return f7_clr (cc); + + sqrt_worker (cc, aa); +} +#endif // F7MOD_sqrt_ + + +#ifdef F7MOD_hypot_ +F7_WEAK +void f7_hypot (f7_t *cc, const f7_t *aa, const f7_t *bb) +{ + f7_t xx7, *xx = &xx7; + + f7_square (xx, aa); + f7_square (cc, bb); + f7_Iadd (cc, xx); + f7_Isqrt (cc); +} +#endif // F7MOD_hypot_ + + +#ifdef F7MOD_const_m1_ +#include "libf7-constdef.h" +#endif // -1 + +#ifdef F7MOD_const_1_2_ +#include "libf7-constdef.h" +#endif // 1/2 + +#ifdef F7MOD_const_1_3_ +#include "libf7-constdef.h" +#endif // 1/3 + +#ifdef F7MOD_const_ln2_ +#include "libf7-constdef.h" +#endif // ln2 + +#ifdef F7MOD_const_1_ln2_ +#include "libf7-constdef.h" +#endif // 1_ln2 + +#ifdef F7MOD_const_ln10_ +#include "libf7-constdef.h" +#endif // ln10 + +#ifdef F7MOD_const_1_ln10_ +#include "libf7-constdef.h" +#endif // 1_ln10 + +#ifdef F7MOD_const_1_ +#include "libf7-constdef.h" +#endif // 1 + +#ifdef F7MOD_const_sqrt2_ +#include "libf7-constdef.h" +#endif // sqrt2 + +#ifdef F7MOD_const_2_ +#include "libf7-constdef.h" +#endif // 2 + +#ifdef F7MOD_const_pi_ +#include "libf7-constdef.h" +#endif // pi + + +#ifdef F7MOD_divx_ + +// C /= A +extern void f7_div_asm (f7_t*, const f7_t*, uint8_t); + +F7_WEAK +void f7_divx (f7_t *cc, const f7_t *aa, const f7_t *bb, uint8_t quot_bits) +{ + uint8_t a_class = f7_classify (aa); + uint8_t b_class = f7_classify (bb); + // From this point on, no more access aa->flags or bb->flags + // to avoid early-clobber when writing cc->flags. + + // If either value is NaN, return NaN. + if (f7_class_nan (a_class | b_class) + // If both values are Inf or both are 0, return NaN. + || f7_class_zero (a_class & b_class) + || f7_class_inf (a_class & b_class) + // Inf / 0 = NaN. + || (f7_class_inf (a_class) && f7_class_zero (b_class))) + { + return f7_set_nan (cc); + } + + // 0 / B = 0 for non-zero, non-NaN B. + // A / Inf = 0 for non-zero numbers A. + if (f7_class_zero (a_class) || f7_class_inf (b_class)) + return f7_clr (cc); + + uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign; + + if (f7_class_inf (a_class) || f7_class_zero (b_class)) + return f7_set_inf (cc, c_sign); + + cc->flags = c_sign; _Static_assert (F7_FLAGNO_sign == 0, ""); + int16_t expo = sub_ssat16 (aa->expo, bb->expo); + + // Store expo and handle expo = INT16_MIN and INT16_MAX. + if (f7_store_expo (cc, expo)) + return; + + f7_t ss7, *ss = &ss7; + ss->flags = cc->flags; + ss->expo = cc->expo; + + f7_copy_mant (ss, aa); + f7_div_asm (ss, bb, quot_bits); + f7_copy (cc, ss); +} +#endif // F7MOD_divx_ + + +#ifdef F7MOD_div_ +F7_WEAK +void f7_div (f7_t *cc, const f7_t *aa, const f7_t *bb) +{ + /* When f7_divx calls f7_div_asm, dividend and divisor are valid + mantissae, i.e. their MSBit is set. Therefore, the quotient will + be in [0x0.ff..., 0x0.40...] and to adjust it, at most 1 left-shift + is needed. Compute F7_MANT_BITS + 2 bits of the quotient: + One bit is used for rounding, and one bit might be consumed by the + mentioned left-shift. 
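+ At the extreme, a dividend mantissa near its minimum divided by a + divisor mantissa near its maximum lands close to 0x0.40..., i.e. with + the MSBit clear, so the single left-shift is really needed; the one + extra bit below it is what the rounding step consumes.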
*/ + + f7_divx (cc, aa, bb, 2 + F7_MANT_BITS); +} +#endif // F7MOD_div_ + + +#ifdef F7MOD_div1_ +F7_WEAK +void f7_div1 (f7_t *cc, const f7_t *aa) +{ + F7_PCONST_U16 (const f7_t *one, 1); + f7_div (cc, one, aa); +} +#endif // F7MOD_div1_ + + +#ifdef F7MOD_fmod_ +F7_WEAK +void f7_fmod (f7_t *cc, const f7_t *aa, const f7_t *bb) +{ + uint8_t a_class = f7_classify (aa); + uint8_t b_class = f7_classify (bb); + + if (! f7_class_number (a_class) + || f7_class_nan (b_class) + || f7_class_zero (b_class)) + { + return f7_set_nan (cc); + } + + // A == 0 and B != 0 => 0. + if (f7_class_zero (a_class)) + return f7_clr (cc); + + f7_t zz7, *zz = & zz7; + + f7_div (zz, aa, bb); + + // Z in Z, |Z| <= |A/B|. + f7_trunc (zz, zz); + + // C = A - Z * B. + f7_msub (cc, zz, bb, aa); + cc->flags ^= F7_FLAG_sign; +} +#endif // F7MOD_fmod_ + + +#ifdef F7MOD_truncx_ +F7_WEAK +f7_t* f7_truncx (f7_t *cc, const f7_t *aa, bool do_floor) +{ + uint8_t a_class = f7_classify (aa); + + if (! f7_class_nonzero (a_class)) + return f7_copy (cc, aa); + + bool sign = f7_class_sign (a_class); + + int16_t a_expo = aa->expo; + + if (a_expo < 0) + { + // |A| < 1. + if (sign & do_floor) + return f7_set_s16 (cc, -1); + + f7_clr (cc); + return cc; + } + else if (a_expo >= F7_MANT_BITS - 1) + // A == floor (A). + return f7_copy (cc, aa); + + f7_t tmp7, *tmp = &tmp7; + + // Needed if aa === cc. + f7_copy (tmp, aa); + + cc->flags = sign; + cc->expo = a_expo; + f7_clr_mant_lsbs (cc, aa, F7_MANT_BITS - 1 - a_expo); + + if (do_floor && cc->sign && f7_cmp_mant (cc, tmp) != 0) + { + F7_PCONST_U16 (const f7_t *one, 1); + f7_Isub (cc, one); + } + + return cc; +} +#endif // F7MOD_truncx_ + + +#ifdef F7MOD_floor_ +F7_WEAK +f7_t* f7_floor (f7_t *cc, const f7_t *aa) +{ + return f7_truncx (cc, aa, true); +} +#endif // F7MOD_floor_ + + +#ifdef F7MOD_trunc_ +F7_WEAK +f7_t* f7_trunc (f7_t *cc, const f7_t *aa) +{ + return f7_truncx (cc, aa, false); +} +#endif // F7MOD_trunc_ + + +#ifdef F7MOD_ceil_ +F7_WEAK +void f7_ceil (f7_t *cc, const f7_t *aa) +{ + cc = f7_copy (cc, aa); + cc->flags ^= F7_FLAG_sign; + cc = f7_floor (cc, cc); + f7_Ineg (cc); +} +#endif // F7MOD_ceil_ + + +#ifdef F7MOD_round_ +F7_WEAK +void f7_round (f7_t *cc, const f7_t *aa) +{ + f7_t tmp; + (void) tmp; + const f7_t *half = F7_CONST_ADDR (1_2, &tmp); + + f7_addsub (cc, aa, half, f7_signbit (aa)); + f7_trunc (cc, cc); +} +#endif // F7MOD_round_ + + +#ifdef F7MOD_horner_ + +// Assertion when using this function is that either cc != xx, +// or if cc == xx, then tmp1 must be non-NULL and tmp1 != xx. +// In general, the calling functions have a spare f7_t object available +// and can pass it down to save some stack. +// Moreover, the power series must have degree 1 at least. + +F7_WEAK +void f7_horner (f7_t *cc, const f7_t *xx, uint8_t n_coeff, const f7_t *coeff, + f7_t *tmp1) +{ + f7_assert (n_coeff > 1); + + if (cc != xx) + tmp1 = cc; + else + f7_assert (tmp1 != NULL && tmp1 != xx); + + f7_t *yy = tmp1; + f7_t tmp27, *tmp2 = &tmp27; + + n_coeff--; + const f7_t *pcoeff = coeff + n_coeff; + + f7_copy_flash (yy, pcoeff); + + while (1) + { + --pcoeff; +#if 1 + f7_Imul (yy, xx); + const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff); + if (coeff == pcoeff) + return f7_add (cc, yy, cst); + f7_Iadd (yy, cst); +#else + const f7_t *cst = USE_LD ?
pcoeff : f7_copy_P (tmp2, pcoeff); + f7_madd (yy, yy, xx, cst); + if (coeff == pcoeff) + { + f7_copy (cc, yy); + return; + } +#endif + } + + __builtin_unreachable(); +} +#endif // F7MOD_horner_ + + +#ifdef F7MOD_log_ +F7_WEAK +void f7_log (f7_t *cc, const f7_t *aa) +{ + f7_logx (cc, aa, NULL); +} +#endif // F7MOD_log_ + + +#ifdef F7MOD_log2_ +F7_WEAK +void f7_log2 (f7_t *cc, const f7_t *aa) +{ + f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln2)); +} +#endif // F7MOD_log2_ + + +#ifdef F7MOD_log10_ +F7_WEAK +void f7_log10 (f7_t *cc, const f7_t *aa) +{ + f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln10)); +} +#endif // F7MOD_log10_ + + +#ifdef F7MOD_logx_ + +#define ARRAY_NAME coeff_artanh +#include "libf7-array.def" +#undef ARRAY_NAME + +// Compute P * ln(A) if P != NULL and ln(A), otherwise. +// P is a LD-address if USE_LD and a LPM-address if USE_LPM. +// Assumption is that P > 0. + +F7_WEAK +void f7_logx (f7_t *cc, const f7_t *aa, const f7_t *p) +{ + uint8_t a_class = f7_classify (aa); + + if (f7_class_nan (a_class) || f7_class_sign (a_class)) + return f7_set_nan (cc); + + if (f7_class_inf (a_class)) + return f7_set_inf (cc, 0); + + if (f7_class_zero (a_class)) + return f7_set_inf (cc, 1); + + f7_t *yy = cc; + f7_t xx7, *xx = &xx7; + f7_t tmp7, *tmp = &tmp7; + + // Y in [1, 2] = A * 2 ^ (-a_expo). + int16_t a_expo = aa->expo; + f7_copy (yy, aa); + yy->expo = 0; + + // Y in [1 / sqrt2, sqrt2]. + + if (f7_abscmp_msb_ge (yy, F7_(const_sqrt2_msb), F7_(const_sqrt2_expo))) + { + yy->expo = -1; + a_expo = add_ssat16 (a_expo, 1); + } + + const f7_t *one = F7_U16_ADDR (1, & tmp7); + + // X := (Y - 1) / (Y + 1), |X| <= (sqrt2 - 1) / (sqrt2 + 1) ~ 0.172. + f7_sub (xx, yy, one); + f7_Iadd (yy, one); + f7_Idiv (xx, yy); + + // Y := X^2, |Y| < 0.03. + f7_square (yy, xx); + + // Y := artanh (X^2) / X + f7_horner (yy, yy, n_coeff_artanh, coeff_artanh, tmp); + + // C = X * Y = ln A - a_expo * ln2. + f7_mul (cc, xx, yy); + + // X := a_expo * ln2. + f7_set_s16 (xx, a_expo); + f7_Imul (xx, F7_CONST_ADDR (ln2, & tmp7)); + + // C = ln A. + f7_Iadd (cc, xx); + + if (p && USE_LPM) + f7_Imul (cc, f7_copy_P (tmp, p)); + if (p && USE_LD) + f7_Imul (cc, p); +} +#endif // F7MOD_logx_ + + +#ifdef F7MOD_exp_ + +#define ARRAY_NAME coeff_exp +#include "libf7-array.def" +#undef ARRAY_NAME + +#define STATIC static +#include "libf7-constdef.h" // ln2_low +#undef STATIC + +F7_WEAK +void f7_exp (f7_t *cc, const f7_t *aa) +{ + uint8_t a_class = f7_classify (aa); + + if (f7_class_nan (a_class)) + return f7_set_nan (cc); + + /* The maximal exponent of 2 for a double is 1023, hence we may limit + to |A| < 1023 * ln2 ~ 709. We limit to 1024 ~ 1.99 * 2^9 */ + + if (f7_class_inf (a_class) + || (f7_class_nonzero (a_class) && aa->expo >= 9)) + { + if (f7_class_sign (a_class)) + return f7_clr (cc); + else + return f7_set_inf (cc, 0); + } + + f7_t const *cst; + f7_t qq7, *qq = &qq7; + + F7_PCONST (cst, ln2); + + // We limited |A| to 1024 and are now dividing by ln2, hence Q will + // be at most 1024 / ln2 ~ 1477 and fit into 11 bits. We will + // round Q anyway, hence only request 11 bits from the division and + // one additional bit that might be needed to normalize the quotient. + f7_divx (qq, aa, cst, 1 + 11); + + // Use the smallest (by absolute value) remainder system. + f7_round (qq, qq); + int16_t q = f7_get_s16 (qq); + + // Reducing A mod ln2 gives |C| <= ln2 / 2, C = -A mod ln2. + f7_msub (cc, qq, cst, aa); + + // Corrigendum: We added Q * ln2; now add Q times the low part of ln2 + // for better precision. 
Due to |C| < |A| this is not a no-op in general. + const f7_t *yy = F7_CONST_ADDR (ln2_low, &_var_for_ln2); + f7_madd (cc, qq, yy, cc); + + // Because we computed C = -A mod ... + cc->flags ^= F7_FLAG_sign; + + // Reduce further to |C| < ln2 / 8 which is the range of our MiniMax poly. + const uint8_t MAX_LN2_RED = 3; + int8_t scal2 = 0; + + while (f7_abscmp_msb_ge (cc, F7_(const_ln2_msb), + F7_(const_ln2_expo) - MAX_LN2_RED)) + { + scal2++; + cc->expo--; + } + + f7_horner (cc, cc, n_coeff_exp, coeff_exp, qq); + + while (--scal2 >= 0) + f7_Isquare (cc); + + f7_Ildexp (cc, q); +} +#endif // F7MOD_exp_ + + +#ifdef F7MOD_pow10_ +F7_WEAK +void f7_pow10 (f7_t *cc, const f7_t *aa) +{ + const f7_t *p_ln10; + F7_PCONST (p_ln10, ln10); + f7_mul (cc, aa, p_ln10); + f7_exp (cc, cc); +} +ALIAS (f7_pow10, f7_exp10) +#endif // F7MOD_pow10_ + + +#ifdef F7MOD_cbrt_ +F7_WEAK +void f7_cbrt (f7_t *cc, const f7_t *aa) +{ + f7_copy (cc, aa); + const f7_t *p_1_3; + uint8_t c_flags = cc->flags; + cc->flags &= ~F7_FLAG_sign; + f7_log (cc, cc); + F7_PCONST (p_1_3, 1_3); + f7_Imul (cc, p_1_3); + f7_exp (cc, cc); + + if (c_flags & F7_FLAG_sign) + cc->flags |= F7_FLAG_sign; +} +#endif // F7MOD_cbrt_ + + +#ifdef F7MOD_pow_ +F7_WEAK +void f7_pow (f7_t *cc, const f7_t *aa, const f7_t *bb) +{ +#if 0 + f7_t slots[cc == bb]; + f7_t *yy = cc == bb ? slots : cc; +#else + f7_t yy7, *yy = &yy7; +#endif + f7_log (yy, aa); + f7_Imul (yy, bb); + f7_exp (cc, yy); +} +#endif // F7MOD_pow_ + + +#ifdef F7MOD_powi_ +F7_WEAK +void f7_powi (f7_t *cc, const f7_t *aa, int ii) +{ + uint16_t u16 = ii; + f7_t xx27, *xx2 = &xx27; + + if (ii < 0) + u16 = -u16; + + f7_copy (xx2, aa); + + f7_set_u16 (cc, 1); + + while (1) + { + if (u16 & 1) + f7_Imul (cc, xx2); + + if (! f7_is_nonzero (cc)) + break; + + u16 >>= 1; + if (u16 == 0) + break; + f7_Isquare (xx2); + } + + if (ii < 0) + f7_div1 (cc, cc); +} +#endif // F7MOD_powi_ + + +#ifdef F7MOD_sincos_ + +#define ARRAY_NAME coeff_sin + #define FOR_SIN + #include "libf7-array.def" + #undef FOR_SIN +#undef ARRAY_NAME + +#define ARRAY_NAME coeff_cos + #define FOR_COS + #include "libf7-array.def" + #undef FOR_COS +#undef ARRAY_NAME + +#define STATIC static +#include "libf7-constdef.h" // pi_low +#undef STATIC + +typedef union +{ + struct + { + bool neg_sin : 1; // Must be bit F7_FLAGNO_sign. + bool neg_cos : 1; + bool do_sin: 1; + bool do_cos: 1; + bool swap_sincos : 1; + uint8_t res : 3; + }; + uint8_t bits; +} sincos_t; + + +F7_WEAK +void f7_sincos (f7_t *ss, f7_t *cc, const f7_t *aa) +{ + uint8_t a_class = f7_classify (aa); + + sincos_t sc = { .bits = a_class & F7_FLAG_sign }; + if (ss != NULL) sc.do_sin = 1; + if (cc != NULL) sc.do_cos = 1; + + if (f7_class_nan (a_class) || f7_class_inf (a_class)) + { + if (sc.do_sin) f7_set_nan (ss); + if (sc.do_cos) f7_set_nan (cc); + return; + } + + f7_t pi7, *pi = &pi7; + f7_t xx7, *xx = &xx7; + f7_t yy7, *yy = &yy7; + f7_t *hh = sc.do_sin ? ss : cc; + + // X = |A| + f7_copy (xx, aa); + xx->flags = 0; + + // Y is how often we subtract PI from X. + f7_clr (yy); + f7_const (pi, pi); + + if (f7_abscmp_msb_ge (xx, F7_(const_pi_msb), F7_(const_pi_expo) + 1)) + { + pi->expo = 1 + F7_(const_pi_expo); // 2*pi + + // Y = X / 2pi. + f7_div (yy, xx, pi); + + // The integral part of |A| / pi mod 2 is bit 55 - x_expo. + if (yy->expo >= F7_MANT_BITS && !f7_is_zero (yy)) + { + // Too big for sensible calculation: Should this be NaN instead?
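+ // (Y carries only F7_MANT_BITS = 56 significant bits: once its exponent + // reaches 56, the fractional part of |A| / 2pi -- the only information + // the angle reduction needs -- has been rounded away completely, so any + // returned value would be noise.)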
+ if (sc.do_sin) f7_clr (ss); + if (sc.do_cos) f7_clr (cc); + return; + } + + // X -= 2pi * [ X / 2pi ] + f7_floor (yy, yy); + + f7_msub (xx, yy, pi, xx); + xx->flags ^= F7_FLAG_sign; + + // We divided by 2pi, but Y should count times we subtracted pi. + yy->expo++; + } + + pi->expo = F7_(const_pi_expo); // pi + f7_sub (hh, xx, pi); + if (!f7_signbit (hh)) + { + // H = X - pi >= 0 => X >= pi + // sin(x) = -sin(x - pi) + // cos(x) = -cos(x - pi) + f7_copy (xx, hh); + // Y: We subtracted pi one more time. + f7_Iadd (yy, f7_set_u16 (hh, 1)); + sc.neg_sin ^= 1; + sc.neg_cos ^= 1; + } + + pi->expo = F7_(const_pi_expo) - 1; // pi/2 + if (f7_gt (xx, pi)) + { + // x > pi/2 + // sin(x) = sin(pi - x) + // cos(x) = -cos(pi - x) + pi->expo = F7_(const_pi_expo); // pi + f7_IRsub (xx, pi); + // Y: We subtracted pi one more time (and then negated). + f7_Iadd (yy, f7_set_u16 (hh, 1)); + yy->flags ^= F7_FLAG_sign; + sc.neg_cos ^= 1; + } + + pi->expo = F7_(const_pi_expo) - 2; // pi/4 + if (f7_gt (xx, pi)) + { + // x > pi/4 + // sin(x) = cos(pi/2 - x) + // cos(x) = sin(pi/2 - x) + pi->expo = F7_(const_pi_expo) - 1; // pi/2 + f7_IRsub (xx, pi); + // Y: We subtracted pi/2 one more time (and then negated). + f7_Iadd (yy, f7_set_1pow2 (hh, -1, 0)); + yy->flags ^= F7_FLAG_sign; + sc.swap_sincos = 1; + } + + if (!f7_is0 (yy)) + { + // Y counts how often we subtracted pi from X in order to + // get 0 <= X < pi/4 as small as possible (Y is 0 mod 0.5). + // Now also subtract the low part of pi: + // f7_const_pi_low = pi - f7_const_pi in order to get more precise + // results in the cases where the final result is close to 0. + const f7_t *pi_low = F7_CONST_ADDR (pi_low, pi); + //f7_const (pi, pi_low); + f7_Imul (yy, pi_low); + f7_Isub (xx, yy); + } + + // X in [0, pi/4]. + // X^2 in [0, pi^2/16] ~ [0, 0.6169] + + f7_square (yy, xx); + + f7_t *x_sin = xx; + f7_t *x_cos = yy; + + if ((sc.do_sin && !sc.swap_sincos) + || (sc.do_cos && sc.swap_sincos)) + { + f7_horner (hh, yy, n_coeff_sin, coeff_sin, NULL); + f7_mul (x_sin, hh, xx); + } + + if ((sc.do_cos && !sc.swap_sincos) + || (sc.do_sin && sc.swap_sincos)) + { + f7_horner (x_cos, yy, n_coeff_cos, coeff_cos, hh); + } + + if (sc.swap_sincos) + { + x_sin = yy; + x_cos = xx; + } + + if (sc.do_sin) + { + x_sin->flags ^= sc.bits; + x_sin->flags &= F7_FLAG_sign; + f7_copy (ss, x_sin); + } + + if (sc.do_cos) + { + x_cos->flags = sc.neg_cos; + f7_copy (cc, x_cos); + } +} +#endif // F7MOD_sincos_ + +#ifdef F7MOD_sin_ +F7_WEAK +void f7_sin (f7_t *ss, const f7_t *aa) +{ + f7_sincos (ss, NULL, aa); +} +#endif // F7MOD_sin_ + +#ifdef F7MOD_cos_ +F7_WEAK +void f7_cos (f7_t *cc, const f7_t *aa) +{ + f7_sincos (NULL, cc, aa); +} +#endif // F7MOD_cos_ + + +#ifdef F7MOD_tan_ +F7_WEAK +void f7_tan (f7_t *tt, const f7_t *aa) +{ + f7_t xcos; + f7_sincos (tt, & xcos, aa); + f7_Idiv (tt, & xcos); +} +#endif // F7MOD_tan_ + + +#ifdef F7MOD_cotan_ +F7_WEAK +void f7_cotan (f7_t *ct, const f7_t *aa) +{ + f7_t xcos; + f7_sincos (ct, & xcos, aa); + f7_div (ct, & xcos, ct); +} +#endif // F7MOD_cotan_ + + +#ifdef F7MOD_sinhcosh_ +F7_WEAK +void f7_sinhcosh (f7_t *cc, const f7_t *aa, bool do_sinh) +{ + f7_t xx7, *xx = &xx7; + // C = exp(A) + f7_exp (cc, aa); + // X = exp(-A) + f7_div (xx, f7_set_u16 (xx, 1), cc); + // sinh(A) = (exp(A) - exp(-A)) / 2 + // cosh(A) = (exp(A) + exp(-A)) / 2 + f7_addsub (cc, cc, xx, do_sinh); + cc->expo--; +} +#endif // F7MOD_sinhcosh_ + + +#ifdef F7MOD_sinh_ +F7_WEAK +void f7_sinh (f7_t *cc, const f7_t *aa) +{ + f7_sinhcosh (cc, aa, true); +} +#endif // F7MOD_sinh_ + + 
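+/* Usage sketch (hypothetical test code, not part of libf7): with the modules above in place, cosh (1) boils down to + + f7_t x, y; + f7_set_u16 (&x, 1); + f7_cosh (&y, &x); // y = (e + 1/e) / 2 ~ 1.5430806348 + + where f7_cosh funnels into f7_sinhcosh: one f7_exp call, one division, one add / sub, and a final expo-- to halve the sum. */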
+#ifdef F7MOD_cosh_ +F7_WEAK +void f7_cosh (f7_t *cc, const f7_t *aa) +{ + f7_sinhcosh (cc, aa, false); +} +#endif // F7MOD_cosh_ + + +#ifdef F7MOD_tanh_ +F7_WEAK +void f7_tanh (f7_t *cc, const f7_t *aa) +{ + // tanh(A) = (exp(2A) - 1) / (exp(2A) + 1) + f7_t xx7, *xx = &xx7; + F7_PCONST_U16 (const f7_t *one, 1); + // C = 2A + f7_copy (cc, aa); + cc->expo++; + // C = exp(2A) + f7_exp (cc, cc); + // X = exp(2A) + 1 + f7_add (xx, cc, one); + // C = exp(2A) - 1 + f7_Isub (cc, one); + // C = tanh(A) + f7_Idiv (cc, xx); +} +#endif // F7MOD_tanh_ + + +#ifdef F7MOD_atan_ + +#define MINIMAX_6_6_IN_0_1 + +#define ARRAY_NAME coeff_atan_zahler +#define FOR_NUMERATOR +#include "libf7-array.def" +#undef FOR_NUMERATOR +#undef ARRAY_NAME + +#define ARRAY_NAME coeff_atan_nenner +#define FOR_DENOMINATOR +#include "libf7-array.def" +#undef FOR_DENOMINATOR +#undef ARRAY_NAME + +#include "libf7-constdef.h" + +F7_WEAK +void f7_atan (f7_t *cc, const f7_t *aa) +{ + uint8_t a_class = f7_classify (aa); + uint8_t flags = a_class & F7_FLAG_sign; + + if (f7_class_nan (a_class)) + return f7_set_nan (cc); + + f7_t yy7, *yy = &yy7; + f7_t zz7, *zz = &zz7; + + if (f7_class_inf (a_class)) + { + f7_set_u16 (cc, 0); + goto do_Inf; + } + + // C = |A| + f7_copy (cc, aa); + cc->flags = 0; + + if (!f7_class_zero (a_class) && cc->expo >= 0) + { + // C >= 1: use atan (x) + atan (1/x) = pi / 2 to reduce to [0, 1]. + flags |= 1 << 1; + f7_div (cc, f7_set_u16 (yy, 1), cc); + } +#if !defined (MINIMAX_6_6_IN_0_1) + const uint8_t const_a_msb = 0x89; + const int16_t const_a_expo = -2; + if (f7_abscmp_msb_ge (cc, const_a_msb, const_a_expo)) + { + // We have C in [0, 1] and we want to use argument reduction by means + // of addition theorem atan(x) - atan(y) = atan((x - y) / (1 + xy)). + // We want to split [0, 1] into [0, a] u [a, 1] in such a way that + // the upper interval will be mapped to [-a, a]. The system is easy + // to solve and yields + // y = 1 / sqrt (3) ~ 0.57735 atan(y) = pi / 6 + // a = (1 - y) / (1 + y) ~ 0.26795 ~ 0x0.8930A2F * 2^-1. + flags |= 1 << 2; + // C <- (C - Y) / (1 + C * Y) in [-a, a] + const f7_t *cst = F7_CONST_ADDR (1_sqrt3, zz); + f7_mul (yy, cc, cst); + f7_Isub (cc, cst); + f7_Iadd (yy, F7_U16_ADDR (1, zz)); + f7_Idiv (cc, yy); + } +#endif + // C <- C * p(C^2) / q(C^2) + f7_square (yy, cc); + f7_horner (zz, yy, n_coeff_atan_zahler, coeff_atan_zahler, NULL); + f7_Imul (zz, cc); + f7_horner (cc, yy, n_coeff_atan_nenner, coeff_atan_nenner, NULL); + f7_div (cc, zz, cc); + +#if !defined (MINIMAX_6_6_IN_0_1) + if (flags & (1 << 2)) + f7_Iadd (cc, F7_CONST_ADDR (pi_6, yy)); +#endif + + if (flags & (1 << 1)) + { + do_Inf:; + // Y = pi / 2 + f7_const (yy, pi); + yy->expo = F7_(const_pi_expo) - 1; + f7_IRsub (cc, yy); + } + + cc->flags = a_class & F7_FLAG_sign; +} +#undef MINIMAX_6_6_IN_0_1 +#endif // F7MOD_atan_ + + +#ifdef F7MOD_asinacos_ + +#define ARRAY_NAME coeff_func_a_zahler +#define FOR_NUMERATOR +#include "libf7-array.def" +#undef FOR_NUMERATOR +#undef ARRAY_NAME + +#define ARRAY_NAME coeff_func_a_nenner +#define FOR_DENOMINATOR +#include "libf7-array.def" +#undef FOR_DENOMINATOR +#undef ARRAY_NAME + +typedef union +{ + struct + { + bool sign : 1; // Must be bit F7_FLAGNO_sign. + bool do_acos : 1; // From caller. + bool have_acos : 1; // What we compute from rational approx p/q.
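+ // As with sincos_t above, the layout is load-bearing: 'sign' sits at + // bit F7_FLAGNO_sign, so .bits can be seeded from f7_signbit (aa) and + // later masked straight into cc->flags without any shifting.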
+ uint8_t res : 5; + }; + uint8_t bits; +} asinacos_t; + +F7_WEAK +void f7_asinacos (f7_t *cc, const f7_t *aa, uint8_t what) +{ + f7_t xx7, *xx = &xx7; + f7_t xx27, *xx2 = &xx27; + + asinacos_t flags = { .bits = what | f7_signbit (aa) }; + + f7_abs (xx, aa); + + int8_t cmp = f7_cmp (xx, f7_set_u16 (cc, 1)); + + if (cmp == INT8_MIN + || cmp > 0) + { + return f7_set_nan (cc); + } + + if (xx->expo <= -2 || f7_is_zero (xx)) + { + // |A| < 1/2: asin(x) = x * a(2*x^2) + f7_square (xx2, xx); + xx2->expo ++; + } + else + { + // |A| > 1/2: acos (1-x) = sqrt(2*x) * a(x) + // C is 1 from above. + f7_IRsub (xx, cc); + f7_copy (xx2, xx); + flags.have_acos = 1; + } + + // MiniMax [5/4] numerator. + f7_horner (cc, xx2, n_coeff_func_a_zahler, coeff_func_a_zahler, NULL); + + if (flags.have_acos) + { + xx->expo ++; + f7_Isqrt (xx); + } + f7_Imul (cc, xx); + + // MiniMax [5/4] denominator. + f7_horner (xx, xx2, n_coeff_func_a_nenner, coeff_func_a_nenner, NULL); + + f7_Idiv (cc, xx); + + /* + With the current value of C, we have: + + | | do_asin | do_acos + | C | A <= 0 | A >= 0 | A <= 0 | A >= 0 + ----------+------------+-----------+----------+----------+---------- + have_asin | asin (|A|) | -C | C | pi/2 + C | pi/2 - C + have_acos | acos (|A|) | -pi/2 + C | pi/2 - C | pi - C | C + + Result = n_pi2 * pi/2 + C * (c_sign ? -1 : 1) + Result (A, do_asin) = asin (A) + Result (A, do_acos) = acos (A) + + with + c_sign = do_acos ^ have_acos ^ a_sign + n_pi2 = do_acos + have_acos * (a_sign ^ do_acos ? -1 : 1) + n_pi2 in { -1, 0, 1, 2 } + */ + + // All that matters for c_sign is bit 0. + uint8_t c_sign = flags.bits; + int8_t n_pi2 = flags.do_acos; + c_sign ^= flags.do_acos; + if (flags.have_acos) + { + n_pi2++; + __asm ("" : "+r" (n_pi2)); + if (c_sign & 1) // c_sign & 1 = a_sign ^ do_acos + n_pi2 -= 2; + c_sign++; + } + + cc->flags = c_sign & F7_FLAG_sign; + + if (n_pi2) + { + f7_const (xx, pi); + if (n_pi2 < 0) + xx->sign = 1; + if (n_pi2 != 2) + xx->expo = F7_(const_pi_expo) - 1; + + f7_Iadd (cc, xx); + } +} +#endif // F7MOD_asinacos_ + + +#ifdef F7MOD_asin_ +F7_WEAK +void f7_asin (f7_t *cc, const f7_t *aa) +{ + f7_asinacos (cc, aa, 0); +} +#endif // F7MOD_asin_ + + +#ifdef F7MOD_acos_ +F7_WEAK +void f7_acos (f7_t *cc, const f7_t *aa) +{ + f7_asinacos (cc, aa, 1 << 1); +} +#endif // F7MOD_acos_ + + +#ifndef IN_LIBGCC2 + +#ifdef F7MOD_put_C_ + +#include <stdio.h> +#include <avr/pgmspace.h> + +static F7_INLINE +uint8_t f7_hex_digit (uint8_t nibble) +{ + nibble = (uint8_t) (nibble + '0'); + if (nibble > '9') + nibble = (uint8_t) (nibble + ('a' - '0' - 10)); + return nibble; +} + +static void f7_put_hex2 (uint8_t x, FILE *stream) +{ + putc ('0', stream); + if (x) + { + putc ('x', stream); + putc (f7_hex_digit (x >> 4), stream); + putc (f7_hex_digit (x & 0xf), stream); + } +} + +#define XPUT(str) \ + fputs_P (PSTR (str), stream) + +// Write to STREAM a line that is appropriate for usage in libf7-const.def. + +F7_WEAK +void f7_put_CDEF (const char *name, const f7_t *aa, FILE *stream) +{ + char buf[7]; + XPUT ("F7_CONST_DEF ("); + fputs (name, stream); + XPUT (",\t"); + uint8_t a_class = f7_classify (aa); + if (!
f7_class_nonzero (a_class)) + { + f7_put_hex2 (a_class & F7_FLAGS, stream); + XPUT (",\t0,0,0,0,0,0,0,\t0)"); + return; + } + putc ('0' + (a_class & F7_FLAGS), stream); + XPUT (",\t"); + + for (uint8_t i = 0; i < F7_MANT_BYTES; i++) + { + f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream); + putc (',', stream); + } + putc ('\t', stream); + + itoa (aa->expo, buf, 10); + fputs (buf, stream); + XPUT (")"); +} + +void f7_put_C (const f7_t *aa, FILE *stream) +{ + char buf[7]; + + uint8_t a_class = f7_classify (aa); + if (f7_class_nan (a_class)) + { + XPUT ("{ .is_nan = 1 }"); + return; + } + bool sign = a_class & F7_FLAG_sign; + + if (f7_class_inf (a_class)) + { + XPUT ("{ .is_inf = 1, .sign = "); + putc ('0' + sign, stream); + XPUT (" }"); + return; + } + + XPUT ("{ .sign = "); + putc ('0' + sign, stream); + + XPUT (", .mant = { "); + for (uint8_t i = 0; i < F7_MANT_BYTES; i++) + { + f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream); + if (i != F7_MANT_BYTES - 1) + putc (',', stream); + } + + XPUT (" }, .expo = "); + itoa (aa->expo, buf, 10); + fputs (buf, stream); + XPUT (" }"); +} +#endif //F7MOD_put_C_ + + +#ifdef F7MOD_dump_ + +#include <avr/pgmspace.h> + +#ifndef AVRTEST_H + +#include <stdio.h> + +static void LOG_PSTR (const char *str) +{ + fputs_P (str, stdout); +} + +static void LOG_PFMT_U16 (const char *fmt, uint16_t x) +{ + printf_P (fmt, x); +} + +static void LOG_PFMT_FLOAT (const char *fmt, float x) +{ + printf_P (fmt, x); +} + +#define LOG_X8(X) LOG_PFMT_U16 (PSTR (" 0x%02x "), (uint8_t)(X)) +#define LOG_PFMT_S16(FMT, X) LOG_PFMT_U16 (FMT, (unsigned)(X)) +#define LOG_PFMT_ADDR(FMT, X) LOG_PFMT_U16 (FMT, (unsigned)(X)) + +#endif // AVRTEST_H + +static void dump_byte (uint8_t b) +{ + LOG_PSTR (PSTR (" ")); + for (uint8_t i = 0; i < 8; i++) + { + LOG_PSTR ((b & 0x80) ? PSTR ("1") : PSTR ("0")); + b = (uint8_t) (b << 1); + } +} + +void f7_dump_mant (const f7_t *aa) +{ + LOG_PSTR (PSTR ("\tmant =")); + for (int i = F7_MANT_BYTES - 1; i >= 0; i--) + LOG_X8 (aa->mant[i]); + LOG_PSTR (PSTR ("\n\t =")); + + for (int i = F7_MANT_BYTES - 1; i >= 0; i--) + dump_byte (aa->mant[i]); + LOG_PSTR (PSTR ("\n")); +} + +void f7_dump (const f7_t *aa) +{ + LOG_PFMT_ADDR (PSTR ("\n0x%04x\tflags = "), aa); + dump_byte (aa->flags); + uint8_t a_class = f7_classify (aa); + LOG_PSTR (PSTR (" = ")); + LOG_PSTR (f7_class_sign (a_class) ? PSTR ("-") : PSTR ("+")); + if (f7_class_inf (a_class)) LOG_PSTR (PSTR ("Inf ")); + if (f7_class_nan (a_class)) LOG_PSTR (PSTR ("NaN ")); + if (f7_class_zero (a_class)) LOG_PSTR (PSTR ("0 ")); + if (f7_class_number (a_class)) LOG_PSTR (PSTR ("Number ")); + + LOG_PFMT_FLOAT (PSTR (" = %.10g\n"), f7_get_float (aa)); + LOG_PFMT_S16 (PSTR ("\texpo = %d\n"), aa->expo); + + f7_dump_mant (aa); +} +#endif // F7MOD_dump_ + +#endif // ! libgcc + +#endif // !AVR_TINY diff --git a/libgcc/config/avr/libf7/libf7.h b/libgcc/config/avr/libf7/libf7.h new file mode 100644 index 0000000..73eafe0 --- /dev/null +++ b/libgcc/config/avr/libf7/libf7.h @@ -0,0 +1,687 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of LIBF7, which is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBF7_H +#define LIBF7_H +#define IN_LIBF7_H + +#include "f7-renames.h" + +#define F7_MANT_BYTES 7 +#define F7_MANT_BITS (8 * F7_MANT_BYTES) + +/* Using the following GCC features: + -- Unnamed structs / unions (GNU-C) + -- Fixed-point types (GNU-C) + -- Inline asm + -- Setting assembler names by means of __asm (GNU-C). + -- Attributes: alias, always_inline, const, noinline, unused, + progmem, pure, weak, warning + -- GCC built-ins: __builtin_abort, __builtin_constant_p + -- AVR built-ins: __builtin_avr_bitsr, __builtin_avr_rbits +*/ + +/* We have 2 kinds of flags: + + A) The flags that are stored in f7_t.flags: + -- f7_t.is_nan (NaN) + -- f7_t.is_inf (+Inf or -Inf) + -- f7_t.sign (negative or -Inf). + + B) The flags that are returned by f7_classify(). These are the + flags from A) together with + -- _zero: indicates that a number is zero. +*/ + +#define F7_FLAGNO_sign 0 +#define F7_FLAGNO_zero 1 +#define F7_FLAGNO_nan 2 +#define F7_FLAGNO_inf 7 + +#define F7_HAVE_Inf 1 + +// Flags that might be set by f7_classify(). +#define F7_FLAG_sign (1 << F7_FLAGNO_sign) +#define F7_FLAG_zero (1 << F7_FLAGNO_zero) +#define F7_FLAG_nan (1 << F7_FLAGNO_nan) +#define F7_FLAG_inf (F7_HAVE_Inf << F7_FLAGNO_inf) + +// Flags that might be set in f7_t.flags. +#define F7_FLAGS (F7_FLAG_inf | F7_FLAG_nan | F7_FLAG_sign) + +#if !defined __ASSEMBLER__ + +#ifndef IN_LIBGCC2 +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#else +/* Do not assume that we have std headers when we build libgcc.
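+ Instead, the typedefs below map the compiler's predefined macros like + __UINT64_TYPE__ and __INT8_MAX__ onto the usual <stdint.h> names.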
*/ + +typedef __UINT64_TYPE__ uint64_t; +typedef __UINT32_TYPE__ uint32_t; +typedef __UINT16_TYPE__ uint16_t; +typedef __UINT8_TYPE__ uint8_t; +typedef __INT64_TYPE__ int64_t; +typedef __INT32_TYPE__ int32_t; +typedef __INT16_TYPE__ int16_t; +typedef __INT8_TYPE__ int8_t; +typedef _Bool bool; +#define false 0 +#define true 1 +#define INT8_MIN (-1 - __INT8_MAX__) +#define INT16_MAX __INT16_MAX__ +#define NULL ((void*) 0) +#endif /* IN_LIBGCC2 */ + +#include "asm-defs.h" + +#ifdef __cplusplus +extern "C" { +#define _Static_assert(X, Y) static_assert (X) +#endif // C++ + +#define F7_INLINE inline __attribute__((__always_inline__)) +#define F7_NOINLINE __attribute__((__noinline__)) +#define F7_WEAK __attribute__((__weak__)) +#define F7_PURE __attribute__((__pure__)) +#define F7_UNUSED __attribute__((__unused__)) +#define F7_CONST __attribute__((__const__)) + +#define F7_STRINGY2(X) #X +#define F7_STRINGY(X) F7_STRINGY2(X) +#define F7ASM(X) __asm (F7_STRINGY2(X)) + +typedef struct f7_t +{ + union + { + struct + { + uint8_t sign :1; + uint8_t reserved1 :1; + uint8_t is_nan :1; + uint8_t reserved2 :4; + uint8_t is_inf :1; + }; + uint8_t flags; + }; + + uint8_t mant[7]; + int16_t expo; +} f7_t; + +typedef uint64_t f7_double_t; + +#define F7_MANT_HI4(X) \ + (*(uint32_t*) & (X)->mant[F7_MANT_BYTES - 4]) + +#define F7_MANT_CONST_HI4(X) \ + (*(const uint32_t*) & (X)->mant[F7_MANT_BYTES - 4]) + +#define F7_MANT_HI2(X) \ + (*(uint16_t*) & (X)->mant[F7_MANT_BYTES - 2]) + +static F7_INLINE F7_PURE +uint8_t f7_classify (const f7_t *aa) +{ + extern void f7_classify_asm (void); + register uint8_t rclass __asm ("r24"); + __asm ("%~call %x[f]" + : "=r" (rclass) + : [f] "i" (f7_classify_asm), "z" (aa)); + return rclass; +} + + +// +Inf or -Inf +static F7_INLINE +bool f7_class_inf (uint8_t c) +{ +#if defined (F7_HAVE_Inf) && F7_HAVE_Inf == 1 + return c >= F7_FLAG_inf; +#elif defined (F7_HAVE_Inf) && F7_HAVE_Inf == 0 + (void) c; + return false; +#else +#error macro F7_HAVE_Inf must be defined to 0 or to 1. +#endif // Have Inf +} + +static F7_INLINE +bool f7_is_inf (const f7_t *aa) +{ + return f7_class_inf (aa->flags); +} + +// Not-a-Number (NaN). +static F7_INLINE +bool f7_class_nan (uint8_t c) +{ + return c & F7_FLAG_nan; +} + +static F7_INLINE +bool f7_is_nan (const f7_t *aa) +{ + return f7_class_nan (aa->flags); +} + +// Some number +static F7_INLINE +bool f7_class_number (uint8_t c) +{ + return c <= (F7_FLAG_sign | F7_FLAG_zero); +} + +static F7_INLINE +bool f7_is_number (const f7_t *aa) +{ + return f7_class_number (f7_classify (aa)); +} + +// Zero +static F7_INLINE +bool f7_class_zero (uint8_t c) +{ + return c & F7_FLAG_zero; +} + +static F7_INLINE +bool f7_is_zero (const f7_t *aa) +{ + return f7_class_zero (f7_classify (aa)); +} + +// A non-zero number. 
+static F7_INLINE +bool f7_class_nonzero (uint8_t c) +{ + return c <= F7_FLAG_sign; +} + +static F7_INLINE +bool f7_is_nonzero (const f7_t *aa) +{ + return f7_class_nonzero (f7_classify (aa)); +} + +static F7_INLINE +bool f7_class_sign (uint8_t c) +{ + return c & F7_FLAG_sign; +} + +static F7_INLINE +bool f7_signbit (const f7_t *aa) +{ + return aa->flags & F7_FLAG_sign; +} + +static F7_INLINE +void f7_set_sign (f7_t *cc, bool sign) +{ + _Static_assert (F7_FLAGNO_sign == 0, ""); + cc->flags &= ~F7_FLAG_sign; + cc->flags |= sign; +} + +static F7_INLINE +void f7_set_nan (f7_t *cc) +{ + cc->flags = F7_FLAG_nan; +} + +static F7_INLINE +void f7_clr (f7_t *cc) +{ + extern void f7_clr_asm (void); + __asm ("%~call %x[f]" + : + : [f] "i" (f7_clr_asm), "z" (cc) + : "memory"); +} + +static F7_INLINE +f7_t* f7_copy (f7_t *cc, const f7_t *aa) +{ + extern void f7_copy_asm (void); + __asm ("%~call %x[f]" + : + : [f] "i" (f7_copy_asm), "z" (cc), "x" (aa) + : "memory"); + return cc; +} + +static F7_INLINE +f7_t* f7_copy_P (f7_t *cc, const f7_t *aa) +{ + extern void f7_copy_P_asm (void); + __asm ("%~call %x[f]" + : + : [f] "i" (f7_copy_P_asm), "x" (cc), "z" (aa) + : "memory"); + return cc; +} + +static F7_INLINE +void f7_copy_mant (f7_t *cc, const f7_t *aa) +{ + extern void f7_copy_mant_asm (void); + __asm ("%~call %x[f]" + : + : [f] "i" (f7_copy_mant_asm), "z" (cc), "x" (aa) + : "memory"); +} + +static F7_INLINE +void f7_set_inf (f7_t *cc, bool sign) +{ +#if F7_HAVE_Inf == 1 + cc->flags = F7_FLAG_inf | sign; +#else + (void) sign; + cc->flags = F7_FLAG_nan; +#endif // Have Inf +} + + +static F7_INLINE +bool f7_msbit (const f7_t *aa) +{ + return aa->mant[F7_MANT_BYTES - 1] & 0x80; +} + +// Quick test against 0 if A is known to be a number (neither NaN nor Inf). +static F7_INLINE +bool f7_is0 (const f7_t *aa) +{ + return 0 == f7_msbit (aa); +} + + +static F7_INLINE +int8_t f7_cmp_mant (const f7_t *aa, const f7_t *bb) +{ + extern void f7_cmp_mant_asm (void); + register int8_t r24 __asm ("r24"); + __asm ("%~call %x[f] ;; %1 %3" + : "=r" (r24) + : [f] "i" (f7_cmp_mant_asm), "x" (aa), "z" (bb)); + return r24; +} + +static F7_INLINE +bool f7_store_expo (f7_t *cc, int16_t expo) +{ + extern void f7_store_expo_asm (void); + register bool r24 __asm ("r24"); + register int16_t rexpo __asm ("r24") = expo; + __asm ("%~call %x[f] ;; %0 %2 %3" + : "=r" (r24) + : [f] "i" (f7_store_expo_asm), "z" (cc), "r" (rexpo)); + return r24; +} + +static F7_INLINE +f7_t* f7_abs (f7_t *cc, const f7_t *aa) +{ + f7_copy (cc, aa); + f7_set_sign (cc, 0); + + return cc; +} + + +F7_PURE extern int8_t f7_cmp (const f7_t*, const f7_t*); +F7_PURE extern bool f7_lt_impl (const f7_t*, const f7_t*); +F7_PURE extern bool f7_le_impl (const f7_t*, const f7_t*); +F7_PURE extern bool f7_gt_impl (const f7_t*, const f7_t*); +F7_PURE extern bool f7_ge_impl (const f7_t*, const f7_t*); +F7_PURE extern bool f7_ne_impl (const f7_t*, const f7_t*); +F7_PURE extern bool f7_eq_impl (const f7_t*, const f7_t*); +F7_PURE extern bool f7_unord_impl (const f7_t*, const f7_t*); + +static F7_INLINE +bool f7_lt (const f7_t *aa, const f7_t *bb) +{ + return 2 & f7_cmp (aa, bb); +} + +static F7_INLINE +bool f7_gt (const f7_t *aa, const f7_t *bb) +{ + return 1 == f7_cmp (aa, bb); +} + +static F7_INLINE +bool f7_le (const f7_t *aa, const f7_t *bb) +{ + int8_t c = f7_cmp (aa, bb); + return (uint8_t) (c + 1) <= 1; +} + +static F7_INLINE +bool f7_ge (const f7_t *aa, const f7_t *bb) +{ + return f7_cmp (aa, bb) >= 0; +} + +static F7_INLINE +bool f7_unordered (const f7_t *aa, const 
f7_t *bb) +{ + return INT8_MIN == f7_cmp (aa, bb); +} + +static F7_INLINE +bool f7_ordered (const f7_t *aa, const f7_t *bb) +{ + return INT8_MIN != f7_cmp (aa, bb); +} + +static F7_INLINE +bool f7_eq (const f7_t *aa, const f7_t *bb) +{ + return 0 == f7_cmp (aa, bb); +} + +static F7_INLINE +bool f7_ne (const f7_t *aa, const f7_t *bb) +{ + return 1 & f7_cmp (aa, bb); +} + +extern void f7_clr (f7_t*); + +__attribute__((warning ("foo_u16"))) void foo_u16 (void); +__attribute__((warning ("foo_s16"))) void foo_s16 (void); + +extern f7_t* f7_set_s16_impl (f7_t*, int16_t); +extern f7_t* f7_set_u16_impl (f7_t*, uint16_t); + +static F7_INLINE +f7_t* f7_set_u16_worker (f7_t *cc, uint16_t u16) +{ + if (__builtin_constant_p (u16)) + { + if (u16 == 0) + return cc; + + uint8_t off = __builtin_clz (u16); + if (15 - off) + * (uint8_t*) & cc->expo = (uint8_t) (15 - off); + u16 <<= off; + if (u16 & 0xff) + cc->mant[5] = (uint8_t) u16; + if (u16 & 0xff00) + cc->mant[6] = (uint8_t) (u16 >> 8); + + return cc; + } + else + { + foo_u16(); + __builtin_abort(); + return NULL; + } +} + +static F7_INLINE +f7_t* f7_set_u16 (f7_t *cc, uint16_t u16) +{ + if (__builtin_constant_p (u16)) + { + f7_clr (cc); + return f7_set_u16_worker (cc, u16); + } + + return f7_set_u16_impl (cc, u16); +} + +static F7_INLINE +f7_t* f7_set_s16 (f7_t *cc, int16_t s16) +{ + if (__builtin_constant_p (s16)) + { + f7_clr (cc); + + uint16_t u16 = (uint16_t) s16; + + if (s16 < 0) + { + u16 = -u16; + cc->flags = F7_FLAG_sign; + } + + return f7_set_u16_worker (cc, u16); + } + + return f7_set_s16_impl (cc, s16); +} + +static F7_INLINE +void f7_set_eps (f7_t *cc, uint8_t eps, bool sign) +{ + cc = f7_set_u16 (cc, 1); + if (!__builtin_constant_p (sign) || sign) + cc->flags = sign; + cc->mant[0] = eps; +} + +static F7_INLINE +f7_t* f7_set_1pow2 (f7_t *cc, int16_t expo, bool sign) +{ + cc = f7_set_u16 (cc, 1); + cc->expo = expo; + if (!__builtin_constant_p (sign) || sign) + cc->flags = sign; + return cc; +} + +static F7_INLINE +f7_t* f7_set_u64 (f7_t *cc, uint64_t u64) +{ + extern f7_t* f7_set_u64_asm (uint64_t, f7_t*); + return f7_set_u64_asm (u64, cc); +} + +static F7_INLINE +f7_t* f7_set_s64 (f7_t *cc, int64_t s64) +{ + extern f7_t* f7_set_s64_asm (int64_t, f7_t*); + return f7_set_s64_asm (s64, cc); +} + +extern void f7_set_double_impl (f7_double_t, f7_t*); +static F7_INLINE +void f7_set_double (f7_t *cc, f7_double_t val64) +{ + f7_set_double_impl (val64, cc); +} + +extern f7_t* f7_init_impl (uint64_t, uint8_t, f7_t*, int16_t); + +static F7_INLINE +f7_t* f7_init (f7_t *cc, uint8_t flags, uint64_t mant, int16_t expo) +{ + return f7_init_impl (mant, flags, cc, expo); +} + +extern f7_t* f7_set_s32 (f7_t*, int32_t); +extern f7_t* f7_set_u16 (f7_t*, uint16_t); +extern f7_t* f7_set_u32 (f7_t*, uint32_t); +extern void f7_set_float (f7_t*, float); +extern void f7_set_pdouble (f7_t*, const f7_double_t*); + +F7_PURE extern int16_t f7_get_s16 (const f7_t*); +F7_PURE extern int32_t f7_get_s32 (const f7_t*); +F7_PURE extern int64_t f7_get_s64 (const f7_t*); +F7_PURE extern uint16_t f7_get_u16 (const f7_t*); +F7_PURE extern uint32_t f7_get_u32 (const f7_t*); +F7_PURE extern uint64_t f7_get_u64 (const f7_t*); +F7_PURE extern float f7_get_float (const f7_t*); +F7_PURE extern f7_double_t f7_get_double (const f7_t*); + +#if USE_LPM == 1 + #define F7_PGMSPACE __attribute__((__progmem__)) + #define f7_copy_flash f7_copy_P + + #define f7_const(X, NAME) \ + f7_copy_P ((X), & F7_(const_ ## NAME ## _P)) + + #define F7_CONST_DEF(NAME, FLAGS, M0, M1, M2, M3, M4, M5, M6, EXPO) \ 
+ extern const f7_t F7_(const_ ## NAME ## _P); + #include "libf7-const.def" + #undef F7_CONST_DEF +#else + #define F7_PGMSPACE // Empty + #define f7_copy_flash f7_copy + + #define f7_const(X, NAME) \ + f7_copy ((X), & F7_(const_ ## NAME)) + + #define F7_CONST_DEF(NAME, FLAGS, M0, M1, M2, M3, M4, M5, M6, EXPO) \ + extern const f7_t F7_(const_ ## NAME); + #include "libf7-const.def" + #undef F7_CONST_DEF +#endif // USE_LPM + + +// Basic floating point arithmetic: +// double output <=> f7_t* +// double input <=> const f7_t* +extern f7_t* f7_neg (f7_t*, const f7_t*); +extern void f7_add (f7_t*, const f7_t*, const f7_t*); +extern void f7_sub (f7_t*, const f7_t*, const f7_t*); +extern void f7_mul (f7_t*, const f7_t*, const f7_t*); +extern void f7_div (f7_t*, const f7_t*, const f7_t*); + +// Analogies of functions from math.h: +// double output <=> f7_t* +// double input <=> const f7_t* +extern void f7_fabs (f7_t*, const f7_t*); +extern void f7_fmod (f7_t*, const f7_t*, const f7_t*); +extern void f7_frexp (f7_t*, const f7_t*, int*); +extern void f7_exp (f7_t*, const f7_t*); +extern void f7_log (f7_t*, const f7_t*); +extern void f7_pow (f7_t*, const f7_t*, const f7_t*); +extern void f7_sqrt (f7_t*, const f7_t*); +extern void f7_cbrt (f7_t*, const f7_t*); +extern void f7_hypot (f7_t*, const f7_t*, const f7_t*); +extern f7_t* f7_ldexp (f7_t*, const f7_t*, int); +extern f7_t* f7_fmax (f7_t*, const f7_t*, const f7_t*); +extern f7_t* f7_fmin (f7_t*, const f7_t*, const f7_t*); +extern f7_t* f7_trunc (f7_t*, const f7_t*); +extern f7_t* f7_floor (f7_t*, const f7_t*); +extern void f7_ceil (f7_t*, const f7_t*); +extern void f7_round (f7_t*, const f7_t*); +extern void f7_sin (f7_t*, const f7_t*); +extern void f7_cos (f7_t*, const f7_t*); +extern void f7_tan (f7_t*, const f7_t*); +extern void f7_atan (f7_t*, const f7_t*); +extern void f7_asin (f7_t*, const f7_t*); +extern void f7_acos (f7_t*, const f7_t*); +extern void f7_tanh (f7_t*, const f7_t*); +extern void f7_sinh (f7_t*, const f7_t*); +extern void f7_cosh (f7_t*, const f7_t*); +extern void f7_log2 (f7_t*, const f7_t*); +extern void f7_log10 (f7_t*, const f7_t*); +extern void f7_exp10 (f7_t*, const f7_t*); +extern void f7_pow10 (f7_t*, const f7_t*); + +// Just prototypes, not implemented yet. +extern void f7_atan2 (f7_t*, const f7_t*, const f7_t*); +extern long f7_lrint (const f7_t*); +extern long f7_lround (const f7_t*); + +// Helper functions, aliases, convenience. 
+extern void f7_div1 (f7_t*, const f7_t*); +extern void f7_square (f7_t*, const f7_t*); + +extern void f7_powi (f7_t*, const f7_t*, int); +extern f7_t* f7_max (f7_t*, const f7_t*, const f7_t*); +extern f7_t* f7_min (f7_t*, const f7_t*, const f7_t*); +extern f7_t* f7_truncx (f7_t*, const f7_t*, bool); +extern void f7_cotan (f7_t*, const f7_t*); +extern void f7_sincos (f7_t*, f7_t*, const f7_t*); +extern void f7_asinacos (f7_t*, const f7_t*, uint8_t); +extern void f7_sinhcosh (f7_t*, const f7_t*, bool); + +extern void f7_horner (f7_t*, const f7_t*, uint8_t, const f7_t *coeff, f7_t*); +extern void f7_mul_noround (f7_t*, const f7_t*, const f7_t*); +extern void f7_clr_mant_lsbs (f7_t*, const f7_t*, uint8_t) F7ASM(f7_clr_mant_lsbs_asm); + +F7_PURE extern int8_t f7_cmp_unordered (const f7_t*, const f7_t*, bool); +F7_PURE extern int8_t f7_cmp_abs (const f7_t*, const f7_t*); + +F7_PURE extern bool f7_abscmp_msb_ge (const f7_t*, uint8_t msb, int16_t expo); +extern void f7_addsub (f7_t*, const f7_t*, const f7_t*, bool neg_b); +extern void f7_madd_msub (f7_t*, const f7_t*, const f7_t*, const f7_t*, bool); +extern void f7_madd (f7_t*, const f7_t*, const f7_t*, const f7_t*); +extern void f7_msub (f7_t*, const f7_t*, const f7_t*, const f7_t*); +extern uint8_t f7_mulx (f7_t*, const f7_t*, const f7_t*, bool); +extern void f7_divx (f7_t*, const f7_t*, const f7_t*, uint8_t); +extern void f7_logx (f7_t*, const f7_t*, const f7_t*); +extern f7_t* f7_minmax (f7_t*, const f7_t*, const f7_t*, bool); + +// Idem: +// f7_Ifunc (y) = f7_func (y, y) +// f7_Ifunc (y, x) = f7_func (y, y, x) +extern void f7_Iadd (f7_t*, const f7_t*); +extern void f7_Isub (f7_t*, const f7_t*); +extern void f7_Imul (f7_t*, const f7_t*); +extern void f7_Idiv (f7_t*, const f7_t*); +extern void f7_IRsub (f7_t*, const f7_t*); +extern void f7_Ineg (f7_t*); +extern void f7_Isqrt (f7_t*); +extern void f7_Isquare (f7_t*); +extern f7_t* f7_Ildexp (f7_t*, int); + +// Prototypes for some functions from libf7-asm.sx. +F7_CONST extern uint16_t f7_sqrt16_round (uint16_t) F7ASM(f7_sqrt16_round_asm); +F7_CONST extern uint8_t f7_sqrt16_floor (uint16_t) F7ASM(f7_sqrt16_floor_asm); +extern void f7_addsub_mant_scaled_asm (f7_t*, const f7_t*, const f7_t*, uint8_t); +extern uint8_t f7_mul_mant_asm (f7_t*, const f7_t*, const f7_t*, uint8_t); +extern void f7_sqrt_approx_asm (f7_t*, const f7_t*); +extern uint64_t f7_lshrdi3 (uint64_t, uint8_t) F7ASM(f7_lshrdi3_asm); +extern uint64_t f7_ashldi3 (uint64_t, uint8_t) F7ASM(f7_ashldi3_asm); +// Normalize a non-Inf, non-NaN value. Sets .sign to 0. +extern f7_t* f7_normalize_asm (f7_t*); + +// Dumping. +#ifndef IN_LIBGCC2 +extern void f7_dump (const f7_t*); +extern void f7_dump_mant (const f7_t*); +extern void f7_put_C (const f7_t*, FILE*); +extern void f7_put_CDEF (const char *name, const f7_t*, FILE*); +#endif /* IN_LIBGCC2 */ + +#ifdef __cplusplus +} // extern "C" +#include "libf7-class.h" +#endif // C++ + +#endif /* __ASSEMBLER__ */ +#undef IN_LIBF7_H +#endif /* LIBF7_H */ diff --git a/libgcc/config/avr/libf7/t-libf7 b/libgcc/config/avr/libf7/t-libf7 new file mode 100644 index 0000000..92a27eb --- /dev/null +++ b/libgcc/config/avr/libf7/t-libf7 @@ -0,0 +1,159 @@ +# Used except --with-libf7=no + +avrsrc := $(srcdir)/config/avr + +libf7 := $(avrsrc)/libf7 + +F7_PREFIX = __f7_ + +include $(libf7)/libf7-common.mk + +LIBF7_DF_CONV += floatundidf floatdidf # floatunsidf floatsidf + +# Wrappers like f7_lt_impl for f7_lt etc. because the latter is inline.
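+# E.g. the __ltdf2 entry point presumably cannot target the inline f7_lt +# directly, so its wrapper is routed to the out-of-line f7_lt_impl body +# from libf7.c instead.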
+LIBF7_DF_CMP += lt le gt ge ne eq unord + +F7_C_PARTS += $(LIBF7_DF_CONV) $(LIBF7_DF_CMP) + +# -mcall-prologues +CALL_PROLOGUES += $(LIBF7_DF_CONV) + +# -Wno-missing-prototypes +NO_PROTO += $(LIBF7_DF_CONV) + +F7F += le_impl lt_impl gt_impl ge_impl ne_impl eq_impl unord_impl + + +$(libf7)/f7-renames.h: $(libf7)/f7renames.sh $(libf7)/libf7-common.mk + $< head $(F7_PREFIX) t-libf7 > $@ + $< c $(F7_PREFIX) $(F7F) >> $@ + $< cst $(F7_PREFIX) $(F7F_cst) >> $@ + $< asm $(F7_PREFIX) $(F7F_asm) >> $@ + $< tail $(F7_PREFIX) >> $@ + +# The right-hand sides like g_ddd come from libf7-common.mk. +# The _m_ wraps are added by t-libf7-math. + +# __adddf3, ... +F7_ASM_WRAPS_g_ddd += $(g_ddd) + +# __ltdf2, ... +F7_ASM_WRAPS_g_xdd_cmp += $(g_xdd_cmp) + +# __floatsidf, ... +F7_ASM_WRAPS_g_dx += $(g_dx) + +# __fixdfsi, ... +F7_ASM_WRAPS_g_xd += $(g_xd) + +$(libf7)/f7-wraps.h: $(libf7)/f7wraps.sh \ + $(libf7)/libf7-common.mk $(libf7)/t-libf7-math + $< header "WITH_LIBF7_MATH_FUNCTIONS=$(WITH_LIBF7_MATH_FUNCTIONS)" "WITH_LIBF7_MATH_SYMBOLS=$(WITH_LIBF7_MATH_SYMBOLS)" > $@ + $< ddd_libgcc $(F7_ASM_WRAPS_g_ddd) >> $@ + $< xdd_libgcc_cmp $(F7_ASM_WRAPS_g_xdd_cmp) >> $@ + $< xd_libgcc $(F7_ASM_WRAPS_g_xd) >> $@ + $< dx_libgcc $(F7_ASM_WRAPS_g_dx) >> $@ + $< ddd_math $(F7_ASM_WRAPS_m_ddd) >> $@ + $< ddx_math $(F7_ASM_WRAPS_m_ddx) >> $@ + $< dd_math $(F7_ASM_WRAPS_m_dd) >> $@ + $< xd_math $(F7_ASM_WRAPS_m_xd) >> $@ + +F7_ASM_WRAPS += $(F7_ASM_WRAPS_g_xd) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_g_dx) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_g_ddd) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_g_xdd_cmp) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_m_ddd) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_m_ddx) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_m_dd) +F7_ASM_WRAPS += $(F7_ASM_WRAPS_m_xd) + +F7_ASM_PARTS += $(patsubst %, D_%, $(F7_ASM_WRAPS)) + +# Options +F7_FLAGS += -I $(libf7) -save-temps=obj + +# t-avr::HOST_LIBGCC2_CFLAGS sets -mcall-prologues which will inhibit +# tail-call optimizations. The user could get it with -mrelax, but we +# just switch it off here and then explicitly on again for the +# CALL_PROLOGUES modules. + +F7_C_FLAGS += $(F7_FLAGS) \ + -dp -g0 \ + -mno-call-prologues \ + -fno-lto -Os \ + -fdata-sections -ffunction-sections \ + -fno-reorder-blocks \ + -fno-tree-loop-optimize \ + -fno-tree-loop-im -fno-move-loop-invariants + +F7_ASM_FLAGS += $(F7_FLAGS) + +$(patsubst %, f7_c_%.o, $(CALL_PROLOGUES)) \ + : F7_C_FLAGS += -mcall-prologues + +$(patsubst %, f7_c_%.o, $(STRICT_X)) \ + : F7_C_FLAGS += -mstrict-X + +$(patsubst %, f7_c_%.o, $(NO_PROTO)) \ + : F7_C_FLAGS += -Wno-missing-prototypes + +# Depends will be worked out by the libgcc build system. + +F7_C_OBJECTS = $(patsubst %, f7_c_%$(objext), $(F7_C_PARTS)) +F7_ASM_OBJECTS = $(patsubst %, f7_asm_%$(objext), $(F7_ASM_PARTS)) + +$(F7_ASM_OBJECTS) $(F7_C_OBJECTS) : $(libf7)/t-libf7 +$(F7_ASM_OBJECTS) $(F7_C_OBJECTS) : $(libf7)/t-libf7-math +$(F7_ASM_OBJECTS) $(F7_C_OBJECTS) : $(libf7)/t-libf7-math-symbols + +.PHONY: log_vars + +all: log_vars + +log_vars: + $(info # libf7: WITH_LIBF7_MATH_FUNCTIONS = $(WITH_LIBF7_MATH_FUNCTIONS)) + $(info # libf7: WITH_LIBF7_MATH_SYMBOLS = $(WITH_LIBF7_MATH_SYMBOLS)) + $(info # libf7: F7_C_PARTS = $(F7_C_PARTS)) + $(info # libf7: F7_C_OBJECTS = $(F7_C_OBJECTS)) + $(info # libf7: F7_ASM_PARTS = $(F7_ASM_PARTS)) + $(info # libf7: F7_ASM_OBJECTS = $(F7_ASM_OBJECTS)) + +# Build the libf7 C objects and add them to libgcc.a.
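+# This uses libgcc's make iterator scheme: iter-items / iter-labels list +# the parts, and libf7-c-object.mk resp. libf7-asm-object.mk is included +# once per part to emit that part's build rule.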
+ +f7_parts := $(F7_C_PARTS) + +iter-items := $(f7_parts) +iter-labels := $(f7_parts) + +include $(srcdir)/empty.mk $(patsubst %,$(libf7)/libf7-c-object.mk,$(iter-items)) + +libgcc-objects += $(patsubst %,f7_c_%$(objext),$(F7_C_PARTS)) + +# Build the libf7 ASM objects and add them to libgcc.a. + +f7_parts := $(F7_ASM_PARTS) + +iter-items := $(f7_parts) +iter-labels := $(f7_parts) + +include $(srcdir)/empty.mk $(patsubst %,$(libf7)/libf7-asm-object.mk,$(iter-items)) + +libgcc-objects += $(patsubst %,f7_asm_%$(objext),$(F7_ASM_PARTS)) + +.PHONY: clean-f7 + +clean: clean-f7 + +clean-f7: + rm -f $(wildcard f7_*.i f7_*.s f7_*.o) + +# Get rid of any DFmode remains. + +LIB2FUNCS_EXCLUDE += \ + _sf_to_df \ + _fixdfdi \ + _fixunsdfsi \ + _floatundidf \ + _fixunsdfdi \ + _floatdidf \ + _powidf2 diff --git a/libgcc/config/avr/libf7/t-libf7-math b/libgcc/config/avr/libf7/t-libf7-math new file mode 100644 index 0000000..6eb144d --- /dev/null +++ b/libgcc/config/avr/libf7/t-libf7-math @@ -0,0 +1,21 @@ +# Triggered by --with-libf7=math or --with-libf7=math-symbols +# +# We provide weak double wrappers for functions specified in math.h, +# but with __ prepended to the symbol name used for the double function. +# For example we provide double __sin (double) but neither sin nor sinl. +# To get weak symbols according to math.h, t-libf7-math-symbols has to +# be used which is triggered by --with-libf7=math-symbols. + +WITH_LIBF7_MATH_FUNCTIONS = 1 + +# __sin, ... +F7_ASM_WRAPS_m_dd += $(m_dd) + +# __pow, __fmin, ... +F7_ASM_WRAPS_m_ddd += $(m_ddd) + +# __ldexp, ... +F7_ASM_WRAPS_m_ddx += $(m_ddx) + +# __lrint, ... +F7_ASM_WRAPS_m_xd += $(m_xd) diff --git a/libgcc/config/avr/libf7/t-libf7-math-symbols b/libgcc/config/avr/libf7/t-libf7-math-symbols new file mode 100644 index 0000000..c79e2b6 --- /dev/null +++ b/libgcc/config/avr/libf7/t-libf7-math-symbols @@ -0,0 +1,11 @@ +# Triggered by --with-libf7=math-symbols +# +# We have at least one module in libgcc that depends on __SIZEOF_DOUBLE__ +# or __SIZEOF_LONG_DOUBLE__ which means that t-avrlibc must not copy +# double32/64 or long-double32/64 variants from the vanilla one. +# This occurs when some module(s) define(s) weak aliases for functions +# that usually live in libm. + +WITH_LIBF7_MATH_SYMBOLS = 1 + +F7_FLAGS += -DWITH_LIBF7_MATH_SYMBOLS diff --git a/libgcc/config/avr/t-avrlibc b/libgcc/config/avr/t-avrlibc index 34eca4f..661a518 100644 --- a/libgcc/config/avr/t-avrlibc +++ b/libgcc/config/avr/t-avrlibc @@ -65,6 +65,12 @@ LIB2FUNCS_EXCLUDE += \ _fixunssfdi \ _floatdisf _floatundisf +ifeq (,$(WITH_LIBF7_MATH_SYMBOLS)) + +# No modules depend on __SIZEOF_LONG_DOUBLE__ or __SIZEOF_DOUBLE__ +# which means we might have an opportunity to copy libgcc.a. +# WITH_LIBF7_MATH_SYMBOLS is set by libf7/t-libf7-math-symbols. + ifneq (,$(findstring avr,$(MULTISUBDIR))) # We are not in the avr2 (default) subdir, hence copying will work. @@ -95,3 +101,4 @@ Makefile: t-copy-libgcc.dep endif endif +endif