about | summary | refs | log | tree | commit | diff
path: root/libgcc/config/avr
diff options
context:
space:
mode:
author: Georg-Johann Lay <avr@gjlay.de> 2023-11-08 12:43:49 +0100
committer: Georg-Johann Lay <avr@gjlay.de> 2023-11-08 12:45:25 +0100
commit: f7da59104c0f12e8eee0f56039a0c1a4ac8382f8 (patch)
tree: 365b0fb33e496e861290586f3133557c0e066144 /libgcc/config/avr
parent: c2f2351494794a86360dfc7db97848de4638f9f6 (diff)
download: gcc-f7da59104c0f12e8eee0f56039a0c1a4ac8382f8.zip
gcc-f7da59104c0f12e8eee0f56039a0c1a4ac8382f8.tar.gz
gcc-f7da59104c0f12e8eee0f56039a0c1a4ac8382f8.tar.bz2
LibF7: Tweak IEEE double multiplication.
libgcc/config/avr/libf7/
	* libf7-asm.sx (mul_mant) [AVR_HAVE_MUL]: Tweak code.
Diffstat (limited to 'libgcc/config/avr')
-rw-r--r-- libgcc/config/avr/libf7/libf7-asm.sx 208
1 files changed, 107 insertions, 101 deletions
diff --git a/libgcc/config/avr/libf7/libf7-asm.sx b/libgcc/config/avr/libf7/libf7-asm.sx
index 4505764..01d1fa3 100644
--- a/libgcc/config/avr/libf7/libf7-asm.sx
+++ b/libgcc/config/avr/libf7/libf7-asm.sx
@@ -877,10 +877,14 @@ ENDF ashldi3
;; R18.0 = 1: No rounding.
DEFUN mul_mant
+ ;; 10 = Y, R17...R10
do_prologue_saves 10
+ ;; T = R18.0: Skip rounding?
bst r18, 0
+ ;; Save result address for later.
push r25
push r24
+ ;; Load A's mantissa.
movw ZL, r22
LDD A0, Z+0+Off
LDD A1, Z+1+Off
@@ -913,26 +917,15 @@ DEFUN mul_mant
adc C6, ZERO
;; Done B6
- ;; 3 * 3 -> 0:a
- ;; 4 * 4 -> 2:1
- ;; 5 * 5 -> 4:3
- ldd BB, Z+3+Off $ mul A3, BB $ movw TT0, r0
- ldd BB, Z+4+Off $ mul A4, BB $ movw TT2, r0
- ldd BB, Z+5+Off $ mul A5, BB
-
- ADD CA, TT0 $ adc C0, TT1
- adc C1, TT2 $ adc C2, TT3
- adc C3, r0 $ adc C4, r1
- brcc .+2
- adiw C5, 1
-
;; 6 * 5 -> 5:4
;; 4 * 5 -> 3:2
;; 2 * 5 -> 1:0
;; 0 * 5 -> a:-
+ ldd BB, Z+5+Off
mul A0, BB
- ;; A0 done
+ ;; Done A0
#define Atmp A0
+#define Null A0
mov Atmp, r1
mul A6, BB $ movw TT2, r0
@@ -942,82 +935,127 @@ DEFUN mul_mant
ADD CA, Atmp
adc C0, r0 $ adc C1, r1
adc C2, TT0 $ adc C3, TT1
- adc C4, TT2 $ adc C5, TT3 $ clr ZERO
- adc C6, ZERO
+ adc C4, TT2 $ adc C5, TT3 $ clr Null
+ adc C6, Null
;; 1 * 5 -> 0:a
;; 3 * 5 -> 2:1
- ;; 6 * 4 -> 4:3
+ ;; 5 * 5 -> 4:3
mul A1, BB $ movw TT0, r0
mul A3, BB $ movw TT2, r0
+ mul A5, BB
+
+ ADD CA, TT0 $ adc C0, TT1
+ adc C1, TT2 $ adc C2, TT3
+ adc C3, r0 $ adc C4, r1
+ adc C5, Null $ adc C6, Null
+ ;; Done B5
+
+ ;; 2 * 4 -> 0:a
+ ;; 4 * 4 -> 2:1
+ ;; 6 * 4 -> 4:3
ldd BB, Z+4+Off
+ mul A2, BB $ movw TT0, r0
+ mul A4, BB $ movw TT2, r0
mul A6, BB
ADD CA, TT0 $ adc C0, TT1
adc C1, TT2 $ adc C2, TT3
- adc C3, r0 $ adc C4, r1 $ clr ZERO
- adc C5, ZERO $ adc C6, ZERO
- ;; B5 done
+ adc C3, r0 $ adc C4, r1
+ adc C5, Null $ adc C6, Null
+ ;; 1 * 4 -> a:-
+ ;; 3 * 4 -> 1:0
+ ;; 5 * 4 -> 3:2
+ mul A1, BB $ mov TT1, r1
+ mul A3, BB $ movw TT2, r0
+ mul A5, BB
+ ;; Done A1
+ ;; Done B4
+ ADD CA, TT1
+ adc C0, TT2 $ adc C1, TT3
+ adc C2, r0 $ adc C3, r1
+ ;; Accumulate carry for C3 in TT1.
+ ;; Accumulate carry for C4 in A1.
+#define Cry3 TT1
+#define Cry4 A1
+ clr Cry3
+ clr Cry4
+ rol Cry4
+
+ ;; 6 * 2 -> 2:1
;; 6 * 3 -> 3:2
- ;; 6 * 1 -> 1:0
- ;; 4 * 1 -> a:-
- mov TT0, A6 $ ldd TMP, Z+3+Off
- mov BB, A4 $ ldd Atmp, Z+1+Off
- rcall .Lmul.help.3
+ ;; 5 * 3 -> 2:1
+ ldd BB, Z+2+Off
+ mul A6, BB
+ add C1, r0
+ adc C2, r1
+ adc Cry3, Null
- ;; 5 * 4 -> 3:2
- ;; 5 * 2 -> 1:0
- ;; 3 * 2 -> a:-
- mov TT0, A5 $ ldd TMP, Z+4+Off
- mov BB, A3 $ ldd Atmp, Z+2+Off
- rcall .Lmul.help.3
+ ldd BB, Z+3+Off
+ mul A6, BB
+ add C2, r0
+ adc C3, r1
+ adc Cry4, Null
+
+ mul A5, BB
+ add C1, r0
+ adc C2, r1
+ adc Cry3, Null
- ;; 4 * -> 3:2 (=0)
+ ;; Perform the remaining 11 multiplications in 4 loopings:
;; 4 * 3 -> 1:0
+ ;; 3 * 3 -> 0:a
;; 2 * 3 -> a:-
- mov TT0, A4 $ clr TMP
- mov BB, A2 $ ldd Atmp, Z+3+Off
- rcall .Lmul.help.3
-
- ;; 3 * . -> 3:2 (=0)
- ;; 3 * 4 -> 1:0
- ;; 1 * 4 -> a:-
- mov TT0, A3 $ clr TMP
- mov BB, A1 $ ldd Atmp, Z+4+Off
- rcall .Lmul.help.3
-
- ;; . * ? -> 3:2 (=0)
- ;; . * 0 -> 1:0 (=0)
+ ;;
+ ;; 5 * 2 -> 1:0
+ ;; 4 * 2 -> 0:a
+ ;; 3 * 2 -> a:-
+ ;;
+ ;; 6 * 1 -> 1:0
+ ;; 5 * 1 -> 0:a
+ ;; 4 * 1 -> a:-
+ ;;
+ ;; . * 0 -> 1:0 (=0)
+ ;; 6 * 0 -> 0:a
;; 5 * 0 -> a:-
- clr TT0
- mov BB, A5 $ ldd Atmp, Z+0+Off
- rcall .Lmul.help.3
- clr TT3 ;; Asserted by .Lmul.help.2
- ;; 6 * 2 -> 2:1
- ;; 6 * 0 -> 0:a
- $ ldd TMP, Z+2+Off
- mov BB, A6 ;$ ldd Atmp, Z+0+Off
- rcall .Lmul.help.2
+ ;; BB already contains B3, hence let Z point one past B2 so that
+ ;; the LD *, -Z below will pick up B2, B1, B0.
+ adiw r30, 1 + Off+2
- ;; 5 * 3 -> 2:1
- ;; 5 * 1 -> 0:a
- $ ldd TMP, Z+3+Off
- mov BB, A5 $ ldd Atmp, Z+1+Off
- rcall .Lmul.help.2
+ ;; Accumulate carry for C2 in TT2.
+#define Cry2 TT2
+ clr Cry2
- ;; 4 * . -> 2:1 (=0)
- ;; 4 * 2 -> 0:a
- $ clr TMP
- mov BB, A4 $ ldd Atmp, Z+2+Off
- rcall .Lmul.help.2
+ ;; TT3 is the loop counter, iterate over B3...B0.
+ ldi TT3, 4
+ rjmp .Loop_start
- ;; 2 * . -> 2:1 (=0)
- ;; 2 * 4 -> 0:a
- $ clr TMP
- mov BB, A2 $ ldd Atmp, Z+4+Off
- rcall .Lmul.help.2
+.Loop:
+ ;; We use A2...A4 below; so shift bytes of A into place.
+ mov A2, A3
+ mov A3, A4
+ mov A4, A5
+ mov A5, A6
+ clr A6
+ ld BB, -Z
+.Loop_start:
+ mul A3, BB
+ ADD CA, r0 $ adc C0, r1 $ adc C1, Null $ adc Cry2, Null
+ MUL A2, BB
+ mov TT0, r1
+ MUL A4, BB
+ ADD CA, TT0 $ adc C0, r0 $ adc C1, r1 $ adc Cry2, Null
+ dec TT3
+ brne .Loop
+
+ clr ZERO
+ ADD C2, Cry2
+ adc C3, Cry3
+ adc C4, Cry4
+ adc C5, ZERO
+ adc C6, ZERO
;; Finally...
@@ -1032,38 +1070,6 @@ DEFUN mul_mant
do_epilogue_restores 10
-;; TT0 * Tmp -> 3:2
-;; TT0 * Atmp -> 1:0
-;; BB * Atmp -> a:-
-;;
-;; Clobbers : TMP, TT0...TT3.
-;; Sets : ZERO = 0.
-.Lmul.help.3:
- mul TT0, TMP $ movw TT2, r0
- mul TT0, Atmp $ movw TT0, r0
- mul BB, Atmp
-
- ADD CA, r1
- adc C0, TT0 $ adc C1, TT1
- adc C2, TT2
-.Lmul.help.3.C3: $ adc C3, TT3 $ clr ZERO
- adc C4, ZERO $ adc C5, ZERO
- adc C6, ZERO
- ret
-
-;; BB * TMP -> 2:1
-;; BB * Atmp -> 0:a
-;;
-;; Asserts : TT3 = 0
-;; Clobbers : TMP, TT0, TT1.
-;; Sets : ZERO = 0.
-.Lmul.help.2:
- mul BB, TMP $ movw TT0, r0
- mul BB, Atmp
- ADD CA, r0 $ adc C0, r1
- adc C1, TT0 $ adc C2, TT1
- rjmp .Lmul.help.3.C3
-
ENDF mul_mant
#endif /* F7MOD_mul_mant_ && MUL */