diff options
Diffstat (limited to 'libgcc')
-rw-r--r-- | libgcc/config/avr/libf7/libf7-asm.sx | 208 |
1 files changed, 107 insertions, 101 deletions
diff --git a/libgcc/config/avr/libf7/libf7-asm.sx b/libgcc/config/avr/libf7/libf7-asm.sx index 4505764..01d1fa3 100644 --- a/libgcc/config/avr/libf7/libf7-asm.sx +++ b/libgcc/config/avr/libf7/libf7-asm.sx @@ -877,10 +877,14 @@ ENDF ashldi3 ;; R18.0 = 1: No rounding. DEFUN mul_mant + ;; 10 = Y, R17...R10 do_prologue_saves 10 + ;; T = R18.0: Skip rounding? bst r18, 0 + ;; Save result address for later. push r25 push r24 + ;; Load A's mantissa. movw ZL, r22 LDD A0, Z+0+Off LDD A1, Z+1+Off @@ -913,26 +917,15 @@ DEFUN mul_mant adc C6, ZERO ;; Done B6 - ;; 3 * 3 -> 0:a - ;; 4 * 4 -> 2:1 - ;; 5 * 5 -> 4:3 - ldd BB, Z+3+Off $ mul A3, BB $ movw TT0, r0 - ldd BB, Z+4+Off $ mul A4, BB $ movw TT2, r0 - ldd BB, Z+5+Off $ mul A5, BB - - ADD CA, TT0 $ adc C0, TT1 - adc C1, TT2 $ adc C2, TT3 - adc C3, r0 $ adc C4, r1 - brcc .+2 - adiw C5, 1 - ;; 6 * 5 -> 5:4 ;; 4 * 5 -> 3:2 ;; 2 * 5 -> 1:0 ;; 0 * 5 -> a:- + ldd BB, Z+5+Off mul A0, BB - ;; A0 done + ;; Done A0 #define Atmp A0 +#define Null A0 mov Atmp, r1 mul A6, BB $ movw TT2, r0 @@ -942,82 +935,127 @@ DEFUN mul_mant ADD CA, Atmp adc C0, r0 $ adc C1, r1 adc C2, TT0 $ adc C3, TT1 - adc C4, TT2 $ adc C5, TT3 $ clr ZERO - adc C6, ZERO + adc C4, TT2 $ adc C5, TT3 $ clr Null + adc C6, Null ;; 1 * 5 -> 0:a ;; 3 * 5 -> 2:1 - ;; 6 * 4 -> 4:3 + ;; 5 * 5 -> 4:3 mul A1, BB $ movw TT0, r0 mul A3, BB $ movw TT2, r0 + mul A5, BB + + ADD CA, TT0 $ adc C0, TT1 + adc C1, TT2 $ adc C2, TT3 + adc C3, r0 $ adc C4, r1 + adc C5, Null $ adc C6, Null + ;; Done B5 + + ;; 2 * 4 -> 0:a + ;; 4 * 4 -> 2:1 + ;; 6 * 4 -> 4:3 ldd BB, Z+4+Off + mul A2, BB $ movw TT0, r0 + mul A4, BB $ movw TT2, r0 mul A6, BB ADD CA, TT0 $ adc C0, TT1 adc C1, TT2 $ adc C2, TT3 - adc C3, r0 $ adc C4, r1 $ clr ZERO - adc C5, ZERO $ adc C6, ZERO - ;; B5 done + adc C3, r0 $ adc C4, r1 + adc C5, Null $ adc C6, Null + ;; 1 * 4 -> a:- + ;; 3 * 4 -> 1:0 + ;; 5 * 4 -> 3:2 + mul A1, BB $ mov TT1, r1 + mul A3, BB $ movw TT2, r0 + mul A5, BB + ;; Done A1 + ;; Done B4 + ADD CA, TT1 + adc C0, TT2 $ adc C1, TT3 + adc C2, r0 $ adc C3, r1 + ;; Accumulate carry for C3 in TT1. + ;; Accumulate carry for C4 in A1. +#define Cry3 TT1 +#define Cry4 A1 + clr Cry3 + clr Cry4 + rol Cry4 + + ;; 6 * 2 -> 2:1 ;; 6 * 3 -> 3:2 - ;; 6 * 1 -> 1:0 - ;; 4 * 1 -> a:- - mov TT0, A6 $ ldd TMP, Z+3+Off - mov BB, A4 $ ldd Atmp, Z+1+Off - rcall .Lmul.help.3 + ;; 5 * 3 -> 2:1 + ldd BB, Z+2+Off + mul A6, BB + add C1, r0 + adc C2, r1 + adc Cry3, Null - ;; 5 * 4 -> 3:2 - ;; 5 * 2 -> 1:0 - ;; 3 * 2 -> a:- - mov TT0, A5 $ ldd TMP, Z+4+Off - mov BB, A3 $ ldd Atmp, Z+2+Off - rcall .Lmul.help.3 + ldd BB, Z+3+Off + mul A6, BB + add C2, r0 + adc C3, r1 + adc Cry4, Null + + mul A5, BB + add C1, r0 + adc C2, r1 + adc Cry3, Null - ;; 4 * -> 3:2 (=0) + ;; Perform the remaining 11 multiplications in 4 loopings: ;; 4 * 3 -> 1:0 + ;; 3 * 3 -> 0:a ;; 2 * 3 -> a:- - mov TT0, A4 $ clr TMP - mov BB, A2 $ ldd Atmp, Z+3+Off - rcall .Lmul.help.3 - - ;; 3 * . -> 3:2 (=0) - ;; 3 * 4 -> 1:0 - ;; 1 * 4 -> a:- - mov TT0, A3 $ clr TMP - mov BB, A1 $ ldd Atmp, Z+4+Off - rcall .Lmul.help.3 - - ;; . * ? -> 3:2 (=0) - ;; . * 0 -> 1:0 (=0) + ;; + ;; 5 * 2 -> 1:0 + ;; 4 * 2 -> 0:a + ;; 3 * 2 -> a:- + ;; + ;; 6 * 1 -> 1:0 + ;; 5 * 1 -> 0:a + ;; 4 * 1 -> a:- + ;; + ;; . * 0 -> 1:0 (=0) + ;; 6 * 0 -> 0:a ;; 5 * 0 -> a:- - clr TT0 - mov BB, A5 $ ldd Atmp, Z+0+Off - rcall .Lmul.help.3 - clr TT3 ;; Asserted by .Lmul.help.2 - ;; 6 * 2 -> 2:1 - ;; 6 * 0 -> 0:a - $ ldd TMP, Z+2+Off - mov BB, A6 ;$ ldd Atmp, Z+0+Off - rcall .Lmul.help.2 + ;; BB already contains B3, hence let Z point one past B2 so that + ;; the LD *, -Z below will pick up B2, B1, B0. + adiw r30, 1 + Off+2 - ;; 5 * 3 -> 2:1 - ;; 5 * 1 -> 0:a - $ ldd TMP, Z+3+Off - mov BB, A5 $ ldd Atmp, Z+1+Off - rcall .Lmul.help.2 + ;; Accumulate carry for C2 in TT2. +#define Cry2 TT2 + clr Cry2 - ;; 4 * . -> 2:1 (=0) - ;; 4 * 2 -> 0:a - $ clr TMP - mov BB, A4 $ ldd Atmp, Z+2+Off - rcall .Lmul.help.2 + ;; TT3 is the loop counter, iterate over B3...B0. + ldi TT3, 4 + rjmp .Loop_start - ;; 2 * . -> 2:1 (=0) - ;; 2 * 4 -> 0:a - $ clr TMP - mov BB, A2 $ ldd Atmp, Z+4+Off - rcall .Lmul.help.2 +.Loop: + ;; We use A2...A4 below; so shift bytes of A into place. + mov A2, A3 + mov A3, A4 + mov A4, A5 + mov A5, A6 + clr A6 + ld BB, -Z +.Loop_start: + mul A3, BB + ADD CA, r0 $ adc C0, r1 $ adc C1, Null $ adc Cry2, Null + MUL A2, BB + mov TT0, r1 + MUL A4, BB + ADD CA, TT0 $ adc C0, r0 $ adc C1, r1 $ adc Cry2, Null + dec TT3 + brne .Loop + + clr ZERO + ADD C2, Cry2 + adc C3, Cry3 + adc C4, Cry4 + adc C5, ZERO + adc C6, ZERO ;; Finally... @@ -1032,38 +1070,6 @@ DEFUN mul_mant do_epilogue_restores 10 -;; TT0 * Tmp -> 3:2 -;; TT0 * Atmp -> 1:0 -;; BB * Atmp -> a:- -;; -;; Clobbers : TMP, TT0...TT3. -;; Sets : ZERO = 0. -.Lmul.help.3: - mul TT0, TMP $ movw TT2, r0 - mul TT0, Atmp $ movw TT0, r0 - mul BB, Atmp - - ADD CA, r1 - adc C0, TT0 $ adc C1, TT1 - adc C2, TT2 -.Lmul.help.3.C3: $ adc C3, TT3 $ clr ZERO - adc C4, ZERO $ adc C5, ZERO - adc C6, ZERO - ret - -;; BB * TMP -> 2:1 -;; BB * Atmp -> 0:a -;; -;; Asserts : TT3 = 0 -;; Clobbers : TMP, TT0, TT1. -;; Sets : ZERO = 0. -.Lmul.help.2: - mul BB, TMP $ movw TT0, r0 - mul BB, Atmp - ADD CA, r0 $ adc C0, r1 - adc C1, TT0 $ adc C2, TT1 - rjmp .Lmul.help.3.C3 - ENDF mul_mant #endif /* F7MOD_mul_mant_ && MUL */ |