aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorDoug Evans <dje@gnu.org>1995-05-12 16:30:52 +0000
committerDoug Evans <dje@gnu.org>1995-05-12 16:30:52 +0000
commit454e0249676efa5688841900b0447fe690fb8742 (patch)
treec68c836b80705afa52d99765f19cd6791efca538 /gcc
parentb6e2a70e206ee03378536ae03618d61fa11c1756 (diff)
downloadgcc-454e0249676efa5688841900b0447fe690fb8742.zip
gcc-454e0249676efa5688841900b0447fe690fb8742.tar.gz
gcc-454e0249676efa5688841900b0447fe690fb8742.tar.bz2
Initial revision
From-SVN: r9645
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/lib1funcs.asm1597
1 files changed, 1597 insertions, 0 deletions
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
new file mode 100644
index 0000000..c23683e
--- /dev/null
+++ b/gcc/config/arm/lib1funcs.asm
@@ -0,0 +1,1597 @@
+@ libgcc1 routines for ARM cpu.
+@ Division and remainder, from Appendix E of the Sparc Version 8
+@ Architecture Manual, with fixes from Gordon Irlam.
+@ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
+
+/* Copyright (C) 1995 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* As a special exception, if you link this library with other files,
+ some of which are compiled with GCC, to produce an executable,
+ this library does not by itself cause the resulting executable
+ to be covered by the GNU General Public License.
+ This exception does not however invalidate any other reasons why
+ the executable file might be covered by the GNU General Public License. */
+
+/*
+ * Input: dividend and divisor in r0 and r1 respectively.
+ *
+ * m4 parameters:
+ * NAME name of function to generate
+ * OP OP=div => r0 / r1; OP=mod => r0 % r1
+ * S S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top `decade' of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
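+/* The m4 source below is kept for reference only: it sits inside this
+   comment, so none of it is assembled.  The four routines after it
+   (___udivsi3, ___divsi3, ___umodsi3 and ___modsi3) look like its
+   expansions with N = 4 and the register names substituted.
+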
+define(N, `4')dnl
+define(WORDSIZE, `32')dnl
+define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
+dnl
+define(dividend, `r0')dnl
+define(divisor, `r1')dnl
+define(Q, `r2')dnl
+define(R, `r3')dnl
+define(ITER, `ip')dnl
+define(V, `lr')dnl
+dnl
+dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
+define(T, `r4')dnl
+define(SC, `r5')dnl
+ifelse(S, `true', `define(SIGN, `r6')')dnl
+define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl
+define(ret, `ldmia sp!, REGLIST pc}')dnl
+dnl
+dnl This is the recursive definition for developing quotient digits.
+dnl
+dnl Parameters:
+dnl $1 the current depth, 1 <= $1 <= N
+dnl $2 the current accumulation of quotient bits
+dnl N max depth
+dnl
+dnl We add a new bit to $2 and either recurse or insert the bits in
+dnl the quotient. R, Q, and V are inputs and outputs as defined above;
+dnl the condition codes are expected to reflect the input R, and are
+dnl modified to reflect the output R.
+dnl
+define(DEVELOP_QUOTIENT_BITS,
+` @ depth $1, accumulated bits $2
+ mov V, V, lsr #1
+ blt L.$1.eval(2^N+$2+999)
+ @ remainder is positive
+ subs R, R, V
+ ifelse($1, N,
+ ` ifelse(eval(2*$2+1<0), `0',
+ `add Q, Q, `#'eval($2*2+1)',
+ `sub Q, Q, `#'eval(-($2*2+1))')
+
+ b 9f
+ ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
+L.$1.eval(2^N+$2+999):
+ @ remainder is negative
+ adds R, R, V
+ ifelse($1, N,
+ ` ifelse(eval(2*$2-1<0), `0',
+ `add Q, Q, `#'eval($2*2-1)',
+ `sub Q, Q, `#'eval(-($2*2-1))')
+ b 9f
+
+ ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
+ ifelse($1, 1, `9:')')dnl
+
+#include "trap.h"
+
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+.text
+ .globl NAME
+ .align 0
+NAME:
+ stmdb sp!, REGLIST lr}
+ifelse(S, `true',
+` @ compute sign of result; if neither is negative, no problem
+ eor SIGN, divisor, dividend @ compute sign
+ cmp divisor, #0
+ rsbmi divisor, divisor, #0
+ beq Ldiv_zero
+ mov V, divisor
+ movs R, dividend
+ rsbmi R, R, #0 @ make dividend nonnegative
+',
+` @ Ready to divide. Compute size of quotient; scale comparand.
+ movs V, divisor
+ mov R, dividend
+ beq Ldiv_zero
+')
+
+ cmp R, V @ if divisor exceeds dividend, done
+ mov Q, #0
+ bcc Lgot_result @ (and algorithm fails otherwise)
+ mov T, `#'(1 << (WORDSIZE - TOPBITS - 1))
+ cmp R, T
+ mov ITER, #0
+ bcc Lnot_really_big
+
+ @ `Here the dividend is >= 2^(31-N) or so. We must be careful here,
+ @ as our usual N-at-a-shot divide step will cause overflow and havoc.
+ @ The number of bits in the result here is N*ITER+SC, where SC <= N.
+ @ Compute ITER in an unorthodox manner: know we need to shift V into
+ @ the top decade: so do not even bother to compare to R.'
+ mov SC, #1
+ 1:
+ cmp V, T
+ bcs 3f
+ mov V, V, lsl `#'N
+ add ITER, ITER, #1
+ b 1b
+
+ @ Now compute SC.
+ 2: adds V, V, V
+ add SC, SC, #1
+ bcc Lnot_too_big
+
+ @ We get here if the divisor overflowed while shifting.
+ @ This means that R has the high-order bit set.
+ @ Restore V and subtract from R.
+ mov T, T, lsl `#'TOPBITS
+ mov V, V, lsr #1
+ add V, T, V
+ sub SC, SC, #1
+ b Ldo_single_div
+
+ Lnot_too_big:
+ 3: cmp V, R
+ bcc 2b
+@ beq Ldo_single_div
+
+ /-* NB: these are commented out in the V8-Sparc manual as well *-/
+ /-* (I do not understand this) *-/
+ @ V > R: went too far: back up 1 step
+ @ srl V, 1, V
+ @ dec SC
+ @ do single-bit divide steps
+ @
+ @ We have to be careful here. We know that R >= V, so we can do the
+ @ first divide step without thinking. BUT, the others are conditional,
+ @ and are only done if R >= 0. Because both R and V may have the high-
+ @ order bit set in the first step, just falling into the regular
+ @ division loop will mess up the first time around.
+ @ So we unroll slightly...
+ Ldo_single_div:
+ subs SC, SC, #1
+ blt Lend_regular_divide
+ sub R, R, V
+ mov Q, #1
+ b Lend_single_divloop
+ Lsingle_divloop:
+ cmp R, #0
+ mov Q, Q, lsl #1
+ mov V, V, lsr #1
+ @ R >= 0
+ subpl R, R, V
+ addpl Q, Q, #1
+ @ R < 0
+ addmi R, R, V
+ submi Q, Q, #1
+ Lend_single_divloop:
+ subs SC, SC, #1
+ bge Lsingle_divloop
+ b Lend_regular_divide
+
+1:
+ add ITER, ITER, #1
+Lnot_really_big:
+ mov V, V, lsl `#'N
+ cmp V, R
+ bls 1b
+ @
+ @ HOW CAN ITER EVER BE -1 HERE ?????
+ @
+ cmn ITER, #1
+ beq Lgot_result
+
+Ldivloop:
+ cmp R, #0 @ set up for initial iteration
+ mov Q, Q, lsl `#'N
+ DEVELOP_QUOTIENT_BITS(1, 0)
+Lend_regular_divide:
+ subs ITER, ITER, #1
+ bge Ldivloop
+ cmp R, #0
+ @ non-restoring fixup here (one instruction only!)
+ifelse(OP, `div',
+` sublt Q, Q, #1
+', ` addlt R, divisor, R
+')
+
+Lgot_result:
+ifelse(S, `true',
+` @ check to see if answer should be < 0
+ cmp SIGN, #0
+ ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0')
+')
+ ifelse(OP, `div', `mov r0, Q', `mov r0, R')
+ ret
+
+Ldiv_zero:
+ @ Divide by zero trap. If it returns, return 0 (about as
+ @ wrong as possible, but that is what SunOS does...).
+ bl ___div0
+ mov r0, #0
+ ret
+*/
+
+#ifdef L_udivsi3
+/* ___udivsi3 -- unsigned 32-bit division (assembled only when
+   L_udivsi3 is defined).
+
+   In:   r0 = dividend, r1 = divisor.
+   Out:  r0 = r0 / r1.  If r1 is zero, ___div0 is called and, should it
+         return, the result is 0.
+   r4, r5 and the return address are saved on the stack and restored on
+   exit; r2, r3, ip and lr are used as scratch and are not restored.
+
+   Register roles (names as in the comments below):
+     r2  Q     partial quotient
+     r3  R     running remainder; may go negative between steps
+     lr  V     comparand: the divisor scaled left, halved before each step
+     ip  ITER  number of 4-bit main-loop passes
+     r4  T     threshold 1 << 27 that selects the scaling path
+     r5  SC    number of single-bit steps on the large-dividend path  */
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+.text
+ .globl ___udivsi3
+ .align 0
+___udivsi3:
+ stmdb sp!, {r4, r5, lr} @ save T, SC and the return address
+ @ Ready to divide. Compute size of quotient; scale comparand.
+ movs lr, r1 @ V = divisor; Z is set when the divisor is zero
+ mov r3, r0 @ R = dividend
+ beq Ldiv_zero
+
+
+ cmp r3, lr @ if r1 exceeds r0, done
+ mov r2, #0 @ Q = 0
+ bcc Lgot_result @ (and algorithm fails otherwise)
+ mov r4, #(1 << (32 - 4 - 1)) @ T = 1 << 27
+ cmp r3, r4
+ mov ip, #0 @ ITER = 0
+ bcc Lnot_really_big @ R < T: scale V four bits at a time
+
+ @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
+ @ as our usual N-at-a-shot divide step will cause overflow and havoc.
+ @ The number of bits in the result here is N*ITER+SC, where SC <= N.
+ @ Compute ITER in an unorthodox manner: know we need to shift V into
+ @ the top decade: so do not even bother to compare to R.
+ mov r5, #1 @ SC = 1
+ 1:
+ cmp lr, r4
+ bcs 3f @ V >= T: done scaling by four bits
+ mov lr, lr, lsl #4
+ add ip, ip, #1
+ b 1b
+
+ @ Now compute r5.
+ 2: adds lr, lr, lr @ V *= 2; carry is set if a bit was shifted out
+ add r5, r5, #1
+ bcc Lnot_too_big
+
+ @ We get here if the scaled divisor (lr) overflowed while shifting.
+ @ This means that r3 has the high-order bit set.
+ @ Restore lr and subtract from r3.
+ mov r4, r4, lsl #4 @ r4 = 1 << 31, the bit that was shifted out
+ mov lr, lr, lsr #1
+ add lr, r4, lr @ lr is back to its value before the doubling
+ sub r5, r5, #1
+ b Ldo_single_div
+
+ Lnot_too_big:
+ 3: cmp lr, r3
+ bcc 2b @ keep doubling while V < R (unsigned)
+@ beq Ldo_single_div
+
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ @ lr > r3: went too far: back up 1 step
+ @ srl lr, 1, lr
+ @ dec r5
+ @ do single-bit divide steps
+ @
+ @ We have to be careful here. We know that r3 >= lr, so we can do the
+ @ first divide step without thinking. BUT, the others are conditional,
+ @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
+ @ order bit set in the first step, just falling into the regular
+ @ division loop will mess up the first time around.
+ @ So we unroll slightly...
+ Ldo_single_div:
+ subs r5, r5, #1
+ blt Lend_regular_divide
+ sub r3, r3, lr
+ mov r2, #1
+ b Lend_single_divloop
+ Lsingle_divloop:
+ cmp r3, #0 @ N flag picks the pl or the mi pair below
+ mov r2, r2, lsl #1
+ mov lr, lr, lsr #1
+ @ r3 >= 0
+ subpl r3, r3, lr
+ addpl r2, r2, #1
+ @ r3 < 0
+ addmi r3, r3, lr
+ submi r2, r2, #1
+ Lend_single_divloop:
+ subs r5, r5, #1
+ bge Lsingle_divloop
+ b Lend_regular_divide
+
+1:
+ add ip, ip, #1
+Lnot_really_big:
+ mov lr, lr, lsl #4
+ cmp lr, r3
+ bls 1b @ V <= R (unsigned): count a pass and scale again
+ @
+ @ HOW CAN ip EVER BE -1 HERE ?????
+ @ (It cannot: ip starts at 0 and is only incremented above.)
+ cmn ip, #1
+ beq Lgot_result
+/* Main loop.  Each pass shifts Q left by four and develops four more
+   quotient bits with a fully unrolled non-restoring decision tree.  At
+   every depth V (lr) is halved; it is then subtracted from R (r3) when
+   R was non-negative, or added back when R was negative, and the flags
+   set by that subs/adds choose the branch at the next depth.  Each of
+   the 16 leaves adds its signed odd digit (-15 ... +15) to Q (r2) and
+   jumps to local label 9.  */
+Ldivloop:
+ cmp r3, #0 @ set up for initial iteration
+ mov r2, r2, lsl #4
+ @ depth 1, accumulated bits 0
+ mov lr, lr, lsr #1
+ blt L.1.1015
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 2, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.2.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.3.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 7
+ mov lr, lr, lsr #1
+ blt L.4.1022
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #15
+
+ b 9f
+
+L.4.1022:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #13
+ b 9f
+
+
+
+L.3.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 5
+ mov lr, lr, lsr #1
+ blt L.4.1020
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #11
+
+ b 9f
+
+L.4.1020:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #9
+ b 9f
+
+
+
+
+L.2.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.3.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.4.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #7
+
+ b 9f
+
+L.4.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #5
+ b 9f
+
+
+
+L.3.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.4.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #3
+
+ b 9f
+
+L.4.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #1
+ b 9f
+
+
+
+
+
+L.1.1015:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 2, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.2.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.3.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.4.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #1
+
+ b 9f
+
+L.4.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #3
+ b 9f
+
+
+
+L.3.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.4.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #5
+
+ b 9f
+
+L.4.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #7
+ b 9f
+
+
+
+
+L.2.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.3.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -5
+ mov lr, lr, lsr #1
+ blt L.4.1010
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #9
+
+ b 9f
+
+L.4.1010:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #11
+ b 9f
+
+
+
+L.3.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -7
+ mov lr, lr, lsr #1
+ blt L.4.1008
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #13
+
+ b 9f
+
+L.4.1008:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #15
+ b 9f
+
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subs ip, ip, #1
+ bge Ldivloop @ another 4-bit pass while ITER >= 0
+ cmp r3, #0
+ @ non-restoring fixup here (one instruction only!)
+ sublt r2, r2, #1 @ final R < 0: take one off the quotient
+
+
+Lgot_result:
+
+ mov r0, r2 @ return Q
+ ldmia sp!, {r4, r5, pc} @ restore r4, r5 and return
+
+Ldiv_zero:
+ @ Divide by zero trap. If it returns, return 0 (about as
+ @ wrong as possible, but that is what SunOS does...).
+ bl ___div0 @ bl overwrites lr; the return address is on the stack
+ mov r0, #0
+ ldmia sp!, {r4, r5, pc}
+
+#endif /* L_udivsi3 */
+
+#ifdef L_divsi3
+/* ___divsi3 -- signed 32-bit division (assembled only when L_divsi3
+   is defined).
+
+   In:   r0 = dividend, r1 = divisor.
+   Out:  r0 = quotient of the two magnitudes, negated when the operands
+         have different signs.  If r1 is zero, ___div0 is called and,
+         should it return, the result is 0.
+   r4, r5, r6 and the return address are saved on the stack and restored
+   on exit; r1 is replaced by its magnitude, and r2, r3, ip and lr are
+   used as scratch; none of these are restored.
+
+   Register roles (names as in the comments below):
+     r2  Q     partial quotient
+     r3  R     running remainder; may go negative between steps
+     lr  V     comparand: the divisor scaled left, halved before each step
+     ip  ITER  number of 4-bit main-loop passes
+     r4  T     threshold 1 << 27 that selects the scaling path
+     r5  SC    number of single-bit steps on the large-dividend path
+     r6  SIGN  r1 eor r0; its top bit is the sign of the quotient  */
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+.text
+ .globl ___divsi3
+ .align 0
+___divsi3:
+ stmdb sp!, {r4, r5, r6, lr} @ save T, SC, SIGN and the return address
+ @ compute sign of result; if neither is negative, no problem
+ eor r6, r1, r0 @ compute sign
+ cmp r1, #0
+ rsbmi r1, r1, #0 @ r1 = magnitude of the divisor
+ beq Ldiv_zero @ Z is still from the cmp; rsbmi leaves flags alone
+ mov lr, r1 @ V = magnitude of the divisor
+ movs r3, r0 @ R = dividend; N is set when it is negative
+ rsbmi r3, r3, #0 @ make dividend nonnegative
+
+
+ cmp r3, lr @ if r1 exceeds r0, done
+ mov r2, #0 @ Q = 0
+ bcc Lgot_result @ (and algorithm fails otherwise)
+ mov r4, #(1 << (32 - 4 - 1)) @ T = 1 << 27
+ cmp r3, r4
+ mov ip, #0 @ ITER = 0
+ bcc Lnot_really_big @ R < T: scale V four bits at a time
+
+ @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
+ @ as our usual N-at-a-shot divide step will cause overflow and havoc.
+ @ The number of bits in the result here is N*ITER+SC, where SC <= N.
+ @ Compute ITER in an unorthodox manner: know we need to shift V into
+ @ the top decade: so do not even bother to compare to R.
+ mov r5, #1 @ SC = 1
+ 1:
+ cmp lr, r4
+ bcs 3f @ V >= T: done scaling by four bits
+ mov lr, lr, lsl #4
+ add ip, ip, #1
+ b 1b
+
+ @ Now compute r5.
+ 2: adds lr, lr, lr @ V *= 2; carry is set if a bit was shifted out
+ add r5, r5, #1
+ bcc Lnot_too_big
+
+ @ We get here if the scaled divisor (lr) overflowed while shifting.
+ @ This means that r3 has the high-order bit set.
+ @ Restore lr and subtract from r3.
+ mov r4, r4, lsl #4 @ r4 = 1 << 31, the bit that was shifted out
+ mov lr, lr, lsr #1
+ add lr, r4, lr @ lr is back to its value before the doubling
+ sub r5, r5, #1
+ b Ldo_single_div
+
+ Lnot_too_big:
+ 3: cmp lr, r3
+ bcc 2b @ keep doubling while V < R (unsigned)
+@ beq Ldo_single_div
+
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ @ lr > r3: went too far: back up 1 step
+ @ srl lr, 1, lr
+ @ dec r5
+ @ do single-bit divide steps
+ @
+ @ We have to be careful here. We know that r3 >= lr, so we can do the
+ @ first divide step without thinking. BUT, the others are conditional,
+ @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
+ @ order bit set in the first step, just falling into the regular
+ @ division loop will mess up the first time around.
+ @ So we unroll slightly...
+ Ldo_single_div:
+ subs r5, r5, #1
+ blt Lend_regular_divide
+ sub r3, r3, lr
+ mov r2, #1
+ b Lend_single_divloop
+ Lsingle_divloop:
+ cmp r3, #0 @ N flag picks the pl or the mi pair below
+ mov r2, r2, lsl #1
+ mov lr, lr, lsr #1
+ @ r3 >= 0
+ subpl r3, r3, lr
+ addpl r2, r2, #1
+ @ r3 < 0
+ addmi r3, r3, lr
+ submi r2, r2, #1
+ Lend_single_divloop:
+ subs r5, r5, #1
+ bge Lsingle_divloop
+ b Lend_regular_divide
+
+1:
+ add ip, ip, #1
+Lnot_really_big:
+ mov lr, lr, lsl #4
+ cmp lr, r3
+ bls 1b @ V <= R (unsigned): count a pass and scale again
+ @
+ @ HOW CAN ip EVER BE -1 HERE ?????
+ @ (It cannot: ip starts at 0 and is only incremented above.)
+ cmn ip, #1
+ beq Lgot_result
+/* Main loop.  Each pass shifts Q left by four and develops four more
+   quotient bits with a fully unrolled non-restoring decision tree.  At
+   every depth V (lr) is halved; it is then subtracted from R (r3) when
+   R was non-negative, or added back when R was negative, and the flags
+   set by that subs/adds choose the branch at the next depth.  Each of
+   the 16 leaves adds its signed odd digit (-15 ... +15) to Q (r2) and
+   jumps to local label 9.  */
+Ldivloop:
+ cmp r3, #0 @ set up for initial iteration
+ mov r2, r2, lsl #4
+ @ depth 1, accumulated bits 0
+ mov lr, lr, lsr #1
+ blt L.1.1015
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 2, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.2.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.3.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 7
+ mov lr, lr, lsr #1
+ blt L.4.1022
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #15
+
+ b 9f
+
+L.4.1022:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #13
+ b 9f
+
+
+
+L.3.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 5
+ mov lr, lr, lsr #1
+ blt L.4.1020
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #11
+
+ b 9f
+
+L.4.1020:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #9
+ b 9f
+
+
+
+
+L.2.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.3.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.4.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #7
+
+ b 9f
+
+L.4.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #5
+ b 9f
+
+
+
+L.3.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.4.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #3
+
+ b 9f
+
+L.4.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #1
+ b 9f
+
+
+
+
+
+L.1.1015:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 2, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.2.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.3.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.4.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #1
+
+ b 9f
+
+L.4.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #3
+ b 9f
+
+
+
+L.3.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.4.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #5
+
+ b 9f
+
+L.4.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #7
+ b 9f
+
+
+
+
+L.2.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.3.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -5
+ mov lr, lr, lsr #1
+ blt L.4.1010
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #9
+
+ b 9f
+
+L.4.1010:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #11
+ b 9f
+
+
+
+L.3.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -7
+ mov lr, lr, lsr #1
+ blt L.4.1008
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #13
+
+ b 9f
+
+L.4.1008:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #15
+ b 9f
+
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subs ip, ip, #1
+ bge Ldivloop @ another 4-bit pass while ITER >= 0
+ cmp r3, #0
+ @ non-restoring fixup here (one instruction only!)
+ sublt r2, r2, #1 @ final R < 0: take one off the quotient
+
+
+Lgot_result:
+ @ check to see if answer should be < 0
+ cmp r6, #0
+ rsbmi r2, r2, #0 @ operand signs differed: negate the quotient
+
+ mov r0, r2 @ return Q
+ ldmia sp!, {r4, r5, r6, pc} @ restore r4-r6 and return
+
+Ldiv_zero:
+ @ Divide by zero trap. If it returns, return 0 (about as
+ @ wrong as possible, but that is what SunOS does...).
+ bl ___div0 @ bl overwrites lr; the return address is on the stack
+ mov r0, #0
+ ldmia sp!, {r4, r5, r6, pc}
+
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+/* ___umodsi3 -- unsigned 32-bit remainder (assembled only when
+   L_umodsi3 is defined).
+
+   In:   r0 = dividend, r1 = divisor.
+   Out:  r0 = r0 % r1.  If r1 is zero, ___div0 is called and, should it
+         return, the result is 0.
+   r4, r5 and the return address are saved on the stack and restored on
+   exit; r2, r3, ip and lr are used as scratch and are not restored.
+
+   Register roles (names as in the comments below):
+     r2  Q     partial quotient (developed but not returned)
+     r3  R     running remainder; may go negative between steps
+     lr  V     comparand: the divisor scaled left, halved before each step
+     ip  ITER  number of 4-bit main-loop passes
+     r4  T     threshold 1 << 27 that selects the scaling path
+     r5  SC    number of single-bit steps on the large-dividend path  */
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+.text
+ .globl ___umodsi3
+ .align 0
+___umodsi3:
+ stmdb sp!, {r4, r5, lr} @ save T, SC and the return address
+ @ Ready to divide. Compute size of quotient; scale comparand.
+ movs lr, r1 @ V = divisor; Z is set when the divisor is zero
+ mov r3, r0 @ R = dividend
+ beq Ldiv_zero
+
+
+ cmp r3, lr @ if r1 exceeds r0, done
+ mov r2, #0 @ Q = 0
+ bcc Lgot_result @ (and algorithm fails otherwise)
+ mov r4, #(1 << (32 - 4 - 1)) @ T = 1 << 27
+ cmp r3, r4
+ mov ip, #0 @ ITER = 0
+ bcc Lnot_really_big @ R < T: scale V four bits at a time
+
+ @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
+ @ as our usual N-at-a-shot divide step will cause overflow and havoc.
+ @ The number of bits in the result here is N*ITER+SC, where SC <= N.
+ @ Compute ITER in an unorthodox manner: know we need to shift V into
+ @ the top decade: so do not even bother to compare to R.
+ mov r5, #1 @ SC = 1
+ 1:
+ cmp lr, r4
+ bcs 3f @ V >= T: done scaling by four bits
+ mov lr, lr, lsl #4
+ add ip, ip, #1
+ b 1b
+
+ @ Now compute r5.
+ 2: adds lr, lr, lr @ V *= 2; carry is set if a bit was shifted out
+ add r5, r5, #1
+ bcc Lnot_too_big
+
+ @ We get here if the scaled divisor (lr) overflowed while shifting.
+ @ This means that r3 has the high-order bit set.
+ @ Restore lr and subtract from r3.
+ mov r4, r4, lsl #4 @ r4 = 1 << 31, the bit that was shifted out
+ mov lr, lr, lsr #1
+ add lr, r4, lr @ lr is back to its value before the doubling
+ sub r5, r5, #1
+ b Ldo_single_div
+
+ Lnot_too_big:
+ 3: cmp lr, r3
+ bcc 2b @ keep doubling while V < R (unsigned)
+@ beq Ldo_single_div
+
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ @ lr > r3: went too far: back up 1 step
+ @ srl lr, 1, lr
+ @ dec r5
+ @ do single-bit divide steps
+ @
+ @ We have to be careful here. We know that r3 >= lr, so we can do the
+ @ first divide step without thinking. BUT, the others are conditional,
+ @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
+ @ order bit set in the first step, just falling into the regular
+ @ division loop will mess up the first time around.
+ @ So we unroll slightly...
+ Ldo_single_div:
+ subs r5, r5, #1
+ blt Lend_regular_divide
+ sub r3, r3, lr
+ mov r2, #1
+ b Lend_single_divloop
+ Lsingle_divloop:
+ cmp r3, #0 @ N flag picks the pl or the mi pair below
+ mov r2, r2, lsl #1
+ mov lr, lr, lsr #1
+ @ r3 >= 0
+ subpl r3, r3, lr
+ addpl r2, r2, #1
+ @ r3 < 0
+ addmi r3, r3, lr
+ submi r2, r2, #1
+ Lend_single_divloop:
+ subs r5, r5, #1
+ bge Lsingle_divloop
+ b Lend_regular_divide
+
+1:
+ add ip, ip, #1
+Lnot_really_big:
+ mov lr, lr, lsl #4
+ cmp lr, r3
+ bls 1b @ V <= R (unsigned): count a pass and scale again
+ @
+ @ HOW CAN ip EVER BE -1 HERE ?????
+ @ (It cannot: ip starts at 0 and is only incremented above.)
+ cmn ip, #1
+ beq Lgot_result
+/* Main loop.  Each pass shifts Q left by four and develops four more
+   quotient bits with a fully unrolled non-restoring decision tree.  At
+   every depth V (lr) is halved; it is then subtracted from R (r3) when
+   R was non-negative, or added back when R was negative, and the flags
+   set by that subs/adds choose the branch at the next depth.  Each of
+   the 16 leaves adds its signed odd digit (-15 ... +15) to Q (r2) and
+   jumps to local label 9.  */
+Ldivloop:
+ cmp r3, #0 @ set up for initial iteration
+ mov r2, r2, lsl #4
+ @ depth 1, accumulated bits 0
+ mov lr, lr, lsr #1
+ blt L.1.1015
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 2, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.2.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.3.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 7
+ mov lr, lr, lsr #1
+ blt L.4.1022
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #15
+
+ b 9f
+
+L.4.1022:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #13
+ b 9f
+
+
+
+L.3.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 5
+ mov lr, lr, lsr #1
+ blt L.4.1020
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #11
+
+ b 9f
+
+L.4.1020:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #9
+ b 9f
+
+
+
+
+L.2.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.3.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.4.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #7
+
+ b 9f
+
+L.4.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #5
+ b 9f
+
+
+
+L.3.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.4.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #3
+
+ b 9f
+
+L.4.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #1
+ b 9f
+
+
+
+
+
+L.1.1015:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 2, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.2.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.3.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.4.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #1
+
+ b 9f
+
+L.4.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #3
+ b 9f
+
+
+
+L.3.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.4.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #5
+
+ b 9f
+
+L.4.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #7
+ b 9f
+
+
+
+
+L.2.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.3.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -5
+ mov lr, lr, lsr #1
+ blt L.4.1010
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #9
+
+ b 9f
+
+L.4.1010:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #11
+ b 9f
+
+
+
+L.3.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -7
+ mov lr, lr, lsr #1
+ blt L.4.1008
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #13
+
+ b 9f
+
+L.4.1008:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #15
+ b 9f
+
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subs ip, ip, #1
+ bge Ldivloop @ another 4-bit pass while ITER >= 0
+ cmp r3, #0
+ @ non-restoring fixup here (one instruction only!)
+ addlt r3, r1, r3 @ final R < 0: add the divisor back once
+
+
+Lgot_result:
+
+ mov r0, r3 @ return R
+ ldmia sp!, {r4, r5, pc} @ restore r4, r5 and return
+
+Ldiv_zero:
+ @ Divide by zero trap. If it returns, return 0 (about as
+ @ wrong as possible, but that is what SunOS does...).
+ bl ___div0 @ bl overwrites lr; the return address is on the stack
+ mov r0, #0
+ ldmia sp!, {r4, r5, pc}
+
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+/* ___modsi3 -- signed 32-bit remainder (assembled only when L_modsi3
+   is defined).
+
+   In:   r0 = dividend, r1 = divisor.
+   Out:  r0 = remainder of the two magnitudes, given the sign of the
+         dividend, so that (r0 / r1) * r1 + (r0 % r1) == r0 holds with
+         the truncating quotient produced by ___divsi3.  If r1 is zero,
+         ___div0 is called and, should it return, the result is 0.
+   r4, r5, r6 and the return address are saved on the stack and restored
+   on exit; r1 is replaced by its magnitude, and r2, r3, ip and lr are
+   used as scratch; none of these are restored.
+
+   Register roles (names as in the comments below):
+     r2  Q     partial quotient (developed but not returned)
+     r3  R     running remainder; may go negative between steps
+     lr  V     comparand: the divisor scaled left, halved before each step
+     ip  ITER  number of 4-bit main-loop passes
+     r4  T     threshold 1 << 27 that selects the scaling path
+     r5  SC    number of single-bit steps on the large-dividend path
+     r6  SIGN  copy of the dividend; its top bit is the sign of the result
+
+   SIGN used to be r1 eor r0, as for a quotient (the m4 template in the
+   comment near the top of this file still shows that).  For a remainder
+   that is wrong whenever the divisor is negative: 7 % -2 came out as -1
+   and -7 % -2 as +1.  */
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+.text
+ .globl ___modsi3
+ .align 0
+___modsi3:
+ stmdb sp!, {r4, r5, r6, lr} @ save T, SC, SIGN and the return address
+ @ remember the sign of the result: a remainder takes the sign of the
+ @ dividend, whatever the sign of the divisor
+ mov r6, r0 @ SIGN = dividend
+ cmp r1, #0
+ rsbmi r1, r1, #0 @ r1 = magnitude of the divisor
+ beq Ldiv_zero @ Z is still from the cmp; rsbmi leaves flags alone
+ mov lr, r1 @ V = magnitude of the divisor
+ movs r3, r0 @ R = dividend; N is set when it is negative
+ rsbmi r3, r3, #0 @ make dividend nonnegative
+
+
+ cmp r3, lr @ if r1 exceeds r0, done
+ mov r2, #0 @ Q = 0
+ bcc Lgot_result @ (and algorithm fails otherwise)
+ mov r4, #(1 << (32 - 4 - 1)) @ T = 1 << 27
+ cmp r3, r4
+ mov ip, #0 @ ITER = 0
+ bcc Lnot_really_big @ R < T: scale V four bits at a time
+
+ @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
+ @ as our usual N-at-a-shot divide step will cause overflow and havoc.
+ @ The number of bits in the result here is N*ITER+SC, where SC <= N.
+ @ Compute ITER in an unorthodox manner: know we need to shift V into
+ @ the top decade: so do not even bother to compare to R.
+ mov r5, #1 @ SC = 1
+ 1:
+ cmp lr, r4
+ bcs 3f @ V >= T: done scaling by four bits
+ mov lr, lr, lsl #4
+ add ip, ip, #1
+ b 1b
+
+ @ Now compute r5.
+ 2: adds lr, lr, lr @ V *= 2; carry is set if a bit was shifted out
+ add r5, r5, #1
+ bcc Lnot_too_big
+
+ @ We get here if the scaled divisor (lr) overflowed while shifting.
+ @ This means that r3 has the high-order bit set.
+ @ Restore lr and subtract from r3.
+ mov r4, r4, lsl #4 @ r4 = 1 << 31, the bit that was shifted out
+ mov lr, lr, lsr #1
+ add lr, r4, lr @ lr is back to its value before the doubling
+ sub r5, r5, #1
+ b Ldo_single_div
+
+ Lnot_too_big:
+ 3: cmp lr, r3
+ bcc 2b @ keep doubling while V < R (unsigned)
+@ beq Ldo_single_div
+
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ @ lr > r3: went too far: back up 1 step
+ @ srl lr, 1, lr
+ @ dec r5
+ @ do single-bit divide steps
+ @
+ @ We have to be careful here. We know that r3 >= lr, so we can do the
+ @ first divide step without thinking. BUT, the others are conditional,
+ @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
+ @ order bit set in the first step, just falling into the regular
+ @ division loop will mess up the first time around.
+ @ So we unroll slightly...
+ Ldo_single_div:
+ subs r5, r5, #1
+ blt Lend_regular_divide
+ sub r3, r3, lr
+ mov r2, #1
+ b Lend_single_divloop
+ Lsingle_divloop:
+ cmp r3, #0 @ N flag picks the pl or the mi pair below
+ mov r2, r2, lsl #1
+ mov lr, lr, lsr #1
+ @ r3 >= 0
+ subpl r3, r3, lr
+ addpl r2, r2, #1
+ @ r3 < 0
+ addmi r3, r3, lr
+ submi r2, r2, #1
+ Lend_single_divloop:
+ subs r5, r5, #1
+ bge Lsingle_divloop
+ b Lend_regular_divide
+
+1:
+ add ip, ip, #1
+Lnot_really_big:
+ mov lr, lr, lsl #4
+ cmp lr, r3
+ bls 1b @ V <= R (unsigned): count a pass and scale again
+ @
+ @ HOW CAN ip EVER BE -1 HERE ?????
+ @ (It cannot: ip starts at 0 and is only incremented above.)
+ cmn ip, #1
+ beq Lgot_result
+/* Main loop.  Each pass shifts Q left by four and develops four more
+   quotient bits with a fully unrolled non-restoring decision tree.  At
+   every depth V (lr) is halved; it is then subtracted from R (r3) when
+   R was non-negative, or added back when R was negative, and the flags
+   set by that subs/adds choose the branch at the next depth.  Each of
+   the 16 leaves adds its signed odd digit (-15 ... +15) to Q (r2) and
+   jumps to local label 9.  */
+Ldivloop:
+ cmp r3, #0 @ set up for initial iteration
+ mov r2, r2, lsl #4
+ @ depth 1, accumulated bits 0
+ mov lr, lr, lsr #1
+ blt L.1.1015
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 2, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.2.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.3.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 7
+ mov lr, lr, lsr #1
+ blt L.4.1022
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #15
+
+ b 9f
+
+L.4.1022:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #13
+ b 9f
+
+
+
+L.3.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 5
+ mov lr, lr, lsr #1
+ blt L.4.1020
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #11
+
+ b 9f
+
+L.4.1020:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #9
+ b 9f
+
+
+
+
+L.2.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.3.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits 3
+ mov lr, lr, lsr #1
+ blt L.4.1018
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #7
+
+ b 9f
+
+L.4.1018:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #5
+ b 9f
+
+
+
+L.3.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits 1
+ mov lr, lr, lsr #1
+ blt L.4.1016
+ @ remainder is positive
+ subs r3, r3, lr
+ add r2, r2, #3
+
+ b 9f
+
+L.4.1016:
+ @ remainder is negative
+ adds r3, r3, lr
+ add r2, r2, #1
+ b 9f
+
+
+
+
+
+L.1.1015:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 2, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.2.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 3, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.3.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -1
+ mov lr, lr, lsr #1
+ blt L.4.1014
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #1
+
+ b 9f
+
+L.4.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #3
+ b 9f
+
+
+
+L.3.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.4.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #5
+
+ b 9f
+
+L.4.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #7
+ b 9f
+
+
+
+
+L.2.1014:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 3, accumulated bits -3
+ mov lr, lr, lsr #1
+ blt L.3.1012
+ @ remainder is positive
+ subs r3, r3, lr
+ @ depth 4, accumulated bits -5
+ mov lr, lr, lsr #1
+ blt L.4.1010
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #9
+
+ b 9f
+
+L.4.1010:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #11
+ b 9f
+
+
+
+L.3.1012:
+ @ remainder is negative
+ adds r3, r3, lr
+ @ depth 4, accumulated bits -7
+ mov lr, lr, lsr #1
+ blt L.4.1008
+ @ remainder is positive
+ subs r3, r3, lr
+ sub r2, r2, #13
+
+ b 9f
+
+L.4.1008:
+ @ remainder is negative
+ adds r3, r3, lr
+ sub r2, r2, #15
+ b 9f
+
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subs ip, ip, #1
+ bge Ldivloop @ another 4-bit pass while ITER >= 0
+ cmp r3, #0
+ @ non-restoring fixup here (one instruction only!)
+ addlt r3, r1, r3 @ final R < 0: add the divisor magnitude back once
+
+
+Lgot_result:
+ @ check to see if answer should be < 0
+ cmp r6, #0
+ rsbmi r3, r3, #0 @ dividend was negative: negate the remainder
+
+ mov r0, r3 @ return R
+ ldmia sp!, {r4, r5, r6, pc} @ restore r4-r6 and return
+
+Ldiv_zero:
+ @ Divide by zero trap. If it returns, return 0 (about as
+ @ wrong as possible, but that is what SunOS does...).
+ bl ___div0 @ bl overwrites lr; the return address is on the stack
+ mov r0, #0
+ ldmia sp!, {r4, r5, r6, pc}
+
+#endif /* L_modsi3 */
+
+#ifdef L_divmodsi_tools
+/* ___div0 -- hook reached with bl from the division and remainder
+   routines in this file when the divisor is zero (assembled only when
+   L_divmodsi_tools is defined).  This version does nothing and returns
+   at once, after which those routines return 0.  It reads no arguments
+   and writes no register other than pc.
+
+   The register aliases and the .text directive are repeated here so
+   that this section matches every other section of the file and does
+   not rely on the assembler predefining lr and pc.  */
+lr .req r14
+pc .req r15
+.text
+ .globl ___div0
+ .align 0
+___div0:
+ mov pc, lr @ return straight to the caller
+
+#endif /* L_divmodsi_tools */