path: root/gcc/config/arm/ieee754-df.S
author    Nicolas Pitre <nico@cam.org>        2004-10-21 04:22:01 +0000
committer Nicolas Pitre <nico@gcc.gnu.org>    2004-10-21 04:22:01 +0000
commit  6883a6662fc7728a539230ea1b02efd47815d705 (patch)
tree    9fda943368aa15ad74a4a037362c1ee02af23752 /gcc/config/arm/ieee754-df.S
parent  0aab7a4b725f565fcd1dbc85c798a2dc88914c2c (diff)
ieee754-sf.S: Large speed improvements.

        * config/arm/ieee754-sf.S: Large speed improvements.  Fix NAN handling.
        * config/arm/ieee754-df.S: Ditto.

From-SVN: r89364
Diffstat (limited to 'gcc/config/arm/ieee754-df.S')
-rw-r--r--    gcc/config/arm/ieee754-df.S    983
1 file changed, 461 insertions, 522 deletions
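
Throughout this file a double is carried in a pair of core registers: xh/xl
for the first operand and yh/yl for the second (the file maps these aliases
onto r0-r3 depending on endianness). As an orientation aid for the hunks
below, here is a minimal C sketch of that layout and of the field
extractions the new code relies on; the struct and helper names are
illustrative, not part of the source:

    #include <stdint.h>

    /* IEEE-754 double as two 32-bit halves: hi carries the sign, the
       11-bit exponent and the top 20 mantissa bits; lo carries the low
       32 mantissa bits.  */
    struct sw_double {
        uint32_t hi;    /* corresponds to xh */
        uint32_t lo;    /* corresponds to xl */
    };

    static uint32_t sign_bit(struct sw_double d) { return d.hi & 0x80000000u; }
    static uint32_t exponent(struct sw_double d) { return (d.hi << 1) >> 21; }
    static uint32_t mant_hi(struct sw_double d)  { return d.hi & 0x000fffffu; }
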
diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S
index af32b9e..b9cf52e 100644
--- a/gcc/config/arm/ieee754-df.S
+++ b/gcc/config/arm/ieee754-df.S
@@ -60,6 +60,7 @@
ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2
+
@ flip sign bit
eor xh, xh, #0x80000000
RET
@@ -76,10 +77,10 @@ ARM_FUNC_START aeabi_drsub
eor xh, xh, #0x80000000 @ flip sign bit of first arg
b 1f
- ARM_FUNC_START subdf3
+ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3
- @ flip sign bit of second arg
- eor yh, yh, #0x80000000
+
+ eor yh, yh, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
#endif
@@ -87,36 +88,23 @@ ARM_FUNC_ALIAS aeabi_dsub subdf3
ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3
-1: @ Compare both args, return zero if equal but the sign.
- teq xl, yl
- eoreq ip, xh, yh
- teqeq ip, #0x80000000
- beq LSYM(Lad_z)
-
- @ If first arg is 0 or -0, return second arg.
- @ If second arg is 0 or -0, return first arg.
- orrs ip, xl, xh, lsl #1
- moveq xl, yl
- moveq xh, yh
- orrnes ip, yl, yh, lsl #1
- RETc(eq)
-
- stmfd sp!, {r4, r5, lr}
-
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
+1: stmfd sp!, {r4, r5, lr}
- @ If either of them is 0x7ff, result will be INF or NAN
- teq r4, ip
- teqne r5, ip
- beq LSYM(Lad_i)
+ @ Look for zeroes, equal values, INF, or NAN.
+ mov r4, xh, lsl #1
+ mov r5, yh, lsl #1
+ teq r4, r5
+ teqeq xl, yl
+ orrnes ip, r4, xl
+ orrnes ip, r5, yl
+ mvnnes ip, r4, asr #21
+ mvnnes ip, r5, asr #21
+ beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r4,
@ corresponding arg in xh-xl, and positive exponent difference in r5.
- subs r5, r5, r4
+ mov r4, r4, lsr #21
+ rsbs r5, r4, r5, lsr #21
rsblt r5, r5, #0
ble 1f
add r4, r4, r5
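
The rewritten prologue above replaces separate zero and INF/NAN tests with
one flag-setting chain: doubling xh/yh throws away the sign, and
"mvns ..., asr #21" comes out EQ exactly when the 11 exponent bits are all
ones. A rough C rendering of the filter, purely illustrative (the
conditional-execution chain evaluates lazily, which this spelling does not
capture):

    #include <stdint.h>

    /* Branch to the special-case path (Lad_s) when any of these hold.  */
    static int needs_special_path(uint32_t xh, uint32_t xl,
                                  uint32_t yh, uint32_t yl)
    {
        uint32_t r4 = xh << 1, r5 = yh << 1;       /* sign stripped */
        if (r4 == r5 && xl == yl)  return 1;       /* x == +/-y     */
        if ((r4 | xl) == 0)        return 1;       /* x is +/-0     */
        if ((r5 | yl) == 0)        return 1;       /* y is +/-0     */
        if ((r4 >> 21) == 0x7ff)   return 1;       /* x is INF/NAN  */
        if ((r5 >> 21) == 0x7ff)   return 1;       /* y is INF/NAN  */
        return 0;
    }
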
@@ -127,24 +115,24 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
eor yl, xl, yl
eor yh, xh, yh
1:
-
@ If exponent difference is too large, return largest argument
@ already in xh-xl. We need up to 54 bits to handle proper rounding
@ of 0x1p54 - 1.1.
- cmp r5, #(54 << 20)
+ cmp r5, #54
RETLDM "r4, r5" hi
@ Convert mantissa to signed integer.
tst xh, #0x80000000
- bic xh, xh, ip, lsl #1
- orr xh, xh, #0x00100000
+ mov xh, xh, lsl #12
+ mov ip, #0x00100000
+ orr xh, ip, xh, lsr #12
beq 1f
rsbs xl, xl, #0
rsc xh, xh, #0
1:
tst yh, #0x80000000
- bic yh, yh, ip, lsl #1
- orr yh, yh, #0x00100000
+ mov yh, yh, lsl #12
+ orr yh, ip, yh, lsr #12
beq 1f
rsbs yl, yl, #0
rsc yh, yh, #0
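
Both significands are then turned into plain two's-complement 64-bit
integers so the addition needs no sign-specific cases: the lsl #12/lsr #12
pair clears the sign and exponent, the implicit leading 1 lands on bit 20
of the high word (bit 52 overall), and a negative argument is negated with
rsbs/rsc. A C sketch of the same conversion, with illustrative names:

    #include <stdint.h>

    static int64_t signed_mantissa(uint32_t hi, uint32_t lo)
    {
        uint32_t mhi = ((hi << 12) >> 12) | 0x00100000u; /* implicit 1  */
        int64_t m = ((int64_t)mhi << 32) | lo;           /* 53-bit value */
        return (hi & 0x80000000u) ? -m : m;
    }
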
@@ -154,42 +142,30 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
teq r4, r5
beq LSYM(Lad_d)
LSYM(Lad_x):
- @ Scale down second arg with exponent difference.
- @ Apply shift one bit left to first arg and the rest to second arg
- @ to simplify things later, but only if exponent does not become 0.
- mov ip, #0
- movs r5, r5, lsr #20
- beq 3f
- teq r4, #(1 << 20)
- beq 1f
- movs xl, xl, lsl #1
- adc xh, ip, xh, lsl #1
- sub r4, r4, #(1 << 20)
- subs r5, r5, #1
- beq 3f
- @ Shift yh-yl right per r5, keep leftover bits into ip.
-1: rsbs lr, r5, #32
- blt 2f
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r4, r4, #1
+
+ @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
+ rsbs lr, r5, #32
+ blt 1f
mov ip, yl, lsl lr
- mov yl, yl, lsr r5
- orr yl, yl, yh, lsl lr
- mov yh, yh, asr r5
- b 3f
-2: sub r5, r5, #32
+ adds xl, xl, yl, lsr r5
+ adc xh, xh, #0
+ adds xl, xl, yh, lsl lr
+ adcs xh, xh, yh, asr r5
+ b 2f
+1: sub r5, r5, #32
add lr, lr, #32
cmp yl, #1
- adc ip, ip, yh, lsl lr
- mov yl, yh, asr r5
- mov yh, yh, asr #32
-3:
- @ the actual addition
- adds xl, xl, yl
- adc xh, xh, yh
-
+ mov ip, yh, lsl lr
+ orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
+ adds xl, xl, yh, asr r5
+ adcs xh, xh, yh, asr #31
+2:
@ We now have a result in xh-xl-ip.
- @ Keep absolute value in xh-xl-ip, sign in r5.
- ands r5, xh, #0x80000000
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
+ and r5, xh, #0x80000000
bpl LSYM(Lad_p)
rsbs ip, ip, #0
rscs xl, xl, #0
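
The alignment step above shifts the smaller significand right by the
exponent difference and folds it into xh-xl in the same pass, keeping the
shifted-out bits in ip for rounding. For shifts of 32 or more, everything
lost from yl is squashed into a sticky marker at bit 1 of ip ("2 not 1")
so that a later single lsr #1 cannot lose it. A C sketch of the idea,
assuming 1 <= d <= 54 as guaranteed by the earlier range check:

    #include <stdint.h>

    static int64_t align_add(int64_t x, int64_t y, unsigned d,
                             uint32_t *guard)
    {
        uint64_t lost = (uint64_t)y << (64 - d);  /* shifted-out bits,
                                                     top-aligned       */
        *guard = (uint32_t)(lost >> 32);
        if ((uint32_t)lost != 0)
            *guard |= 2;                          /* sticky bit */
        return x + (y >> d);
    }
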
@@ -198,75 +174,66 @@ LSYM(Lad_x):
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp xh, #0x00100000
- bcc LSYM(Lad_l)
+ bcc LSYM(Lad_a)
cmp xh, #0x00200000
- bcc LSYM(Lad_r0)
- cmp xh, #0x00400000
- bcc LSYM(Lad_r1)
+ bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs xh, xh, lsr #1
movs xl, xl, rrx
- movs ip, ip, rrx
- orrcs ip, ip, #1
- add r4, r4, #(1 << 20)
-LSYM(Lad_r1):
- movs xh, xh, lsr #1
- movs xl, xl, rrx
- movs ip, ip, rrx
- orrcs ip, ip, #1
- add r4, r4, #(1 << 20)
+ mov ip, ip, rrx
+ add r4, r4, #1
+
+ @ Make sure we did not bust our exponent.
+ mov r2, r4, lsl #21
+ cmn r2, #(2 << 21)
+ bcs LSYM(Lad_o)
@ Our result is now properly aligned into xh-xl, remaining bits in ip.
@ Round with MSB of ip. If halfway between two numbers, round towards
@ LSB of xl = 0.
-LSYM(Lad_r0):
- adds xl, xl, ip, lsr #31
- adc xh, xh, #0
- teq ip, #0x80000000
- biceq xl, xl, #1
-
- @ One extreme rounding case may add a new MSB. Adjust exponent.
- @ That MSB will be cleared when exponent is merged below.
- tst xh, #0x00200000
- addne r4, r4, #(1 << 20)
-
- @ Make sure we did not bust our exponent.
- adds ip, r4, #(1 << 20)
- bmi LSYM(Lad_o)
-
@ Pack final result together.
LSYM(Lad_e):
- bic xh, xh, #0x00300000
- orr xh, xh, r4
+ cmp ip, #0x80000000
+ moveqs ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
orr xh, xh, r5
RETLDM "r4, r5"
-LSYM(Lad_l):
@ Result must be shifted left and exponent adjusted.
- @ No rounding necessary since ip will always be 0.
+LSYM(Lad_a):
+ movs ip, ip, lsl #1
+ adcs xl, xl, xl
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ sub r4, r4, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since ip will always be 0 at this point.
+LSYM(Lad_l):
+
#if __ARM_ARCH__ < 5
teq xh, #0
- movne r3, #-11
- moveq r3, #21
+ movne r3, #20
+ moveq r3, #52
moveq xh, xl
moveq xl, #0
mov r2, xh
- movs ip, xh, lsr #16
- moveq r2, r2, lsl #16
- addeq r3, r3, #16
- tst r2, #0xff000000
- moveq r2, r2, lsl #8
- addeq r3, r3, #8
- tst r2, #0xf0000000
- moveq r2, r2, lsl #4
- addeq r3, r3, #4
- tst r2, #0xc0000000
- moveq r2, r2, lsl #2
- addeq r3, r3, #2
- tst r2, #0x80000000
- addeq r3, r3, #1
+ cmp r2, #(1 << 16)
+ movhs r2, r2, lsr #16
+ subhs r3, r3, #16
+ cmp r2, #(1 << 8)
+ movhs r2, r2, lsr #8
+ subhs r3, r3, #8
+ cmp r2, #(1 << 4)
+ movhs r2, r2, lsr #4
+ subhs r3, r3, #4
+ cmp r2, #(1 << 2)
+ subhs r3, r3, #2
+ sublo r3, r3, r2, lsr #1
+ sub r3, r3, r2, lsr #3
#else
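
The new Lad_e packing sequence performs round-to-nearest-even and exponent
merge in four instructions: the cmp sets C when the guard word is at least
half, the conditional moveqs replaces C with the result's LSB exactly on
ties, and the final adc lets a mantissa carry ripple straight into the
exponent field. That last point is why the exponent is merged with adc
rather than orr, and why r4 holds exponent minus one (the implicit leading
1 still sitting at bit 52 supplies the +1). The same trick in C, names
illustrative:

    #include <stdint.h>

    /* frac has the implicit 1 at bit 52; exp_m1 is the exponent - 1.  */
    static uint64_t round_pack(uint64_t frac, uint32_t guard,
                               uint32_t exp_m1, uint32_t sign)
    {
        int up = guard > 0x80000000u ||
                 (guard == 0x80000000u && (frac & 1));   /* ties to even */
        uint64_t r = frac + up;           /* may ripple into the exponent */
        r += (uint64_t)exp_m1 << 52;      /* implicit 1 adds the final +1 */
        return r | ((uint64_t)sign << 63);
    }
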
@@ -302,13 +269,15 @@ LSYM(Lad_l):
movle xl, xl, lsl r2
@ adjust exponent accordingly.
-3: subs r4, r4, r3, lsl #20
- bgt LSYM(Lad_e)
+3: subs r4, r4, r3
+ addge xh, xh, r4, lsl #20
+ orrge xh, xh, r5
+ RETLDM "r4, r5" ge
@ Exponent too small, denormalize result.
@ Find out proper shift value.
- mvn r4, r4, asr #20
- subs r4, r4, #30
+ mvn r4, r4
+ subs r4, r4, #31
bge 2f
adds r4, r4, #12
bgt 1f
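
The rewritten __ARM_ARCH__ < 5 sequence in the preceding hunk is a
branchless count-leading-zeros built from range comparisons; it starts
from 20 or 52 instead of 32 so the normalization offset is folded in. Its
logic in isolation, as a C sketch:

    #include <stdint.h>

    static int soft_clz(uint32_t x)   /* x != 0 */
    {
        int n = 32;
        if (x >= 1u << 16) { x >>= 16; n -= 16; }
        if (x >= 1u << 8)  { x >>= 8;  n -= 8;  }
        if (x >= 1u << 4)  { x >>= 4;  n -= 4;  }
        if (x >= 1u << 2)  n -= 2;        /* x in [4,15]: at least 2 */
        else               n -= x >> 1;   /* x in [1,3]: 0 or 1      */
        n -= x >> 3;                      /* x in [8,15]: one more   */
        return n - 1;                     /* MSB position -> clz     */
    }
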
@@ -337,23 +306,49 @@ LSYM(Lad_l):
RETLDM "r4, r5"
@ Adjust exponents for denormalized arguments.
+ @ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
teq r4, #0
- eoreq xh, xh, #0x00100000
- addeq r4, r4, #(1 << 20)
eor yh, yh, #0x00100000
- subne r5, r5, #(1 << 20)
+ eoreq xh, xh, #0x00100000
+ addeq r4, r4, #1
+ subne r5, r5, #1
b LSYM(Lad_x)
- @ Result is x - x = 0, unless x = INF or NAN.
-LSYM(Lad_z):
- sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000
- and r2, xh, ip
- teq r2, ip
- orreq xh, ip, #0x00080000
+
+LSYM(Lad_s):
+ mvns ip, r4, asr #21
+ mvnnes ip, r5, asr #21
+ beq LSYM(Lad_i)
+
+ teq r4, r5
+ teqeq xl, yl
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r4, #0
+ moveq xh, yh
+ moveq xl, yl
+ RETLDM "r4, r5"
+
+1: teq xh, yh
+
+ @ Result is x - x = 0.
movne xh, #0
- mov xl, #0
- RET
+ movne xl, #0
+ RETLDM "r4, r5" ne
+
+ @ Result is x + x = 2x.
+ movs ip, r4, lsr #21
+ bne 2f
+ movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ orrcs xh, xh, #0x80000000
+ RETLDM "r4, r5"
+2: adds r4, r4, #(2 << 21)
+ addcc xh, xh, #(1 << 20)
+ RETLDM "r4, r5" cc
+ and r5, xh, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
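
The x + x case in the hunk above never touches the general path: doubling
a finite value is just "exponent + 1" on the raw bits, a denormal doubles
correctly with a plain left shift of its fraction (a carry into the
exponent field is exactly right), and only the overflow check (the addcc
falling through toward Lad_o) needs care. Equivalent C, as an
illustration:

    #include <stdint.h>

    static uint64_t double_bits(uint64_t x)   /* x finite, raw bits */
    {
        uint64_t sign = x & (1ull << 63);
        uint64_t exp  = (x >> 52) & 0x7ff;
        if (exp == 0)                             /* denormal or zero */
            return sign | ((x << 1) & ~(1ull << 63));
        if (exp >= 0x7fe)                         /* would overflow   */
            return sign | (0x7ffull << 52);       /* +/-INF           */
        return x + (1ull << 52);                  /* exponent + 1     */
    }
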
@@ -367,19 +362,18 @@ LSYM(Lad_o):
@ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
@ if either is NAN: return NAN
@ if opposite sign: return NAN
- @ return xh-xl (which is INF or -INF)
+ @ otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
- teq r4, ip
+ mvns ip, r4, asr #21
movne xh, yh
movne xl, yl
- teqeq r5, ip
- RETLDM "r4, r5" ne
-
+ mvneqs ip, r5, asr #21
+ movne yh, xh
+ movne yl, xl
orrs r4, xl, xh, lsl #12
- orreqs r4, yl, yh, lsl #12
+ orreqs r5, yl, yh, lsl #12
teqeq xh, yh
- orrne xh, r5, #0x00080000
- movne xl, #0
+ orrne xh, xh, #0x00080000 @ quiet NAN
RETLDM "r4, r5"
FUNC_END aeabi_dsub
@@ -389,14 +383,17 @@ LSYM(Lad_i):
ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
+
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52-1) << 20)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
mov r5, #0 @ sign bit is 0
+ .ifnc xl, r0
mov xl, r0
+ .endif
mov xh, #0
b LSYM(Lad_l)
@@ -405,15 +402,18 @@ ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf
+
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52-1) << 20)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
ands r5, r0, #0x80000000 @ sign bit in r5
rsbmi r0, r0, #0 @ absolute value
+ .ifnc xl, r0
mov xl, r0
+ .endif
mov xh, #0
b LSYM(Lad_l)
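
Both integer-to-double entry points now just park the (absolute) value in
xl, preload r4 with an exponent meaning "the significand currently sits at
bit 0 of the 53-bit window", and reuse the adddf3 normalization code at
Lad_l. What that amounts to, sketched in C (exact rounding is never needed
since every int32 is representable):

    #include <stdint.h>

    static double si_to_double(int32_t i)
    {
        if (i == 0) return 0.0;
        uint64_t sign = i < 0 ? 1ull << 63 : 0;
        uint64_t m    = i < 0 ? 0u - (uint32_t)i : (uint32_t)i;
        int exp = 1023 + 52;                     /* if m occupied bit 52 */
        while (!(m >> 52)) { m <<= 1; exp--; }   /* normalize (Lad_l)    */
        uint64_t bits = sign | ((uint64_t)exp << 52)
                             | (m & ((1ull << 52) - 1));
        union { uint64_t u; double d; } pun = { bits };
        return pun.d;
    }
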
@@ -422,26 +422,23 @@ ARM_FUNC_ALIAS aeabi_i2d floatsidf
ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
-
- movs r2, r0, lsl #1
- beq 1f @ value is 0.0 or -0.0
+
+ movs r2, r0, lsl #1 @ toss sign bit
mov xh, r2, asr #3 @ stretch exponent
mov xh, xh, rrx @ retrieve sign bit
mov xl, r2, lsl #28 @ retrieve remaining bits
- ands r2, r2, #0xff000000 @ isolate exponent
- beq 2f @ exponent was 0 but not mantissa
- teq r2, #0xff000000 @ check if INF or NAN
+ andnes r3, r2, #0xff000000 @ isolate exponent
+ teqne r3, #0xff000000 @ if not 0, check if INF or NAN
eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
- RET
+ RETc(ne) @ and return it.
-1: mov xh, r0
- mov xl, #0
- RET
+ teq r2, #0 @ if actually 0
+ teqne r3, #0xff000000 @ or INF or NAN
+ RETc(eq) @ we are done already.
-2: @ value was denormalized. We can normalize it now.
+ @ value was denormalized. We can normalize it now.
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x380 << 20) @ setup corresponding exponent
- add r4, r4, #(1 << 20)
+ mov r4, #0x380 @ setup corresponding exponent
and r5, xh, #0x80000000 @ move sign bit in r5
bic xh, xh, #0x80000000
b LSYM(Lad_l)
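
Widening float to double is mostly a field transplant: 29 zero bits are
appended to the mantissa and the exponent bias moves from 127 to 1023, a
fixup of 0x380 (the eor #0x38000000 above applies it together with the
stretch). Zeros and INF/NAN keep their exponent pattern untouched, and a
denormal float is renormalized through the shared Lad_l code. The whole
mapping in C, as an illustration:

    #include <stdint.h>

    static uint64_t f2d_bits(uint32_t f)
    {
        uint32_t sign = f >> 31, exp = (f >> 23) & 0xff, m = f & 0x7fffffu;
        uint64_t dm = (uint64_t)m << 29, dexp;
        if (exp == 0xff)        dexp = 0x7ff;                /* INF/NAN */
        else if (exp != 0)      dexp = exp + (1023 - 127);   /* normal  */
        else if (m == 0)        return (uint64_t)sign << 63; /* +/-0    */
        else {                                   /* denormal: normalize */
            dexp = (1023 - 127) + 1;
            while (!(dm >> 52)) { dm <<= 1; dexp--; }
            dm &= (1ull << 52) - 1;
        }
        return (uint64_t)sign << 63 | dexp << 52 | dm;
    }
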
@@ -451,76 +448,90 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf
-
+
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
- adr ip, 1f
+ adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
+
mov r5, #0
b 2f
ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf
+
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
- adr ip, 1f
+ adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
+
ands r5, ah, #0x80000000 @ sign bit in r5
bpl 2f
rsbs al, al, #0
rsc ah, ah, #0
2:
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52 - 1) << 20)
-#if !defined (__VFP_FP__) && !defined(__ARMEB__)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+
@ FPA little-endian: must swap the word order.
+ .ifnc xh, ah
mov ip, al
mov xh, ah
mov xl, ip
-#endif
- movs ip, xh, lsr #23
+ .endif
+
+ movs ip, xh, lsr #22
beq LSYM(Lad_p)
- @ The value's too big. Scale it down a bit...
+
+ @ The value is too big. Scale it down a bit...
mov r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
+ add r2, r2, ip
+
rsb r3, r2, #32
mov ip, xl, lsl r3
mov xl, xl, lsr r2
orr xl, xl, xh, lsl r3
mov xh, xh, lsr r2
- add r4, r4, r2, lsl #20
+ add r4, r4, r2
b LSYM(Lad_p)
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-1:
+
@ Legacy code expects the result to be returned in f0. Copy it
@ there as well.
+LSYM(f0_ret):
stmfd sp!, {r0, r1}
ldfd f0, [sp], #8
RETLDM
+
#endif
+
FUNC_END floatdidf
FUNC_END aeabi_l2d
FUNC_END floatundidf
@@ -534,46 +545,38 @@ ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
stmfd sp!, {r4, r5, r6, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
-
- @ Trap any INF/NAN.
- teq r4, ip
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ andnes r5, ip, yh, lsr #20
+ teqne r4, ip
teqne r5, ip
- beq LSYM(Lml_s)
+ bleq LSYM(Lml_s)
- @ Trap any multiplication by 0.
- orrs r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Lml_z)
-
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then add both exponents together.
- movs r4, r4, lsr #1
- teqne r5, #0
- beq LSYM(Lml_d)
-LSYM(Lml_x):
- add r4, r4, r5, asr #1
-
- @ Preserve final sign in r4 along with exponent for now.
- teq xh, yh
- orrmi r4, r4, #0x8000
+ @ Add exponents together
+ add r4, r4, r5
+
+ @ Determine final sign.
+ eor r6, xh, yh
@ Convert mantissa to unsigned integer.
- bic xh, xh, ip, lsl #1
- bic yh, yh, ip, lsl #1
+ @ If power of two, branch to a separate path.
+ bic xh, xh, ip, lsl #21
+ bic yh, yh, ip, lsl #21
+ orrs r5, xl, xh, lsl #12
+ orrnes r5, yl, yh, lsl #12
orr xh, xh, #0x00100000
orr yh, yh, #0x00100000
+ beq LSYM(Lml_1)
#if __ARM_ARCH__ < 4
+ @ Put sign bit in r6, which will be restored in yl later.
+ and r6, r6, #0x80000000
+
@ Well, no way to make it shorter without the umull instruction.
- @ We must perform that 53 x 53 bit multiplication by hand.
- stmfd sp!, {r7, r8, r9, sl, fp}
+ stmfd sp!, {r6, r7, r8, r9, sl, fp}
mov r7, xl, lsr #16
mov r8, yl, lsr #16
mov r9, xh, lsr #16
@@ -625,92 +628,83 @@ LSYM(Lml_x):
mul fp, xh, yh
adcs r5, r5, fp
adc r6, r6, #0
- ldmfd sp!, {r7, r8, r9, sl, fp}
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp}
#else
- @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
+ @ Here is the actual multiplication.
umull ip, lr, xl, yl
mov r5, #0
- umlal lr, r5, xl, yh
umlal lr, r5, xh, yl
+ and yl, r6, #0x80000000
+ umlal lr, r5, xl, yh
mov r6, #0
umlal r5, r6, xh, yh
#endif
@ The LSBs in ip are only significant for the final rounding.
- @ Fold them into one bit of lr.
+ @ Fold them into lr.
teq ip, #0
orrne lr, lr, #1
- @ Put final sign in xh.
- mov xh, r4, lsl #16
- bic r4, r4, #0x8000
-
- @ Adjust result if one extra MSB appeared (one of four times).
- tst r6, #(1 << 9)
- beq 1f
- add r4, r4, #(1 << 19)
- movs r6, r6, lsr #1
- movs r5, r5, rrx
- movs lr, lr, rrx
- orrcs lr, lr, #1
-1:
- @ Scale back to 53 bits.
- @ xh contains sign bit already.
- orr xh, xh, r6, lsl #12
- orr xh, xh, r5, lsr #20
- mov xl, r5, lsl #12
- orr xl, xl, lr, lsr #20
-
- @ Apply exponent bias, check range for underflow.
- sub r4, r4, #0x00f80000
- subs r4, r4, #0x1f000000
- ble LSYM(Lml_u)
-
- @ Round the result.
- movs lr, lr, lsl #12
- bpl 1f
- adds xl, xl, #1
- adc xh, xh, #0
- teq lr, #0x80000000
- biceq xl, xl, #1
-
- @ Rounding may have produced an extra MSB here.
- @ The extra bit is cleared before merging the exponent below.
- tst xh, #0x00200000
- addne r4, r4, #(1 << 19)
+ @ Adjust result upon the MSB position.
+ sub r4, r4, #0xff
+ cmp r6, #(1 << (20-11))
+ sbc r4, r4, #0x300
+ bcs 1f
+ movs lr, lr, lsl #1
+ adcs r5, r5, r5
+ adc r6, r6, r6
1:
- @ Check exponent for overflow.
- adds ip, r4, #(1 << 19)
- tst ip, #(1 << 30)
- bne LSYM(Lml_o)
-
- @ Add final exponent.
- bic xh, xh, #0x00300000
- orr xh, xh, r4, lsl #1
+ @ Shift to final position, add sign to result.
+ orr xh, yl, r6, lsl #11
+ orr xh, xh, r5, lsr #21
+ mov xl, r5, lsl #11
+ orr xl, xl, lr, lsr #21
+ mov lr, lr, lsl #11
+
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp lr, #0x80000000
+ moveqs lr, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
- @ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ and r6, r6, #0x80000000
+ orr xh, r6, xh
+ orr xl, xl, yl
eor xh, xh, yh
-LSYM(Ldv_z):
- bic xh, xh, #0x7fffffff
- mov xl, #0
- RETLDM "r4, r5, r6"
+ subs r4, r4, ip, lsr #1
+ rsbgts r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+
+ @ Under/overflow: fix things up for the code below.
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
- @ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
- cmn r4, #(53 << 19)
+ @ Overflow?
+ bgt LSYM(Lml_o)
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r4, #(53 + 1)
movle xl, #0
bicle xh, xh, #0x7fffffff
RETLDM "r4, r5, r6" le
@ Find out proper shift value.
-LSYM(Lml_r):
- mvn r4, r4, asr #19
- subs r4, r4, #30
+ rsb r4, r4, #0
+ subs r4, r4, #32
bge 2f
adds r4, r4, #12
bgt 1f
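
The multiplication core above produces the full 106-bit product of the two
53-bit significands in r6:r5:lr:ip, most to least significant; ip never
holds result bits proper and only feeds the sticky bit folded into lr just
after. On targets with umull/umlal the scheme is the textbook
four-partial-product split, which in C with 64-bit intermediates reads
(illustrative only):

    #include <stdint.h>

    static void mul_53x53(uint32_t xh, uint32_t xl,
                          uint32_t yh, uint32_t yl, uint32_t out[4])
    {
        uint64_t p0 = (uint64_t)xl * yl;
        uint64_t p1 = (uint64_t)xl * yh;
        uint64_t p2 = (uint64_t)xh * yl;
        uint64_t p3 = (uint64_t)xh * yh;
        uint64_t mid = (p0 >> 32) + (uint32_t)p1 + (uint32_t)p2;
        uint64_t hi  = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
        out[0] = (uint32_t)p0;          /* ip: sticky material only */
        out[1] = (uint32_t)mid;         /* lr */
        out[2] = (uint32_t)hi;          /* r5 */
        out[3] = (uint32_t)(hi >> 32);  /* r6 */
    }
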
@@ -721,14 +715,12 @@ LSYM(Lml_r):
mov r3, xl, lsl r5
mov xl, xl, lsr r4
orr xl, xl, xh, lsl r5
- movs xh, xh, lsl #1
- mov xh, xh, lsr r4
- mov xh, xh, rrx
+ and r2, xh, #0x80000000
+ bic xh, xh, #0x80000000
adds xl, xl, r3, lsr #31
- adc xh, xh, #0
- teq lr, #0
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ adc xh, r2, xh, lsr r4
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
@@ -741,53 +733,70 @@ LSYM(Lml_r):
bic xh, xh, #0x7fffffff
adds xl, xl, r3, lsr #31
adc xh, xh, #0
- teq lr, #0
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
@ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
2: rsb r5, r4, #32
- mov r6, xl, lsl r5
+ orr lr, lr, xl, lsl r5
mov r3, xl, lsr r4
orr r3, r3, xh, lsl r5
mov xl, xh, lsr r4
bic xh, xh, #0x7fffffff
bic xl, xl, xh, lsr r4
add xl, xl, r3, lsr #31
- orrs r6, r6, lr
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
- mov lr, #0
teq r4, #0
bne 2f
and r6, xh, #0x80000000
1: movs xl, xl, lsl #1
- adc xh, lr, xh, lsl #1
+ adc xh, xh, xh
tst xh, #0x00100000
- subeq r4, r4, #(1 << 19)
+ subeq r4, r4, #1
beq 1b
orr xh, xh, r6
teq r5, #0
- bne LSYM(Lml_x)
+ movne pc, lr
2: and r6, yh, #0x80000000
3: movs yl, yl, lsl #1
- adc yh, lr, yh, lsl #1
+ adc yh, yh, yh
tst yh, #0x00100000
- subeq r5, r5, #(1 << 20)
+ subeq r5, r5, #1
beq 3b
orr yh, yh, r6
- b LSYM(Lml_x)
+ mov pc, lr
- @ One or both args are INF or NAN.
LSYM(Lml_s):
+ @ Isolate the INF and NAN cases away
+ teq r4, ip
+ and r5, ip, yh, lsr #20
+ teqne r5, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor xh, xh, yh
+ bic xh, xh, #0x7fffffff
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+1: @ One or both args are INF or NAN.
orrs r6, xl, xh, lsl #1
+ moveq xl, yl
+ moveq xh, yh
orrnes r6, yl, yh, lsl #1
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
teq r4, ip
@@ -797,6 +806,8 @@ LSYM(Lml_s):
1: teq r5, ip
bne LSYM(Lml_i)
orrs r6, yl, yh, lsl #12
+ movne xl, yl
+ movne xh, yh
bne LSYM(Lml_n) @ <anything> * NAN -> NAN
@ Result is INF, but we need to determine its sign.
@@ -811,9 +822,9 @@ LSYM(Lml_o):
mov xl, #0
RETLDM "r4, r5, r6"
- @ Return NAN.
+ @ Return a quiet NAN.
LSYM(Lml_n):
- mov xh, #0x7f000000
+ orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
@@ -825,41 +836,31 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
stmfd sp!, {r4, r5, r6, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
-
- @ Trap any INF/NAN or zeroes.
- teq r4, ip
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ andnes r5, ip, yh, lsr #20
+ teqne r4, ip
teqne r5, ip
- orrnes r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Ldv_s)
+ bleq LSYM(Ldv_s)
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then substract divisor exponent from dividend's.
- movs r4, r4, lsr #1
- teqne r5, #0
- beq LSYM(Ldv_d)
-LSYM(Ldv_x):
- sub r4, r4, r5, asr #1
+ @ Subtract divisor exponent from dividend's.
+ sub r4, r4, r5
@ Preserve final sign into lr.
eor lr, xh, yh
@ Convert mantissa to unsigned integer.
@ Dividend -> r5-r6, divisor -> yh-yl.
- mov r5, #0x10000000
+ orrs r5, yl, yh, lsl #12
+ mov xh, xh, lsl #12
+ beq LSYM(Ldv_1)
mov yh, yh, lsl #12
+ mov r5, #0x10000000
orr yh, r5, yh, lsr #4
orr yh, yh, yl, lsr #24
- movs yl, yl, lsl #8
- mov xh, xh, lsl #12
- teqeq yh, r5
- beq LSYM(Ldv_1)
+ mov yl, yl, lsl #8
orr r5, r5, xh, lsr #4
orr r5, r5, xl, lsr #24
mov r6, xl, lsl #8
@@ -868,21 +869,15 @@ LSYM(Ldv_x):
and xh, lr, #0x80000000
@ Ensure result will land to known bit position.
+ @ Apply exponent bias accordingly.
cmp r5, yh
cmpeq r6, yl
+ adc r4, r4, #(255 - 2)
+ add r4, r4, #0x300
bcs 1f
- sub r4, r4, #(1 << 19)
movs yh, yh, lsr #1
mov yl, yl, rrx
1:
- @ Apply exponent bias, check range for over/underflow.
- add r4, r4, #0x1f000000
- add r4, r4, #0x00f80000
- cmn r4, #(53 << 19)
- ble LSYM(Ldv_z)
- cmp r4, ip, lsr #1
- bge LSYM(Lml_o)
-
@ Perform first subtraction to align result to a nibble.
subs r6, r6, yl
sbc r5, r5, yh
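
The comparison above pre-aligns divisor and dividend so that the very
first quotient bit is known to be 1, and the adc merges that knowledge
into the exponent bias in the same instruction. The division loop itself
is unchanged by this patch and therefore not shown in the hunks; stripped
of its four-bits-per-round nibble alignment, it is plain shift-and-subtract,
roughly:

    #include <stdint.h>

    /* n, d are aligned significands with d <= n < 2*d on entry.  */
    static uint64_t div_mantissa(uint64_t n, uint64_t d, uint64_t *rem)
    {
        uint64_t q = 0;
        for (int i = 0; i < 55; i++) {        /* 53 bits + guard bits */
            q <<= 1;
            if (n >= d) { n -= d; q |= 1; }
            n <<= 1;
        }
        *rem = n;                             /* nonzero -> sticky bit */
        return q;
    }
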
@@ -944,73 +939,42 @@ LSYM(Ldv_x):
orreq xh, xh, xl
moveq xl, #0
3:
- @ Check if denormalized result is needed.
- cmp r4, #0
- ble LSYM(Ldv_u)
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
- @ Apply proper rounding.
+ @ Round the result, merge final exponent.
subs ip, r5, yh
subeqs ip, r6, yl
+ moveqs ip, xl, lsr #1
adcs xl, xl, #0
- adc xh, xh, #0
- teq ip, #0
- biceq xl, xl, #1
-
- @ Add exponent to result.
- bic xh, xh, #0x00100000
- orr xh, xh, r4, lsl #1
+ adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
and lr, lr, #0x80000000
orr xh, lr, xh, lsr #12
- add r4, r4, #0x1f000000
- add r4, r4, #0x00f80000
- cmp r4, ip, lsr #1
- bge LSYM(Lml_o)
- cmp r4, #0
- orrgt xh, xh, r4, lsl #1
+ adds r4, r4, ip, lsr #1
+ rsbgts r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
RETLDM "r4, r5, r6" gt
- cmn r4, #(53 << 19)
- ble LSYM(Ldv_z)
orr xh, xh, #0x00100000
mov lr, #0
- b LSYM(Lml_r)
+ subs r4, r4, #1
+ b LSYM(Lml_u)
- @ Result must be denormalized: put remainder in lr for
- @ rounding considerations.
+ @ Result might need to be denormalized: put remainder bits
+ @ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
- b LSYM(Lml_r)
-
- @ One or both arguments are denormalized.
- @ Scale them leftwards and preserve sign bit.
-LSYM(Ldv_d):
- mov lr, #0
- teq r4, #0
- bne 2f
- and r6, xh, #0x80000000
-1: movs xl, xl, lsl #1
- adc xh, lr, xh, lsl #1
- tst xh, #0x00100000
- subeq r4, r4, #(1 << 19)
- beq 1b
- orr xh, xh, r6
- teq r5, #0
- bne LSYM(Ldv_x)
-2: and r6, yh, #0x80000000
-3: movs yl, yl, lsl #1
- adc yh, lr, yh, lsl #1
- tst yh, #0x00100000
- subeq r5, r5, #(1 << 20)
- beq 3b
- orr yh, yh, r6
- b LSYM(Ldv_x)
+ b LSYM(Lml_u)
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
+ and r5, ip, yh, lsr #20
teq r4, ip
teqeq r5, ip
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
@@ -1018,13 +982,23 @@ LSYM(Ldv_s):
bne 1f
orrs r4, xl, xh, lsl #12
bne LSYM(Lml_n) @ NAN / <anything> -> NAN
- b LSYM(Lml_i) @ INF / <anything> -> INF
+ teq r5, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
- bne LSYM(Lml_n) @ <anything> / NAN -> NAN
- b LSYM(Lml_z) @ <anything> / INF -> 0
-2: @ One or both arguments are 0.
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are non-zero, we need to normalize and resume above.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+ @ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
@@ -1038,6 +1012,8 @@ LSYM(Ldv_s):
#ifdef L_cmpdf2
+@ Note: only r0 (return value) and ip are clobbered here.
+
ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
mov ip, #-1
@@ -1053,15 +1029,13 @@ ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
mov ip, #1 @ how should we specify unordered here?
-1: stmfd sp!, {r4, r5, lr}
+1: str ip, [sp, #-4]
@ Trap any INF/NAN first.
- mov lr, #0x7f000000
- orr lr, lr, #0x00f00000
- and r4, xh, lr
- and r5, yh, lr
- teq r4, lr
- teqne r5, lr
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ mov ip, yh, lsl #1
+ mvnnes ip, ip, asr #21
beq 3f
@ Test for equality.
@@ -1071,37 +1045,37 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
teqne xh, yh @ or xh == yh
teqeq xl, yl @ and xl == yl
moveq r0, #0 @ then equal.
- RETLDM "r4, r5" eq
+ RETc(eq)
- @ Check for sign difference.
- teq xh, yh
- movmi r0, xh, asr #31
- orrmi r0, r0, #1
- RETLDM "r4, r5" mi
+ @ Clear C flag
+ cmn r0, #0
- @ Compare exponents.
- cmp r4, r5
+ @ Compare sign,
+ teq xh, yh
- @ Compare mantissa if exponents are equal.
- moveq xh, xh, lsl #12
- cmpeq xh, yh, lsl #12
+ @ Compare values if same sign
+ cmppl xh, yh
cmpeq xl, yl
+
+ @ Result:
movcs r0, yh, asr #31
mvncc r0, yh, asr #31
orr r0, r0, #1
- RETLDM "r4, r5"
+ RET
@ Look for a NAN.
-3: teq r4, lr
+3: mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
bne 4f
- orrs xl, xl, xh, lsl #12
+ orrs ip, xl, xh, lsl #12
bne 5f @ x is NAN
-4: teq r5, lr
+4: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
bne 2b
- orrs yl, yl, yh, lsl #12
+ orrs ip, yl, yh, lsl #12
beq 2b @ y is not NAN
-5: mov r0, ip @ return unordered code from ip
- RETLDM "r4, r5"
+5: ldr r0, [sp, #-4] @ unordered return code
+ RET
FUNC_END gedf2
FUNC_END gtdf2
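
The comparison no longer needs saved registers: once NANs are filtered
out, IEEE doubles order exactly like sign-magnitude integers, so one
flag-setting chain (teq to test the signs, cmppl/cmpeq on the magnitudes)
covers every case, with the cmn first clearing C so the mixed-sign case
falls through correctly. The same idea in C, illustrative names:

    #include <stdint.h>

    static int dcmp(uint64_t x, uint64_t y)   /* raw bits, no NANs */
    {
        if (((x | y) & ~(1ull << 63)) == 0) return 0;  /* +0 == -0 */
        if (x == y) return 0;
        int xs = (int)(x >> 63), ys = (int)(y >> 63);
        if (xs != ys) return ys - xs;        /* negative < positive */
        return ((x < y) ^ xs) ? -1 : 1;      /* reverse if negative */
    }
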
@@ -1112,6 +1086,7 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
FUNC_END cmpdf2
ARM_FUNC_START aeabi_cdrcmple
+
mov ip, r0
mov r0, r2
mov r2, ip
@@ -1122,85 +1097,95 @@ ARM_FUNC_START aeabi_cdrcmple
ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
-6: stmfd sp!, {r0, r1, r2, r3, lr}
+6: stmfd sp!, {r0, lr}
ARM_CALL cmpdf2
@ Set the Z flag correctly, and the C flag unconditionally.
cmp r0, #0
@ Clear the C flag if the return value was -1, indicating
@ that the first operand was smaller than the second.
cmnmi r0, #0
- RETLDM "r0, r1, r2, r3"
+ RETLDM "r0"
+
FUNC_END aeabi_cdcmple
FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
ARM_FUNC_START aeabi_dcmpeq
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
moveq r0, #1 @ Equal to.
movne r0, #0 @ Less than, greater than, or unordered.
RETLDM
+
FUNC_END aeabi_dcmpeq
ARM_FUNC_START aeabi_dcmplt
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movcc r0, #1 @ Less than.
movcs r0, #0 @ Equal to, greater than, or unordered.
RETLDM
+
FUNC_END aeabi_dcmplt
ARM_FUNC_START aeabi_dcmple
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movls r0, #1 @ Less than or equal to.
movhi r0, #0 @ Greater than or unordered.
RETLDM
+
FUNC_END aeabi_dcmple
ARM_FUNC_START aeabi_dcmpge
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
RETLDM
+
FUNC_END aeabi_dcmpge
ARM_FUNC_START aeabi_dcmpgt
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movcc r0, #1 @ Operand 2 is less than operand 1.
movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
@ or they are unordered.
RETLDM
+
FUNC_END aeabi_dcmpgt
-
+
#endif /* L_cmpdf2 */
#ifdef L_unorddf2
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
-
- str lr, [sp, #-4]!
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and lr, xh, ip
- teq lr, ip
+
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
bne 1f
- orrs xl, xl, xh, lsl #12
+ orrs ip, xl, xh, lsl #12
bne 3f @ x is NAN
-1: and lr, yh, ip
- teq lr, ip
+1: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
bne 2f
- orrs yl, yl, yh, lsl #12
+ orrs ip, yl, yh, lsl #12
bne 3f @ y is NAN
2: mov r0, #0 @ arguments are ordered.
- RETLDM
+ RET
3: mov r0, #1 @ arguments are unordered.
- RETLDM
+ RET
FUNC_END aeabi_dcmpun
FUNC_END unorddf2
@@ -1211,31 +1196,22 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
- orrs ip, xl, xh, lsl #1
- beq 1f @ value is 0.
-
- mov r3, r3, rrx @ preserve C flag (the actual sign)
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, xh, ip
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic ip, ip, #0x40000000
- cmp r2, ip
- bcc 1f @ value is too small
- add ip, ip, #(31 << 20)
- cmp r2, ip
- bcs 3f @ value is too large
-
- rsb r2, r2, ip
- mov ip, xh, lsl #11
- orr ip, ip, #0x80000000
- orr ip, ip, xl, lsr #21
- mov r2, r2, lsr #20
- tst r3, #0x80000000 @ the sign bit
- mov r0, ip, lsr r2
+ mov r2, xh, lsl #1
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bls 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ tst xh, #0x80000000 @ the sign bit
+ mov r0, r3, lsr r2
rsbne r0, r0, #0
RET
@@ -1243,8 +1219,8 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
RET
2: orrs xl, xl, xh, lsl #12
- bne 4f @ r0 is NAN.
-3: ands r0, r3, #0x80000000 @ the sign bit
+ bne 4f @ x is NAN.
+3: ands r0, xh, #0x80000000 @ the sign bit
moveq r0, #0x7fffffff @ maximum signed positive si
RET
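
The new fixdfsi classifier is compact: xh << 1 drops the sign, adding
1 << 21 makes an all-ones exponent carry out (C set means INF/NAN), a
clear top bit afterwards means the exponent was below the bias (value
under 1), and the remaining subtraction yields the right-shift count
directly. In C, as an illustration:

    #include <stdint.h>

    static int32_t d2iz(uint32_t xh, uint32_t xl)
    {
        int32_t e = (int32_t)((xh << 1) >> 21) - 1023; /* unbiased exp */
        if (e < 0) return 0;                           /* |x| < 1      */
        if (e == 1024 && ((xh << 12) | xl)) return 0;  /* NAN -> 0     */
        if (e > 30)                                    /* saturate     */
            return (xh & 0x80000000u) ? INT32_MIN : INT32_MAX;
        uint32_t m = 0x80000000u | (xh << 11) | (xl >> 21);
        uint32_t r = m >> (31 - e);
        return (xh & 0x80000000u) ? -(int32_t)r : (int32_t)r;
    }
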
@@ -1260,29 +1236,22 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
- orrs ip, xl, xh, lsl #1
- movcss r0, #0 @ value is negative
- RETc(eq) @ or 0 (xl, xh overlap r0)
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, xh, ip
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic ip, ip, #0x40000000
- cmp r2, ip
- bcc 1f @ value is too small
- add ip, ip, #(31 << 20)
- cmp r2, ip
- bhi 3f @ value is too large
-
- rsb r2, r2, ip
- mov ip, xh, lsl #11
- orr ip, ip, #0x80000000
- orr ip, ip, xl, lsr #21
- mov r2, r2, lsr #20
- mov r0, ip, lsr r2
+ movs r2, xh, lsl #1
+ bcs 1f @ value is negative
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bmi 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ mov r0, r3, lsr r2
RET
1: mov r0, #0
@@ -1305,90 +1274,60 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
- orrs r2, xl, xh, lsl #1
- moveq r0, r2, rrx
- RETc(eq) @ value is 0.0 or -0.0
-
+
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, ip, xh
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic xh, xh, ip
- cmp r2, #(0x380 << 20)
- bls 4f @ value is too small
-
- @ shift and round mantissa
-1: movs r3, xl, lsr #29
- adc r3, r3, xh, lsl #3
-
- @ if halfway between two numbers, round towards LSB = 0.
- mov xl, xl, lsl #3
- teq xl, #0x80000000
- biceq r3, r3, #1
-
- @ rounding might have created an extra MSB. If so adjust exponent.
- tst r3, #0x00800000
- addne r2, r2, #(1 << 20)
- bicne r3, r3, #0x00800000
-
- @ check exponent for overflow
- mov ip, #(0x400 << 20)
- orr ip, ip, #(0x07f << 20)
- cmp r2, ip
- bcs 3f @ overflow
-
- @ adjust exponent, merge with sign bit and mantissa.
- movs xh, xh, lsl #1
- mov r2, r2, lsl #4
- orr r0, r3, r2, rrx
- eor r0, r0, #0x40000000
+ mov r2, xh, lsl #1
+ subs r3, r2, #((1023 - 127) << 21)
+ subcss ip, r3, #(1 << 21)
+ rsbcss ip, ip, #(254 << 21)
+ bls 2f @ value is out of range
+
+1: @ shift and round mantissa
+ and ip, xh, #0x80000000
+ mov r2, xl, lsl #3
+ orr xl, ip, xl, lsr #29
+ cmp r2, #0x80000000
+ adc r0, xl, r3, lsl #2
+ biceq r0, r0, #1
RET
-2: @ chech for NAN
- orrs xl, xl, xh, lsl #12
- movne r0, #0x7f000000
- orrne r0, r0, #0x00c00000
- RETc(ne) @ return NAN
+2: @ either overflow or underflow
+ tst xh, #0x40000000
+ bne 3f @ overflow
-3: @ return INF with sign
- and r0, xh, #0x80000000
- orr r0, r0, #0x7f000000
- orr r0, r0, #0x00800000
- RET
+ @ check if denormalized value is possible
+ adds r2, r3, #(23 << 21)
+ andlt r0, xh, #0x80000000 @ too small, return signed 0.
+ RETc(lt)
-4: @ check if denormalized value is possible
- subs r2, r2, #((0x380 - 24) << 20)
- andle r0, xh, #0x80000000 @ too small, return signed 0.
- RETc(le)
-
@ denormalize value so we can resume with the code above afterwards.
orr xh, xh, #0x00100000
- mov r2, r2, lsr #20
- rsb r2, r2, #25
- cmp r2, #20
- bgt 6f
-
+ mov r2, r2, lsr #21
+ rsb r2, r2, #24
rsb ip, r2, #32
- mov r3, xl, lsl ip
+ movs r3, xl, lsl ip
mov xl, xl, lsr r2
- orr xl, xl, xh, lsl ip
- movs xh, xh, lsl #1
- mov xh, xh, lsr r2
- mov xh, xh, rrx
-5: teq r3, #0 @ fold r3 bits into the LSB
- orrne xl, xl, #1 @ for rounding considerations.
- mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
+ orrne xl, xl, #1 @ fold r3 for rounding considerations.
+ mov r3, xh, lsl #11
+ mov r3, r3, lsr #11
+ orr xl, xl, r3, lsl ip
+ mov r3, r3, lsr r2
+ mov r3, r3, lsl #1
b 1b
-6: rsb r2, r2, #(12 + 20)
- rsb ip, r2, #32
- mov r3, xl, lsl r2
- mov xl, xl, lsr ip
- orr xl, xl, xh, lsl r2
- and xh, xh, #0x80000000
- b 5b
+3: @ check for NAN
+ mvns r3, r2, asr #21
+ bne 5f @ simple overflow
+ orrs r3, xl, xh, lsl #12
+ movne r0, #0x7f000000
+ orrne r0, r0, #0x00c00000
+ RETc(ne) @ return NAN
+
+5: @ return INF with sign
+ and r0, xh, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
FUNC_END aeabi_d2f
FUNC_END truncdfsf2
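
For reference, the complete truncation rule the rewritten code implements:
re-bias the exponent by 1023 - 127 = 896 (the subs #((1023 - 127) << 21)
above), round to nearest-even on the 29 dropped mantissa bits, and divert
to the denormal, INF and NAN exits where the range check fails. A C
rendition of the same mapping, illustrative only:

    #include <stdint.h>

    static uint32_t d2f_bits(uint64_t x)
    {
        uint32_t sign = (uint32_t)(x >> 63) << 31;
        int32_t  e = (int32_t)((x >> 52) & 0x7ff) - (1023 - 127);
        uint64_t m = (x & ((1ull << 52) - 1)) | (1ull << 52);
        int shift = 29;
        if (e >= 255) {                       /* overflow, INF or NAN */
            if (((x >> 52) & 0x7ff) == 0x7ff && (x << 12) != 0)
                return 0x7fc00000u;           /* quiet NAN            */
            return sign | 0x7f800000u;        /* +/-INF               */
        }
        if (e <= 0) {                         /* denormal range       */
            if (e < -24) return sign;         /* too small: signed 0  */
            shift = 29 + 1 - e;               /* extra denormal shift */
            e = 1;                            /* exponent field 0     */
        }
        uint32_t r    = (uint32_t)(m >> shift);
        uint64_t rest = m << (64 - shift);    /* dropped bits         */
        r += rest > (1ull << 63) || (rest == (1ull << 63) && (r & 1));
        return sign + ((uint32_t)(e - 1) << 23) + r;
    }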