Diffstat (limited to 'gcc')
 -rw-r--r--  gcc/ChangeLog                | 13
 -rw-r--r--  gcc/config/sh/lib1funcs.asm  | 57
 2 files changed, 43 insertions, 27 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5b70058..541c41a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+Tue Jun 25 20:59:56 2002  J"orn Rennecke <joern.rennecke@superh.com>
+
+	* config/sh/lib1funcs.asm (udivdi3): Make first divide step
+	produce a 32 bit result before normalization, then normalize with a
+	left shift.  Compute approximative error of 2nd reciprocal
+	approximation in 2's complement.  Fix mask generation from upper
+	longword of second divide stage result.
+	For large divisor, fix shift count used to truncate first stage
+	divide result; make decision if to adjust upwards based on comparison
+	of higher parts of normalized values.
+	(udivdi): Likewise.  Undo normalization of result for large divisor
+	case.
+
 2002-06-25  David S. Miller  <davem@redhat.com>
 
 	* config/sparc/sparc.md: Change \\{t,n} to \{t,n}.
diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm
index 2364db9..1021c9b 100644
--- a/gcc/config/sh/lib1funcs.asm
+++ b/gcc/config/sh/lib1funcs.asm
@@ -1501,33 +1501,33 @@ GLOBAL(udivdi3):
 	shlri r6,32,r7
 	bgt/u r9,r63,tr0 // large_divisor
 	mmulfx.w r5,r4,r4
-	shlri r2,32,r19
-	addi r20,14-1,r0
+	shlri r2,32+14,r19
+	addi r22,-31,r0
 	msub.w r1,r4,r1

 	mulu.l r1,r7,r4
 	addi r1,-3,r5
 	mulu.l r5,r19,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000
 			 - or, as the case may be, %0000000000000000 000.11111111111,
 			   still */
 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	shlrd r5,r0,r8
-	mulu.l r8,r3,r5
+	mulu.l r5,r3,r8
 	mshalds.l r1,r21,r1
 	shari r4,26,r4
-	shlli r5,32,r5
-	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-	sub r2,r5,r2
+	shlld r8,r0,r8
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	sub r2,r8,r2
 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
 	shlri r2,22,r21
 	mulu.l r21,r1,r21
+	shlld r5,r0,r8
 	addi r20,30-22,r0
-	shlli r8,32,r8
 	shlrd r21,r0,r21
 	mulu.l r21,r3,r5
 	add r8,r21,r8
-	mcmpeq.l r21,r63,r21 // See Note 1
+	mcmpgt.l r21,r63,r21 // See Note 1
 	addi r20,30,r0
 	mshfhi.l r63,r21,r21
 	sub r2,r5,r2
@@ -1555,14 +1555,15 @@ LOCAL(large_divisor):
 	mulu.l r1,r7,r4
 	addi r1,-3,r5
 	mulu.l r5,r8,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000
 			 - or, as the case may be, %0000000000000000 000.11111111111,
 			   still */
 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	shlri r5,14-1+32,r8
+	shlri r5,14-1,r8
 	mulu.l r8,r7,r5
 	mshalds.l r1,r21,r1
 	shari r4,26,r4
-	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
 	sub r25,r5,r25
 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
@@ -1575,11 +1576,11 @@ LOCAL(large_divisor):
 	add r8,r21,r8
 	shlld r2,r0,r2
 	sub r25,r5,r25
-	mextr4 r2,r25,r2
-	bgtu/u r6,r2,tr0 // no_lo_adj
+	bgtu/u r7,r25,tr0 // no_lo_adj
 	addi r8,1,r8
-	sub r2,r6,r2
+	sub r25,r7,r25
 LOCAL(no_lo_adj):
+	mextr4 r2,r25,r2
 	/* large_divisor: only needs a few adjustments.
 	   */
 	mulu.l r8,r6,r5
@@ -1647,22 +1648,22 @@ GLOBAL(umoddi3):
 	shlri r6,32,r7
 	bgt/u r9,r63,tr0 // large_divisor
 	mmulfx.w r5,r4,r4
-	shlri r2,32,r19
-	addi r20,14-1,r0
+	shlri r2,32+14,r19
+	addi r22,-31,r0
 	msub.w r1,r4,r1

 	mulu.l r1,r7,r4
 	addi r1,-3,r5
 	mulu.l r5,r19,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000
 			 - or, as the case may be, %0000000000000000 000.11111111111,
 			   still */
 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	shlrd r5,r0,r8
-	mulu.l r8,r3,r5
+	mulu.l r5,r3,r5
 	mshalds.l r1,r21,r1
 	shari r4,26,r4
-	shlli r5,32,r5
-	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	shlld r5,r0,r5
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
 	sub r2,r5,r2
 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
@@ -1672,7 +1673,7 @@ GLOBAL(umoddi3):
 	/* bubble */ /* could test r3 here to check for divide by zero.  */
 	shlrd r21,r0,r21
 	mulu.l r21,r3,r5
-	mcmpeq.l r21,r63,r21 // See Note 1
+	mcmpgt.l r21,r63,r21 // See Note 1
 	addi r20,30,r0
 	mshfhi.l r63,r21,r21
 	sub r2,r5,r2
@@ -1700,14 +1701,15 @@ LOCAL(large_divisor):
 	mulu.l r1,r7,r4
 	addi r1,-3,r5
 	mulu.l r5,r8,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000
 			 - or, as the case may be, %0000000000000000 000.11111111111,
 			   still */
 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	shlri r5,14-1+32,r8
+	shlri r5,14-1,r8
 	mulu.l r8,r7,r5
 	mshalds.l r1,r21,r1
 	shari r4,26,r4
-	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
 	sub r25,r5,r25
 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
@@ -1720,19 +1722,20 @@ LOCAL(large_divisor):
 	add r8,r21,r8
 	shlld r2,r0,r2
 	sub r25,r5,r25
-	mextr4 r2,r25,r2
-	bgtu/u r6,r2,tr0 // no_lo_adj
+	bgtu/u r7,r25,tr0 // no_lo_adj
 	addi r8,1,r8
-	sub r2,r6,r2
+	sub r25,r7,r25
 LOCAL(no_lo_adj):
+	mextr4 r2,r25,r2
 	/* large_divisor: only needs a few adjustments.
 	   */
 	mulu.l r8,r6,r5
 	ptabs r18,tr0
-	add r2,r3,r7
+	add r2,r6,r7
 	cmpgtu r5,r2,r8
 	cmvne r8,r7,r2
 	sub r2,r5,r2
+	shlrd r2,r22,r2
 	blink tr0,r63
 /* Note 1: To shift the result of the second divide stage so that the result
    always fits into 32 bits, yet we still reduce the rest sufficiently
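The ChangeLog item "Compute approximative error of 2nd reciprocal approximation in 2's complement" refers to the Newton-Raphson refinement that the mmulfx.w/msub.w sequences above apply to a fixed-point reciprocal estimate: each round forms the signed error of the current estimate and folds it back in, the classic update x' = x * (2 - d*x). The following is only a rough C model of that idea under an assumed Q32.32 format; recip_q32, the linear first guess and the iteration count are illustrative and do not mirror the 16-bit fixed-point steps the assembly uses.

#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;     /* GCC extension, used only for wide products */

/* d_fx: divisor in Q32.32 fixed point, normalized so 0.5 <= d < 1.0.
   Returns an approximation of 1/d, also in Q32.32 (value in (1.0, 2.0]).  */
static uint64_t recip_q32(uint64_t d_fx)
{
    const uint64_t one = 1ULL << 32;    /* 1.0 in Q32.32 */
    uint64_t x = 3 * one - 2 * d_fx;    /* crude linear first guess of 1/d */
    int i;

    for (i = 0; i < 4; i++) {
        /* dx is d*x in Q32.32; its signed distance from 1.0 is the
           approximation error, folded back in via x' = x * (2 - d*x).  */
        uint64_t dx = (uint64_t)(((u128)d_fx * x) >> 32);
        x = (uint64_t)(((u128)x * (2 * one - dx)) >> 32);
    }
    return x;
}

int main(void)
{
    uint64_t d_fx = 0xB0000000ULL;      /* 0.6875 in Q32.32 */
    uint64_t x = recip_q32(d_fx);
    uint64_t prod = (uint64_t)(((u128)d_fx * x) >> 32);

    /* prod should land just below 1ULL << 32, i.e. just below 1.0.  */
    printf("1/d ~= %#llx, d*(1/d) ~= %#llx, 1.0 == %#llx\n",
           (unsigned long long)x, (unsigned long long)prod,
           (unsigned long long)(1ULL << 32));
    return 0;
}

Each iteration roughly squares the relative error, so even the crude 3 - 2d first guess reaches the full 32 fractional bits after a handful of rounds; keeping the sign of that error term correct appears to be what the "2's complement" wording in the ChangeLog is about.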

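The other recurring idea is that the reciprocal estimate is deliberately kept at or below the true value ("Negate to make sure r1 ends up <= 1/r2" above), so a quotient digit derived from it can only be too small; the fix-up around LOCAL(no_lo_adj) is therefore always an upward adjustment, decided by comparing the remaining rest against the divisor, which is the comparison the patch corrects for the large-divisor path. Below is a hypothetical plain-C sketch of one such divide step; divide_step, the floor((2^63-1)/d) reciprocal and the constants in main are invented for illustration and are far cruder than the fixed-point values the assembly actually computes.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;     /* GCC extension, used only for wide products */

/* One divide step: rest / d with d normalized (bit 31 set) and
   rest < d * 2^32, so the quotient digit fits in 32 bits.
   recip is a 32-bit UNDER-estimate of 2^63 / d, so q never overshoots
   and any correction is an upward one.  */
static uint32_t divide_step(uint64_t rest, uint32_t d, uint32_t recip,
                            uint64_t *rest_out)
{
    uint64_t q = (uint64_t)(((u128)rest * recip) >> 63);
    uint64_t rem = rest - q * (uint64_t)d;

    /* The assembly keeps the estimate within one of the true digit and
       decides the single +1 by one comparison (the no_lo_adj branch);
       with this cruder recip a short loop covers the slightly larger error.  */
    while (rem >= d) {
        q++;
        rem -= d;
    }
    *rest_out = rem;
    return (uint32_t)q;
}

int main(void)
{
    uint32_t d = 0x9e3779b9u;                       /* normalized divisor */
    uint32_t recip = (uint32_t)(((1ULL << 63) - 1) / d);
    uint64_t rest = 0x0123456789abcdefULL;          /* < d << 32 */
    uint64_t rem;
    uint32_t q = divide_step(rest, d, recip, &rem);

    assert(q == rest / d && rem == rest % d);
    printf("digit %#x, rest %#llx\n", (unsigned)q, (unsigned long long)rem);
    return 0;
}

Because the assembly bounds the estimation error to a single unit, one compare plus a conditional add replaces the sketch's loop; the looser reciprocal here merely needs the same correction applied a couple more times.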