/*
 * Patch provenance (cgit header, retained for reference):
 *
 *   From 26731a9e43668e28553e494a9f5f3cf946a83012 Mon Sep 17 00:00:00 2001
 *   From: Doug Evans
 *   Date: Mon, 11 Jul 1994 22:45:37 +0000
 *   Subject: Initial revision
 *
 *   From-SVN: r7726
 *   ---
 *    gcc/config/h8300/lib1funcs.asm | 672 +++++++++++++++++++++++++++++++++++++++++
 *    1 file changed, 672 insertions(+)
 *    create mode 100644 gcc/config/h8300/lib1funcs.asm
 *   (limited to 'gcc/config/h8300')
 *
 *   diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm
 *   new file mode 100644
 *   index 0000000..370ba75
 *   --- /dev/null
 *   +++ b/gcc/config/h8300/lib1funcs.asm
 *   @@ -0,0 +1,672 @@
 */

;; libgcc1 routines for the Hitachi h8/300 cpu.
;; Contributed by Steve Chamberlain.
;; sac@cygnus.com
;; This file is in the public domain.

/* Assembler register definitions.
   A0..A3 name r0..r3 and S0..S2 name r4..r6; the L and H suffixes
   select the low and high byte of each 16 bit register.  */

#define A0      r0
#define A0L     r0l
#define A0H     r0h

#define A1      r1
#define A1L     r1l
#define A1H     r1h

#define A2      r2
#define A2L     r2l
#define A2H     r2h

#define A3      r3
#define A3L     r3l
#define A3H     r3h

#define S0      r4
#define S0L     r4l
#define S0H     r4h

#define S1      r5
#define S1L     r5l
#define S1H     r5h

#define S2      r6
#define S2L     r6l
#define S2H     r6h

/* Pointer-sized operations and registers for the plain H8/300.  */
#ifdef __H8300__
#define MOVP    mov.w   /* pointers are 16 bits */
#define ADDP    add.w
#define CMPP    cmp.w
#define PUSHP   push
#define POPP    pop

#define A0P     r0
#define A1P     r1
#define A2P     r2
#define A3P     r3
#define S0P     r4
#define S1P     r5
#define S2P     r6
#endif

/* Pointer-sized operations and registers for the H8/300H; the E names
   are the upper 16 bits of the corresponding 32 bit register.  */
#ifdef __H8300H__
#define MOVP    mov.l   /* pointers are 32 bits */
#define ADDP    add.l
#define CMPP    cmp.l
#define PUSHP   push.l
#define POPP    pop.l

#define A0P     er0
#define A1P     er1
#define A2P     er2
#define A3P     er3
#define S0P     er4
#define S1P     er5
#define S2P     er6

#define A0E     e0
#define A1E     e1
#define A2E     e2
#define A3E     e3
#endif

#ifdef L_cmpsi2
#ifdef __H8300__

;; ___cmpsi2 - three way signed compare of two SImode values.
;;
;;   in:   A0/A1 = a, A2/A3 = b (A0 and A2 are the signed high words,
;;         A1 and A3 the unsigned low words)
;;   out:  A0 = 0 if a < b, 1 if a == b, 2 if a > b

        .section .text
        .align 2
        .global ___cmpsi2
___cmpsi2:
        cmp.w   A2,A0                   ; high words identical ?
        bne     .Lcmp_differ
        cmp.w   A3,A1                   ; low words identical ?
        bne     .Lcmp_differ
        mov.w   #1,A0                   ; a == b
        rts

.Lcmp_differ:
        cmp.w   A0,A2                   ; flags from b.hi - a.hi
        bgt     .Lcmp_less              ; b.hi > a.hi (signed)
        bne     .Lcmp_greater           ; b.hi < a.hi
        cmp.w   A1,A3                   ; high words tie: b.lo - a.lo
        bhi     .Lcmp_less              ; b.lo > a.lo (unsigned)

.Lcmp_greater:
        mov.w   #2,A0                   ; a > b
        rts

.Lcmp_less:
        sub.w   A0,A0                   ; a < b
        rts
; End of the ___cmpsi2 unit.
        .end
#endif
#endif /* L_cmpsi2 */

#ifdef L_ucmpsi2
#ifdef __H8300__

;; ___ucmpsi2 - three way unsigned compare of two SImode values.
;;
;;   in:   A0/A1 = a, A2/A3 = b (A0 and A2 are the high words)
;;   out:  A0 = 0 if a < b, 1 if a == b, 2 if a > b

        .section .text
        .align 2
        .global ___ucmpsi2
___ucmpsi2:
        cmp.w   A2,A0                   ; high words identical ?
        bne     .L2
        cmp.w   A3,A1                   ; low words identical ?
        bne     .L2
        mov.w   #1,A0                   ; a == b
        rts
.L2:
        cmp.w   A0,A2                   ; flags from b.hi - a.hi
        bhi     .L4                     ; b.hi > a.hi (unsigned)
        bne     .L3                     ; b.hi < a.hi
        cmp.w   A1,A3                   ; high words tie: b.lo - a.lo
        bls     .L3                     ; b.lo <= a.lo, and they differ
.L4:
        sub.w   A0,A0                   ; a < b
        rts
.L3:
        mov.w   #2,A0                   ; a > b
.L5:
        rts
        .end
#endif
#endif /* L_ucmpsi2 */

#ifdef L_divhi3

;; HImode divides for the H8/300.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

; general purpose normalize routines
;
; dividend in A0
; divisor in A1
; Both routines turn the two operands into +ve numbers and leave in A2L
; a flag saying whether the unsigned answer has to be negated:
;   divnorm: A2L = 1 when exactly one operand was -ve (sign of quotient)
;   modnorm: A2L = 1 when the dividend was -ve (sign of remainder)

#ifdef __H8300__
        .section .text
        .align 2
divnorm:
        mov.b   #0x0,A2L
        or      A0H,A0H                 ; is dividend >= 0
        bge     _lab1
        not     A0H                     ; no - then make it +ve
        not     A0L
        adds    #1,A0
        xor     #0x1,A2L                ; and remember that in A2L
_lab1:  or      A1H,A1H                 ; look at divisor
        bge     _lab2
        not     A1H                     ; it is -ve, make it positive
        not     A1L
        adds    #1,A1
        xor     #0x1,A2L                ; and toggle sign of result
_lab2:  rts

; Same as divnorm except that only the dividend decides the sign: the
; remainder of a truncating divide has the sign of the dividend, so a
; -ve divisor is made +ve without touching A2L.
modnorm:
        mov.b   #0x0,A2L
        or      A0H,A0H                 ; is dividend >= 0
        bge     _lab7
        not     A0H                     ; no - then make it +ve
        not     A0L
        adds    #1,A0
        mov.b   #0x1,A2L                ; remainder will be -ve
_lab7:  or      A1H,A1H                 ; look at divisor
        bge     _lab8
        not     A1H                     ; it is -ve, make it positive
        not     A1L
        adds    #1,A1
_lab8:  rts

; A0=A0/A1 signed

        .global ___divhi3
___divhi3:
        bsr     divnorm
        bsr     ___udivhi3              ; only A2H is trashed, A2L survives
negans: or      A2L,A2L                 ; should answer be negative ?
        beq     _lab4
        not     A0H                     ; yes, so make it so
        not     A0L
        adds    #1,A0
_lab4:  rts

; A0=A0%A1 signed

        .global ___modhi3
___modhi3:
        bsr     modnorm                 ; A2L = sign of the dividend only
        bsr     ___udivhi3
        mov.w   A3,A0                   ; remainder is the answer
        bra     negans

; A0=A0%A1 unsigned

        .global ___umodhi3
___umodhi3:
        bsr     ___udivhi3
        mov.w   A3,A0                   ; remainder is the answer
        rts

; A0=A0/A1 unsigned
; A3=A0%A1 unsigned
; A2H trashed
; D high 8 bits of denom
; d low 8 bits of denom
; N high 8 bits of num
; n low 8 bits of num
; M high 8 bits of mod
; m low 8 bits of mod
; Q high 8 bits of quot
; q low 8 bits of quot
; P preserve

; The h8 only has a 16/8 bit divide, so we look at the incoming and
; see how to partition up the expression.
; ___udivhi3: unsigned HImode divide, A0 = A0 / A1 and A3 = A0 % A1.
; A2H is used as a loop counter on the slow path.

        .global ___udivhi3
___udivhi3:
                                        ; A0 A1 A2 A3
                                        ; Nn Dd P
        sub.w   A3,A3                   ; Nn Dd xP 00
        or      A1H,A1H                 ; divisor wider than 8 bits ?
        bne     udiv_wide
        or      A0H,A0H                 ; dividend high byte zero ?
        beq     udiv_lowbyte

; here D == 0 and N != 0: divide the high byte first
        mov.b   A0H,A3L                 ; Nn Dd xP 0N
        divxu   A1L,A3                  ;          MQ
        mov.b   A3L,A0H                 ; Q
; N is dealt with, now n (A3H carries the running remainder)
udiv_lowbyte:
        mov.b   A0L,A3L                 ;          n
        divxu   A1L,A3                  ;          mq
        mov.b   A3L,A0L                 ; Qq
        mov.b   A3H,A3L                 ;          m
        mov.b   #0x0,A3H                ; Qq       0m
        rts

; D != 0: the divisor does not fit the 16/8 divide instruction, so the
; quotient (which then fits in 8 bits) is built one bit per iteration.

udiv_wide:
        mov.b   A0H,A3L                 ; Nn Dd xP 0N
        mov.b   #0x0,A0H                ; high byte of answer has to be zero
        mov.b   #0x8,A2H                ; 8 quotient bits to produce
udiv_bit:
        add.b   A0L,A0L                 ; n*=2, top bit goes to carry
        rotxl   A3L                     ; shift it into the remainder
        rotxl   A3H
        sub.w   A1,A3                   ; try to take the divisor away
        bhs     udiv_setbit             ; it fitted: record a 1 bit
        add.w   A1,A3                   ; it did not: undo the subtract

        dec     A2H
        bne     udiv_bit                ; next bit
        rts

udiv_setbit:
        inc     A0L                     ; insert the quotient bit
        dec     A2H
        bne     udiv_bit                ; next bit
        rts

#endif /* __H8300__ */
#endif /* L_divhi3 */

#ifdef L_divsi3

;; 4 byte integer divides for the H8/300.
;;
;; One routine does all the work; a number of small entry points
;; prepare the arguments and fix up the sign of the answer.
;; Everything lives in one object file since there are several
;; "supporting routines".

#ifdef __H8300H__
        .h8300h
#endif

        .section .text
        .align 2

; The normalize routine that follows puts the absolute values of the
; SImode operands back into r0/r1 and r2/r3 and leaves in r6l a flag
; (1 or 0) telling whether the result has to be negated.
; It is placed here to keep branch displacements small.
#ifdef __H8300__

; divnorm (H8/300)
;   in:   A0/A1 = numerator, A2/A3 = denominator (A0 and A2 high words)
;   out:  both replaced by their absolute values
;         S2L = 1 when exactly one of them was -ve (sign of quotient)
divnorm:
        mov.b   #0,S2L                  ; keep the sign in S2
        mov.b   A0H,A0H                 ; is the numerator -ve
        bge     postive

        ; negate arg: complement all four bytes, then add 1 and ripple
        ; the carry from the least significant byte (A1L) upwards to
        ; the most significant byte (A0H)
        not     A0H
        not     A1H
        not     A0L
        not     A1L

        add.b   #1,A1L
        addx    #0,A1H
        addx    #0,A0L
        addx    #0,A0H

        mov.b   #1,S2L                  ; the sign will be -ve
postive:
        mov.b   A2H,A2H                 ; is the denominator -ve
        bge     postive2
        not     A2L
        not     A2H
        not     A3L
        not     A3H
        add.b   #1,A3L
        addx    #0,A3H
        addx    #0,A2L
        addx    #0,A2H
        xor     #1,S2L                  ; toggle result sign
postive2:
        rts

; modnorm (H8/300)
;   Same as divnorm except that S2L = 1 only when the numerator was
;   -ve: the remainder of a truncating divide has the sign of the
;   numerator, whatever the sign of the denominator.
modnorm:
        mov.b   #0,S2L                  ; keep the sign in S2
        mov.b   A0H,A0H                 ; is the numerator -ve
        bge     modpos

        not     A0H                     ; negate arg
        not     A1H
        not     A0L
        not     A1L

        add.b   #1,A1L
        addx    #0,A1H
        addx    #0,A0L
        addx    #0,A0H

        mov.b   #1,S2L                  ; the remainder will be -ve
modpos:
        mov.b   A2H,A2H                 ; is the denominator -ve
        bge     modpos2
        not     A2L                     ; make it +ve, sign flag untouched
        not     A2H
        not     A3L
        not     A3H
        add.b   #1,A3L
        addx    #0,A3H
        addx    #0,A2L
        addx    #0,A2H
modpos2:
        rts

#else /* __H8300H__ */

; divnorm (H8/300H)
;   in:   A0P = numerator, A1P = denominator
;   out:  both replaced by their absolute values
;         S2L = 1 when exactly one of them was -ve (sign of quotient)
divnorm:
        mov.b   #0,S2L                  ; keep the sign in S2
        mov.l   A0P,A0P                 ; is the numerator -ve
        bge     postive

        neg.l   A0P                     ; negate arg
        mov.b   #1,S2L                  ; the sign will be -ve

postive:
        mov.l   A1P,A1P                 ; is the denominator -ve
        bge     postive2

        neg.l   A1P                     ; negate arg
        xor.b   #1,S2L                  ; toggle result sign

postive2:
        rts

; modnorm (H8/300H)
;   Same as divnorm except that S2L = 1 only when the numerator was
;   -ve (the remainder takes the sign of the numerator).
modnorm:
        mov.b   #0,S2L                  ; keep the sign in S2
        mov.l   A0P,A0P                 ; is the numerator -ve
        bge     modpos

        neg.l   A0P                     ; negate arg
        mov.b   #1,S2L                  ; the remainder will be -ve

modpos:
        mov.l   A1P,A1P                 ; is the denominator -ve
        bge     modpos2

        neg.l   A1P                     ; make it +ve, sign flag untouched

modpos2:
        rts

#endif

; All entry points below:
;   numerator in A0/A1 (A0P for 300h)
;   denominator in A2/A3 (A1P for 300h)
;   answer in A0/A1 (A0P for 300h)
; S0, S1 and S2 are saved on entry and restored through exitdiv.

; signed remainder
        .global ___modsi3
___modsi3:
        PUSHP   S2P
        PUSHP   S0P
        PUSHP   S1P

        bsr     modnorm                 ; S2L = sign of the numerator only
        bsr     divmodsi4
#ifdef __H8300__
        mov.w   S0,A0                   ; the remainder is the answer
        mov.w   S1,A1
#else
        mov.l   S0P,A0P
#endif
        bra     exitdiv

; unsigned quotient
        .global ___udivsi3
___udivsi3:
        PUSHP   S2P
        PUSHP   S0P
        PUSHP   S1P
        mov.b   #0,S2L                  ; keep sign low
        bsr     divmodsi4
        bra     exitdiv

; unsigned remainder
        .global ___umodsi3
___umodsi3:
        PUSHP   S2P
        PUSHP   S0P
        PUSHP   S1P
        mov.b   #0,S2L                  ; keep sign low
        bsr     divmodsi4
#ifdef __H8300__
        mov.w   S0,A0                   ; the remainder is the answer
        mov.w   S1,A1
#else
        mov.l   S0P,A0P
#endif
        bra     exitdiv

; signed quotient
        .global ___divsi3
___divsi3:
        PUSHP   S2P
        PUSHP   S0P
        PUSHP   S1P
        jsr     divnorm
        jsr     divmodsi4

        ; common exit: restore S1/S0, apply the sign held in S2L to the
        ; answer in A0/A1 (A0P for 300h), restore S2 and return
exitdiv:
        POPP    S1P
        POPP    S0P

        or      S2L,S2L
        beq     reti

        ; should be -ve
#ifdef __H8300__
        not     A0H
        not     A1H
        not     A0L
        not     A1L

        add.b   #1,A1L                  ; carry ripples A1L, A1H, A0L, A0H
        addx    #0,A1H
        addx    #0,A0L
        addx    #0,A0H
#else /* __H8300H__ */
        neg.l   A0P
#endif

reti:
        POPP    S2P
        rts

        ; divmodsi4 - unsigned divide
        ; takes   A0/A1 numerator (A0P for 300h)
        ;         A2/A3 denominator (A1P for 300h)
        ; returns A0/A1 quotient (A0P for 300h)
        ;         S0/S1 remainder (S0P for 300h)
        ; trashes S2H; S2L (the sign flag tested by exitdiv) is left
        ;         alone.  The 300h version also uses A2P as scratch.

#ifdef __H8300__

divmodsi4:
        sub.w   S0,S0                   ; zero play area
        mov.w   S0,S1
        mov.b   A2H,S2H                 ; any of the top three bytes of
        or      A2L,S2H                 ; the denominator set ?
        or      A3H,S2H
        bne     DenWide                 ; yes: shift and test loop

        ; the denominator fits in 8 bits: divide byte by byte with the
        ; 16/8 divide, starting at the first non zero numerator byte.
        ; S1H carries the running remainder from one byte to the next.
        mov.b   A0H,A0H
        bne     NumFromByte0
        mov.b   A0L,A0L
        bne     NumFromByte1
        mov.b   A1H,A1H
        bne     NumFromByte2
        bra     NumFromByte3
NumFromByte0:
        mov.b   A0H,S1L
        divxu   A3L,S1
        mov.b   S1L,A0H
NumFromByte1:
        mov.b   A0L,S1L
        divxu   A3L,S1
        mov.b   S1L,A0L
NumFromByte2:
        mov.b   A1H,S1L
        divxu   A3L,S1
        mov.b   S1L,A1H
NumFromByte3:
        mov.b   A1L,S1L
        divxu   A3L,S1
        mov.b   S1L,A1L

        mov.b   S1H,S1L                 ; final remainder to S0/S1
        mov.b   #0x0,S1H
        rts

; have to do the divide by shift and test.  The denominator is at
; least 256 here, so the top byte of the numerator is moved straight
; into the remainder and only the other 24 bits are iterated over.
DenWide:
        mov.b   A0H,S1L
        mov.b   A0L,A0H
        mov.b   A1H,A0L
        mov.b   A1L,A1H

        mov.b   #0,A1L
        mov.b   #24,S2H                 ; only do 24 iterations

nextbit:
        add.w   A1,A1                   ; double the answer guess
        rotxl   A0L
        rotxl   A0H

        rotxl   S1L                     ; double remainder
        rotxl   S1H
        rotxl   S0L
        rotxl   S0H
        sub.w   A3,S1                   ; does it all fit
        subx    A2L,S0L
        subx    A2H,S0H
        bhs     setone

        add.w   A3,S1                   ; no, restore mistake
        addx    A2L,S0L
        addx    A2H,S0H

        dec     S2H
        bne     nextbit
        rts

setone:
        inc     A1L                     ; record a 1 bit in the quotient
        dec     S2H
        bne     nextbit
        rts

#else /* __H8300H__ */

divmodsi4:
        sub.l   S0P,S0P                 ; zero play area
        mov.w   A1E,A1E                 ; denominator top word 0?
        bne     DenWide                 ; no: shift and test loop

        ; do it the easy way, see page 107 in manual
        mov.w   A0E,A2                  ; high word of the numerator
        extu.l  A2P
        divxu.w A1,A2P                  ; divide it on its own
        mov.w   A2E,A0E                 ; its remainder tops the low word
        divxu.w A1,A0P                  ; divide that
        mov.w   A0E,S0                  ; final remainder
        mov.w   A2,A0E                  ; high word of the quotient
        extu.l  S0P
        rts

; The denominator is at least 65536 here, so the top byte of the
; numerator is moved straight into the remainder and only the other
; 24 bits are iterated over.
DenWide:
        mov.w   A0E,A2
        mov.b   A2H,S0L
        mov.b   A2L,A2H
        mov.b   A0H,A2L
        mov.w   A2,A0E
        mov.b   A0L,A0H
        mov.b   #0,A0L
        mov.b   #24,S2H                 ; only do 24 iterations

nextbit:
        shll.l  A0P                     ; double the answer guess
        rotxl.l S0P                     ; double remainder
        sub.l   A1P,S0P                 ; does it all fit?
        bhs     setone

        add.l   A1P,S0P                 ; no, restore mistake
        dec     S2H
        bne     nextbit
        rts

setone:
        inc     A0L                     ; record a 1 bit in the quotient
        dec     S2H
        bne     nextbit
        rts

#endif
#endif /* L_divsi3 */

#ifdef L_mulhi3

;; HImode multiply.
; The h8 only has an 8*8->16 multiply.
; The answer is the same as:
;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (srca.h * srcb.h can be ignored because it falls off the top of the
; 16 bit result)
; A0 in  (srca)
; A1 in  (srcb)
; A0 answer
; A2 and A3 are used as scratch.

#ifdef __H8300__
        .section .text
        .align 2
        .global ___mulhi3
___mulhi3:
        mov.b   A1L,A2L                 ; A2L = srcb.l
        mulxu   A0L,A2                  ; A2 = srca.l * srcb.l

        mov.b   A0H,A3L                 ; A3L = srca.h
        mulxu   A1L,A3                  ; A3 = srca.h * srcb.l

        add.b   A3L,A2H                 ; fold its low byte into the top byte

        mov.b   A1H,A3L                 ; A3L = srcb.h
        mulxu   A0L,A3                  ; A3 = srcb.h * srca.l

        add.b   A3L,A2H                 ; fold that one in as well
        mov.w   A2,A0                   ; hand the product back in A0
        rts

#endif
#endif /* L_mulhi3 */

#ifdef L_mulsi3

;; SImode multiply.
;;
;; Original author note: shift and add may be sufficient for this.
;; Using the supplied 8x8->16 would need 10 ops of 14 cycles each +
;; overhead.  This way the inner loop uses maybe 20 cycles + overhead,
;; but terminates quickly on small args.
;;
;; A0/A1 src_a (A0P for 300h)
;; A2/A3 src_b (A1P for 300h)
;; A0/A1 result (A0P for 300h)
;;
;; r = 0;
;; while (a)
;;   {
;;     if (a & 1)
;;       r += b;
;;     a >>= 1;
;;     b <<= 1;
;;   }

        .section .text
        .align 2

#ifdef __H8300__

; r is accumulated in S0/S1.  S0, S1 and S2 are saved and restored;
; S2 itself is not modified by this routine.

        .global ___mulsi3
___mulsi3:
        PUSHP   S0P
        PUSHP   S1P
        PUSHP   S2P

        sub.w   S0,S0                   ; r = 0
        sub.w   S1,S1

        ; while (a)
mul_loop:
        mov.w   A0,A0
        bne     mul_step
        mov.w   A1,A1
        beq     mul_exit
mul_step:
        ; if (a & 1)
        bld     #0,A1L
        bcc     mul_shift
        ; r += b
        add.w   A3,S1
        addx    A2L,S0L
        addx    A2H,S0H
mul_shift:
        ; a >>= 1
        shlr    A0H
        rotxr   A0L
        rotxr   A1H
        rotxr   A1L

        ; b <<= 1
        add.w   A3,A3
        addx    A2L,A2L
        addx    A2H,A2H
        bra     mul_loop

mul_exit:
        mov.w   S0,A0                   ; return r
        mov.w   S1,A1
        POPP    S2P
        POPP    S1P
        POPP    S0P
        rts

#else /* __H8300H__ */

        .h8300h

; r is accumulated in A2P.

        .global ___mulsi3
___mulsi3:
        sub.l   A2P,A2P                 ; r = 0

        ; while (a)
mul_loop:
        mov.l   A0P,A0P
        beq     mul_exit

        ; if (a & 1)
        bld     #0,A0L
        bcc     mul_shift

        ; r += b
        add.l   A1P,A2P

mul_shift:
        ; a >>= 1
        shlr.l  A0P

        ; b <<= 1
        shll.l  A1P
        bra     mul_loop

mul_exit:
        mov.l   A2P,A0P                 ; return r
        rts

#endif
#endif /* L_mulsi3 */

/* Patch trailer from the cgit page:
   "\ No newline at end of file -- cgit v1.1"  */