From 783c0ec83143ad38cae2bf2aa86099fe4b4aa930 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 5 Nov 2010 16:46:36 -0700 Subject: [pk] various PK cleanups/speedups --- pk/fp.c | 97 ++++++----------------------------------------------------- pk/fp.h | 13 ++++++++ pk/fp_asm.S | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++ pk/memset.c | 39 ++++++++++++++++++++++++ pk/pk.c | 1 + pk/pk.h | 6 ---- pk/pk.ld | 2 -- pk/pk.mk.in | 4 +++ pk/riscv-pk.c | 2 +- pk/strlen.c | 26 ++++++++++++++++ 10 files changed, 177 insertions(+), 97 deletions(-) create mode 100644 pk/fp.h create mode 100644 pk/fp_asm.S create mode 100644 pk/memset.c create mode 100644 pk/strlen.c diff --git a/pk/fp.c b/pk/fp.c index 7ed43cb..a034c5a 100644 --- a/pk/fp.c +++ b/pk/fp.c @@ -2,6 +2,7 @@ #include "softfloat.h" #include "riscv-opc.h" #include "pk.h" +#include "fp.h" #include #define noisy 0 @@ -10,8 +11,6 @@ static void set_fp_reg(unsigned int which, unsigned int dp, uint64_t val); static uint64_t get_fp_reg(unsigned int which, unsigned int dp); static fp_state_t fp_state; -static void get_fp_state(); -static void put_fp_state(); static inline void validate_address(trapframe_t* tf, long addr, int size, int store) @@ -24,9 +23,8 @@ validate_address(trapframe_t* tf, long addr, int size, int store) int emulate_fp(trapframe_t* tf) { - fp_state.fsr = mfcr(CR_FSR); if(have_fp) - get_fp_state(); + fp_state.fsr = get_fp_state(fp_state.fpr); if(noisy) printk("FPU emulation at pc %lx, insn %x\n",tf->epc,(uint32_t)tf->insn); @@ -45,9 +43,9 @@ int emulate_fp(trapframe_t* tf) #define XRS2 (tf->gpr[RRS2]) #define XRDR (tf->gpr[RRD]) - uint64_t frs1d = get_fp_reg(RRS1, 1); - uint64_t frs2d = get_fp_reg(RRS2, 1); - uint64_t frs3d = get_fp_reg(RRS3, 1); + uint64_t frs1d = fp_state.fpr[RRS1]; + uint64_t frs2d = fp_state.fpr[RRS2]; + uint64_t frs3d = fp_state.fpr[RRS3]; uint32_t frs1s = get_fp_reg(RRS1, 0); uint32_t frs2s = get_fp_reg(RRS2, 0); uint32_t frs3s = get_fp_reg(RRS3, 0); @@ -217,9 +215,8 
@@ int emulate_fp(trapframe_t* tf) else return -1; - mtcr(fp_state.fsr, CR_FSR); if(have_fp) - put_fp_state(); + put_fp_state(fp_state.fpr,fp_state.fsr); advance_pc(tf); @@ -231,6 +228,8 @@ int emulate_fp(trapframe_t* tf) #define PUT_FP_REG(which, type, val) asm("mtf." STR(type) " $f" STR(which) ",%0" : : "r"(val)) #define GET_FP_REG(which, type, val) asm("mff." STR(type) " %0,$f" STR(which) : "=r"(val)) +#define LOAD_FP_REG(which, type, val) asm("l." STR(type) " $f" STR(which) ",%0" : : "m"(val)) +#define STORE_FP_REG(which, type, val) asm("s." STR(type) " $f" STR(which) ",%0" : "=m"(val) : : "memory") static void __attribute__((noinline)) set_fp_reg(unsigned int which, unsigned int dp, uint64_t val) @@ -248,11 +247,8 @@ set_fp_reg(unsigned int which, unsigned int dp, uint64_t val) // to set an SP value, move the SP value into the FPU // then move it back out as a DP value. OK to clobber $f0 // because we'll restore it later. - uint64_t tmp; - GET_FP_REG(0,d,tmp); PUT_FP_REG(0,s,val); GET_FP_REG(0,d,fp_state.fpr[which]); - PUT_FP_REG(0,d,tmp); } } @@ -267,11 +263,8 @@ get_fp_reg(unsigned int which, unsigned int dp) // to get an SP value, move the DP value into the FPU // then move it back out as an SP value. OK to clobber $f0 // because we'll restore it later. 
- uint64_t tmp; - GET_FP_REG(0,d,tmp); PUT_FP_REG(0,d,fp_state.fpr[which]); GET_FP_REG(0,s,val); - PUT_FP_REG(0,d,tmp); } if(noisy) @@ -283,83 +276,11 @@ get_fp_reg(unsigned int which, unsigned int dp) return val; } -static void __attribute__((noinline)) get_fp_state() -{ - GET_FP_REG(0, d, fp_state.fpr[0]); - GET_FP_REG(1, d, fp_state.fpr[1]); - GET_FP_REG(2, d, fp_state.fpr[2]); - GET_FP_REG(3, d, fp_state.fpr[3]); - GET_FP_REG(4, d, fp_state.fpr[4]); - GET_FP_REG(5, d, fp_state.fpr[5]); - GET_FP_REG(6, d, fp_state.fpr[6]); - GET_FP_REG(7, d, fp_state.fpr[7]); - GET_FP_REG(8, d, fp_state.fpr[8]); - GET_FP_REG(9, d, fp_state.fpr[9]); - GET_FP_REG(10, d, fp_state.fpr[10]); - GET_FP_REG(11, d, fp_state.fpr[11]); - GET_FP_REG(12, d, fp_state.fpr[12]); - GET_FP_REG(13, d, fp_state.fpr[13]); - GET_FP_REG(14, d, fp_state.fpr[14]); - GET_FP_REG(15, d, fp_state.fpr[15]); - GET_FP_REG(16, d, fp_state.fpr[16]); - GET_FP_REG(17, d, fp_state.fpr[17]); - GET_FP_REG(18, d, fp_state.fpr[18]); - GET_FP_REG(19, d, fp_state.fpr[19]); - GET_FP_REG(20, d, fp_state.fpr[20]); - GET_FP_REG(21, d, fp_state.fpr[21]); - GET_FP_REG(22, d, fp_state.fpr[22]); - GET_FP_REG(23, d, fp_state.fpr[23]); - GET_FP_REG(24, d, fp_state.fpr[24]); - GET_FP_REG(25, d, fp_state.fpr[25]); - GET_FP_REG(26, d, fp_state.fpr[26]); - GET_FP_REG(27, d, fp_state.fpr[27]); - GET_FP_REG(28, d, fp_state.fpr[28]); - GET_FP_REG(29, d, fp_state.fpr[29]); - GET_FP_REG(30, d, fp_state.fpr[30]); - GET_FP_REG(31, d, fp_state.fpr[31]); -} - -static void __attribute__((noinline)) put_fp_state() -{ - PUT_FP_REG(0, d, fp_state.fpr[0]); - PUT_FP_REG(1, d, fp_state.fpr[1]); - PUT_FP_REG(2, d, fp_state.fpr[2]); - PUT_FP_REG(3, d, fp_state.fpr[3]); - PUT_FP_REG(4, d, fp_state.fpr[4]); - PUT_FP_REG(5, d, fp_state.fpr[5]); - PUT_FP_REG(6, d, fp_state.fpr[6]); - PUT_FP_REG(7, d, fp_state.fpr[7]); - PUT_FP_REG(8, d, fp_state.fpr[8]); - PUT_FP_REG(9, d, fp_state.fpr[9]); - PUT_FP_REG(10, d, fp_state.fpr[10]); - PUT_FP_REG(11, d, 
#ifndef _FP_H
#define _FP_H

// Needed for uint64_t/uint32_t below, so this header can be included first.
#include <stdint.h>

// Software copy of the floating-point unit's state.
typedef struct
{
  uint64_t fpr[32]; // $f0..$f31, one 64-bit slot per register
  uint32_t fsr;     // value of the CR_FSR control register
} fp_state_t;

// Both routines are implemented in fp_asm.S.
// NOTE(review): as added there, neither routine has a return instruction
// before its .end directive -- confirm that is not a missing return.

// Loads $f0..$f31 from 32 consecutive 8-byte slots starting at fp_regs,
// then writes fsr to CR_FSR.
void put_fp_state(const void* fp_regs, long fsr);

// Stores $f0..$f31 into 32 consecutive 8-byte slots starting at fp_regs
// and returns the value read from CR_FSR.
long get_fp_state(void* fp_regs);

#endif
#include <stddef.h>
#include <stdint.h>

// Fill the first s bytes of the object at m with (unsigned char)ch.
//
//   m   start of the region to fill; any alignment is accepted
//   ch  fill value; only its low 8 bits are used
//   s   number of bytes to fill; 0 writes nothing
//
// Returns m.  Exactly s bytes are written: the region is covered by a
// byte-wise head (up to the first long boundary), a run of whole-long
// stores unrolled eight at a time, and a byte-wise tail.
void* memset(void* m, int ch, size_t s)
{
  unsigned char* mem = (unsigned char*)m;
  const unsigned char byte = (unsigned char)ch;

  // Head: test the moving cursor (not m) so we stop as soon as it is aligned.
  while(s && ((uintptr_t)mem & (sizeof(long)-1)))
  {
    *mem++ = byte;
    s--;
  }

  // ~0UL/0xFF is 0x0101...01 for any width of long, so the product has the
  // fill byte in every byte lane without any width-specific shifting.
  const unsigned long l = ~0UL / 0xFF * byte;

  unsigned long* lmem = (unsigned long*)mem;
  size_t words = s / sizeof(long); // whole longs remaining, rounded down

  // Unrolled body: only entered while a full group of eight longs remains.
  for(; words >= 8; words -= 8, lmem += 8)
  {
    lmem[0] = l;
    lmem[1] = l;
    lmem[2] = l;
    lmem[3] = l;
    lmem[4] = l;
    lmem[5] = l;
    lmem[6] = l;
    lmem[7] = l;
  }

  // Up to seven leftover whole longs.
  for(; words; words--)
    *lmem++ = l;

  // Tail: the bytes that do not make up a whole long.
  mem = (unsigned char*)lmem;
  for(size_t i = 0; i < (s & (sizeof(long)-1)); i++)
    mem[i] = byte;

  return m;
}
#include <stddef.h>
#include <stdint.h>

// from http://www-graphics.stanford.edu/~seander/bithacks.html
// Returns nonzero iff at least one byte of l is zero.  The masks are derived
// from the width of long, so every width yields a defined result.
static inline unsigned long hasZeroByte(unsigned long l)
{
  const unsigned long ones = ~0UL / 0xFF; // 0x01 in every byte lane
  const unsigned long highs = ones << 7;  // 0x80 in every byte lane
  return (l - ones) & ~l & highs;
}

// Returns the number of bytes in the NUL-terminated string s, not counting
// the terminator.  s must not be NULL.
//
// The scan is byte-wise until the cursor reaches a long boundary, then a
// long at a time until a word containing a zero byte is found, then
// byte-wise inside that word.  The word loads are always long-aligned but
// may read bytes that lie after the terminator within the same word.
size_t strlen(const char* s)
{
  const char* p = s;

  // Head: handle a misaligned start so the fast path applies to any string.
  while((uintptr_t)p & (sizeof(long)-1))
  {
    if(!*p)
      return (size_t)(p - s);
    p++;
  }

  // Skip whole words that contain no terminator.
  const unsigned long* w = (const unsigned long*)p;
  while(!hasZeroByte(*w))
    w++;

  // The terminator is somewhere in *w; find its exact position.
  p = (const char*)w;
  while(*p)
    p++;

  return (size_t)(p - s);
}