diff options
author | Andrew Waterman <waterman@s144.Millennium.Berkeley.EDU> | 2010-10-18 01:47:09 -0700 |
---|---|---|
committer | Andrew Waterman <waterman@s144.Millennium.Berkeley.EDU> | 2010-10-18 01:47:09 -0700 |
commit | 77c382ff239f61ee9846a7abb6d96a6434b52b56 (patch) | |
tree | 44be4c6cf17b74bd0d87c2498c5a781f9be11156 /pk | |
parent | 68879a67d0f49323e1e2f8358d19bc1d94bb9f30 (diff) | |
download | pk-77c382ff239f61ee9846a7abb6d96a6434b52b56.zip pk-77c382ff239f61ee9846a7abb6d96a6434b52b56.tar.gz pk-77c382ff239f61ee9846a7abb6d96a6434b52b56.tar.bz2 |
[pk] improved performance of fp emulation
Diffstat (limited to 'pk')
-rw-r--r-- | pk/file.c | 4 |
-rw-r--r-- | pk/fp.c | 456 |
-rw-r--r-- | pk/handlers.c | 3 |
-rw-r--r-- | pk/pcr.h | 10 |
-rw-r--r-- | pk/pk.c | 22 |
-rw-r--r-- | pk/pk.h | 10 |
-rw-r--r-- | pk/riscv-pk.c | 2 |
7 files changed, 230 insertions, 277 deletions
@@ -4,9 +4,9 @@ #include "pk.h" #include "frontend.h" -#define MAX_FDS 1000 +#define MAX_FDS 128 file_t* fds[MAX_FDS]; -#define MAX_FILES 1000 +#define MAX_FILES 128 file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}}; file_t *stdout, *stdin, *stderr; @@ -1,3 +1,4 @@ +#include "pcr.h" #include "softfloat.h" #include "riscv-opc.h" #include "pk.h" @@ -8,10 +9,12 @@ static void set_fp_reg(unsigned int which, unsigned int dp, uint64_t val); static uint64_t get_fp_reg(unsigned int which, unsigned int dp); -uint64_t fp_regs[32]; -uint32_t fsr; +static fp_state_t fp_state; +static void get_fp_state(); +static void put_fp_state(); -void validate_address(trapframe_t* tf, long addr, int size, int store) +static inline void +validate_address(trapframe_t* tf, long addr, int size, int store) { if(addr & (size-1)) handle_misaligned_ldst(tf); @@ -21,6 +24,9 @@ void validate_address(trapframe_t* tf, long addr, int size, int store) int emulate_fp(trapframe_t* tf) { + if(have_fp) + get_fp_state(); + if(noisy) printk("FPU emulation at pc %lx, insn %x\n",tf->epc,(uint32_t)tf->insn); @@ -30,49 +36,53 @@ int emulate_fp(trapframe_t* tf) #define RRDR ( tf->insn & 0x1F) #define RRDI RRS2 + #define IMM (((int32_t)tf->insn << 20) >> 20) + #define XRS1 (tf->gpr[RRS1]) #define XRS2 (tf->gpr[RRS2]) #define XRDR (tf->gpr[RRDR]) - #define FRS1S get_fp_reg(RRS1,0) - #define FRS2S get_fp_reg(RRS2,0) - #define FRS3S get_fp_reg(RRS3,0) - #define FRS1D get_fp_reg(RRS1,1) - #define FRS2D get_fp_reg(RRS2,1) - #define FRS3D get_fp_reg(RRS3,1) - #define IMM (((int32_t)tf->insn << 20) >> 20) - #define EFFECTIVE_ADDRESS (XRS1+IMM) + uint64_t frs1d = get_fp_reg(RRS1, 1); + uint64_t frs2d = get_fp_reg(RRS2, 1); + uint64_t frs3d = get_fp_reg(RRS3, 1); + uint32_t frs1s = get_fp_reg(RRS1, 0); + uint32_t frs2s = get_fp_reg(RRS2, 0); + uint32_t frs3s = get_fp_reg(RRS3, 0); + + uint64_t effective_address = XRS1 + IMM; + + softfloat_exceptionFlags = 0; #define IS_INSN(x) ((tf->insn & MASK_ ## x) == MATCH_ ## 
x) if(IS_INSN(L_S)) { - validate_address(tf, EFFECTIVE_ADDRESS, 4, 0); - set_fp_reg(RRDI, 0, *(uint32_t*)EFFECTIVE_ADDRESS); + validate_address(tf, effective_address, 4, 0); + set_fp_reg(RRDI, 0, *(uint32_t*)effective_address); } else if(IS_INSN(L_D)) { - validate_address(tf, EFFECTIVE_ADDRESS, 8, 0); - set_fp_reg(RRDI, 1, *(uint64_t*)EFFECTIVE_ADDRESS); + validate_address(tf, effective_address, 8, 0); + set_fp_reg(RRDI, 1, *(uint64_t*)effective_address); } else if(IS_INSN(S_S)) { - validate_address(tf, EFFECTIVE_ADDRESS, 4, 1); - *(uint32_t*)EFFECTIVE_ADDRESS = FRS2S; + validate_address(tf, effective_address, 4, 1); + *(uint32_t*)effective_address = frs2s; } else if(IS_INSN(S_D)) { - validate_address(tf, EFFECTIVE_ADDRESS, 8, 1); - *(uint64_t*)EFFECTIVE_ADDRESS = FRS2D; + validate_address(tf, effective_address, 8, 1); + *(uint64_t*)effective_address = frs2d; } else if(IS_INSN(MFF_S)) - XRDR = FRS2S; + XRDR = frs2s; else if(IS_INSN(MFF_D)) - XRDR = FRS2D; + XRDR = frs2d; else if(IS_INSN(MFFL_D)) - XRDR = (int32_t)FRS2D; + XRDR = (int32_t)frs2d; else if(IS_INSN(MFFH_D)) - XRDR = (int64_t)FRS2D >> 32; + XRDR = (int64_t)frs2d >> 32; else if(IS_INSN(MTF_S)) set_fp_reg(RRDR, 0, XRS1); else if(IS_INSN(MTF_D)) @@ -80,41 +90,41 @@ int emulate_fp(trapframe_t* tf) else if(IS_INSN(MTFLH_D)) set_fp_reg(RRDR, 1, (uint32_t)XRS1 | (XRS2 << 32)); else if(IS_INSN(SGNINJ_S)) - set_fp_reg(RRDR, 0, (FRS1S &~ (uint32_t)INT32_MIN) | (FRS2S & (uint32_t)INT32_MIN)); + set_fp_reg(RRDR, 0, (frs1s &~ (uint32_t)INT32_MIN) | (frs2s & (uint32_t)INT32_MIN)); else if(IS_INSN(SGNINJ_D)) - set_fp_reg(RRDR, 1, (FRS1D &~ INT64_MIN) | (FRS2D & INT64_MIN)); + set_fp_reg(RRDR, 1, (frs1d &~ INT64_MIN) | (frs2d & INT64_MIN)); else if(IS_INSN(SGNINJN_S)) - set_fp_reg(RRDR, 0, (FRS1S &~ (uint32_t)INT32_MIN) | ((~FRS2S) & (uint32_t)INT32_MIN)); + set_fp_reg(RRDR, 0, (frs1s &~ (uint32_t)INT32_MIN) | ((~frs2s) & (uint32_t)INT32_MIN)); else if(IS_INSN(SGNINJN_D)) - set_fp_reg(RRDR, 1, (FRS1D &~ INT64_MIN) | 
((~FRS2D) & INT64_MIN)); + set_fp_reg(RRDR, 1, (frs1d &~ INT64_MIN) | ((~frs2d) & INT64_MIN)); else if(IS_INSN(SGNMUL_S)) - set_fp_reg(RRDR, 0, FRS1S ^ (FRS2S & (uint32_t)INT32_MIN)); + set_fp_reg(RRDR, 0, frs1s ^ (frs2s & (uint32_t)INT32_MIN)); else if(IS_INSN(SGNMUL_D)) - set_fp_reg(RRDR, 1, FRS1D ^ (FRS2D & INT64_MIN)); + set_fp_reg(RRDR, 1, frs1d ^ (frs2d & INT64_MIN)); else if(IS_INSN(C_EQ_S)) - XRDR = f32_eq(FRS1S, FRS2S); + XRDR = f32_eq(frs1s, frs2s); else if(IS_INSN(C_EQ_D)) - XRDR = f64_eq(FRS1D, FRS2D); + XRDR = f64_eq(frs1d, frs2d); else if(IS_INSN(C_LE_S)) - XRDR = f32_le(FRS1S, FRS2S); + XRDR = f32_le(frs1s, frs2s); else if(IS_INSN(C_LE_D)) - XRDR = f64_le(FRS1D, FRS2D); + XRDR = f64_le(frs1d, frs2d); else if(IS_INSN(C_LT_S)) - XRDR = f32_lt(FRS1S, FRS2S); + XRDR = f32_lt(frs1s, frs2s); else if(IS_INSN(C_LT_D)) - XRDR = f64_lt(FRS1D, FRS2D); + XRDR = f64_lt(frs1d, frs2d); else if(IS_INSN(CVT_S_W)) set_fp_reg(RRDR, 0, i32_to_f32(XRS1)); else if(IS_INSN(CVT_S_L)) set_fp_reg(RRDR, 0, i64_to_f32(XRS1)); else if(IS_INSN(CVT_S_D)) - set_fp_reg(RRDR, 0, f64_to_f32(FRS1D)); + set_fp_reg(RRDR, 0, f64_to_f32(frs1d)); else if(IS_INSN(CVT_D_W)) set_fp_reg(RRDR, 1, i32_to_f64(XRS1)); else if(IS_INSN(CVT_D_L)) set_fp_reg(RRDR, 1, i64_to_f64(XRS1)); else if(IS_INSN(CVT_D_S)) - set_fp_reg(RRDR, 1, f32_to_f64(FRS1S)); + set_fp_reg(RRDR, 1, f32_to_f64(frs1s)); else if(IS_INSN(CVTU_S_W)) set_fp_reg(RRDR, 0, ui32_to_f32(XRS1)); else if(IS_INSN(CVTU_S_L)) @@ -124,60 +134,65 @@ int emulate_fp(trapframe_t* tf) else if(IS_INSN(CVTU_D_L)) set_fp_reg(RRDR, 1, ui64_to_f64(XRS1)); else if(IS_INSN(ADD_S)) - set_fp_reg(RRDR, 0, f32_add(FRS1S, FRS2S)); + set_fp_reg(RRDR, 0, f32_add(frs1s, frs2s)); else if(IS_INSN(ADD_D)) - set_fp_reg(RRDR, 1, f64_add(FRS1D, FRS2D)); + set_fp_reg(RRDR, 1, f64_add(frs1d, frs2d)); else if(IS_INSN(SUB_S)) - set_fp_reg(RRDR, 0, f32_sub(FRS1S, FRS2S)); + set_fp_reg(RRDR, 0, f32_sub(frs1s, frs2s)); else if(IS_INSN(SUB_D)) - set_fp_reg(RRDR, 1, 
f64_sub(FRS1D, FRS2D)); + set_fp_reg(RRDR, 1, f64_sub(frs1d, frs2d)); else if(IS_INSN(MUL_S)) - set_fp_reg(RRDR, 0, f32_mul(FRS1S, FRS2S)); + set_fp_reg(RRDR, 0, f32_mul(frs1s, frs2s)); else if(IS_INSN(MUL_D)) - set_fp_reg(RRDR, 1, f64_mul(FRS1D, FRS2D)); + set_fp_reg(RRDR, 1, f64_mul(frs1d, frs2d)); else if(IS_INSN(MADD_S)) - set_fp_reg(RRDR, 0, f32_mulAdd(FRS1S, FRS2S, FRS3S)); + set_fp_reg(RRDR, 0, f32_mulAdd(frs1s, frs2s, frs3s)); else if(IS_INSN(MADD_D)) - set_fp_reg(RRDR, 1, f64_mulAdd(FRS1D, FRS2D, FRS3D)); + set_fp_reg(RRDR, 1, f64_mulAdd(frs1d, frs2d, frs3d)); else if(IS_INSN(MSUB_S)) - set_fp_reg(RRDR, 0, f32_mulAdd(FRS1S, FRS2S, FRS3S ^ (uint32_t)INT32_MIN)); + set_fp_reg(RRDR, 0, f32_mulAdd(frs1s, frs2s, frs3s ^ (uint32_t)INT32_MIN)); else if(IS_INSN(MSUB_D)) - set_fp_reg(RRDR, 1, f64_mulAdd(FRS1D, FRS2D, FRS3D ^ INT64_MIN)); + set_fp_reg(RRDR, 1, f64_mulAdd(frs1d, frs2d, frs3d ^ INT64_MIN)); else if(IS_INSN(NMADD_S)) - set_fp_reg(RRDR, 0, f32_mulAdd(FRS1S, FRS2S, FRS3S) ^ (uint32_t)INT32_MIN); + set_fp_reg(RRDR, 0, f32_mulAdd(frs1s, frs2s, frs3s) ^ (uint32_t)INT32_MIN); else if(IS_INSN(NMADD_D)) - set_fp_reg(RRDR, 1, f64_mulAdd(FRS1D, FRS2D, FRS3D) ^ INT64_MIN); + set_fp_reg(RRDR, 1, f64_mulAdd(frs1d, frs2d, frs3d) ^ INT64_MIN); else if(IS_INSN(NMSUB_S)) - set_fp_reg(RRDR, 0, f32_mulAdd(FRS1S, FRS2S, FRS3S ^ (uint32_t)INT32_MIN) ^ (uint32_t)INT32_MIN); + set_fp_reg(RRDR, 0, f32_mulAdd(frs1s, frs2s, frs3s ^ (uint32_t)INT32_MIN) ^ (uint32_t)INT32_MIN); else if(IS_INSN(NMSUB_D)) - set_fp_reg(RRDR, 1, f64_mulAdd(FRS1D, FRS2D, FRS3D ^ INT64_MIN) ^ INT64_MIN); + set_fp_reg(RRDR, 1, f64_mulAdd(frs1d, frs2d, frs3d ^ INT64_MIN) ^ INT64_MIN); else if(IS_INSN(DIV_S)) - set_fp_reg(RRDR, 0, f32_div(FRS1S, FRS2S)); + set_fp_reg(RRDR, 0, f32_div(frs1s, frs2s)); else if(IS_INSN(DIV_D)) - set_fp_reg(RRDR, 1, f64_div(FRS1D, FRS2D)); + set_fp_reg(RRDR, 1, f64_div(frs1d, frs2d)); else if(IS_INSN(SQRT_S)) - set_fp_reg(RRDR, 0, f32_sqrt(FRS1S)); + set_fp_reg(RRDR, 0, 
f32_sqrt(frs1s)); else if(IS_INSN(SQRT_D)) - set_fp_reg(RRDR, 1, f64_sqrt(FRS1D)); + set_fp_reg(RRDR, 1, f64_sqrt(frs1d)); else if(IS_INSN(TRUNC_W_S)) - XRDR = f32_to_i32_r_minMag(FRS1S,true); + XRDR = f32_to_i32_r_minMag(frs1s,true); else if(IS_INSN(TRUNC_W_D)) - XRDR = f64_to_i32_r_minMag(FRS1D,true); + XRDR = f64_to_i32_r_minMag(frs1d,true); else if(IS_INSN(TRUNC_L_S)) - XRDR = f32_to_i64_r_minMag(FRS1S,true); + XRDR = f32_to_i64_r_minMag(frs1s,true); else if(IS_INSN(TRUNC_L_D)) - XRDR = f64_to_i64_r_minMag(FRS1D,true); + XRDR = f64_to_i64_r_minMag(frs1d,true); else if(IS_INSN(TRUNCU_W_S)) - XRDR = f32_to_ui32_r_minMag(FRS1S,true); + XRDR = f32_to_ui32_r_minMag(frs1s,true); else if(IS_INSN(TRUNCU_W_D)) - XRDR = f64_to_ui32_r_minMag(FRS1D,true); + XRDR = f64_to_ui32_r_minMag(frs1d,true); else if(IS_INSN(TRUNCU_L_S)) - XRDR = f32_to_ui64_r_minMag(FRS1S,true); + XRDR = f32_to_ui64_r_minMag(frs1s,true); else if(IS_INSN(TRUNCU_L_D)) - XRDR = f64_to_ui64_r_minMag(FRS1D,true); + XRDR = f64_to_ui64_r_minMag(frs1d,true); else return -1; + fp_state.fsr |= softfloat_exceptionFlags; + + if(have_fp) + put_fp_state(); + advance_pc(tf); return 0; @@ -186,210 +201,141 @@ int emulate_fp(trapframe_t* tf) #define STR(x) XSTR(x) #define XSTR(x) #x -#define PUT_FP_REG_S(which) case which: \ - if(have_fp) \ - asm volatile("mtf.s $f" STR(which) ",%0" : : "r"(val)); \ - else fp_regs[which] = val; \ - if(noisy) printk("set fp sp reg %x to %x\n",which,val); \ - break -#define PUT_FP_REG(which, val) asm volatile("mtf.d $f" STR(which) ",%0" : : "r"(val)) -#define PUT_FP_REG_D(which) case 32+which: \ - if(have_fp) \ - PUT_FP_REG(which,val); \ - else fp_regs[which] = val; \ - if(noisy) printk("set fp dp reg %x to %x%x\n",which,(uint32_t)(val>>32),(uint32_t)val); \ - break -#define GET_FP_REG_S(which) case which: \ - if(have_fp) asm volatile("mff.s %0,$f" STR(which) : "=r"(val));\ - else val = (uint64_t)(int64_t)(int32_t)fp_regs[which]; \ - if(noisy) printk("get fp sp reg %x v 
%x\n",which,val); \ - break -#define GET_FP_REG_D(which) case 32+which: \ - if(have_fp) asm volatile("mff.d %0,$f" STR(which) : "=r"(val));\ - else val = fp_regs[which]; \ - if(noisy) printk("get fp dp reg %x v %x%x\n",which,(uint32_t)(val>>32),(uint32_t)val); \ - break +#define PUT_FP_REG(which, type, val) asm("mtf." STR(type) " $f" STR(which) ",%0" : : "r"(val)) +#define GET_FP_REG(which, type, val) asm("mff." STR(type) " %0,$f" STR(which) : "=r"(val)) -static void set_fp_reg(unsigned int which, unsigned int dp, uint64_t val) +static void __attribute__((noinline)) +set_fp_reg(unsigned int which, unsigned int dp, uint64_t val) { - switch(which + (!!dp)*32) + if(noisy) { - PUT_FP_REG_S(0); - PUT_FP_REG_S(1); - PUT_FP_REG_S(2); - PUT_FP_REG_S(3); - PUT_FP_REG_S(4); - PUT_FP_REG_S(5); - PUT_FP_REG_S(6); - PUT_FP_REG_S(7); - PUT_FP_REG_S(8); - PUT_FP_REG_S(9); - PUT_FP_REG_S(10); - PUT_FP_REG_S(11); - PUT_FP_REG_S(12); - PUT_FP_REG_S(13); - PUT_FP_REG_S(14); - PUT_FP_REG_S(15); - PUT_FP_REG_S(16); - PUT_FP_REG_S(17); - PUT_FP_REG_S(18); - PUT_FP_REG_S(19); - PUT_FP_REG_S(20); - PUT_FP_REG_S(21); - PUT_FP_REG_S(22); - PUT_FP_REG_S(23); - PUT_FP_REG_S(24); - PUT_FP_REG_S(25); - PUT_FP_REG_S(26); - PUT_FP_REG_S(27); - PUT_FP_REG_S(28); - PUT_FP_REG_S(29); - PUT_FP_REG_S(30); - PUT_FP_REG_S(31); - PUT_FP_REG_D(0); - PUT_FP_REG_D(1); - PUT_FP_REG_D(2); - PUT_FP_REG_D(3); - PUT_FP_REG_D(4); - PUT_FP_REG_D(5); - PUT_FP_REG_D(6); - PUT_FP_REG_D(7); - PUT_FP_REG_D(8); - PUT_FP_REG_D(9); - PUT_FP_REG_D(10); - PUT_FP_REG_D(11); - PUT_FP_REG_D(12); - PUT_FP_REG_D(13); - PUT_FP_REG_D(14); - PUT_FP_REG_D(15); - PUT_FP_REG_D(16); - PUT_FP_REG_D(17); - PUT_FP_REG_D(18); - PUT_FP_REG_D(19); - PUT_FP_REG_D(20); - PUT_FP_REG_D(21); - PUT_FP_REG_D(22); - PUT_FP_REG_D(23); - PUT_FP_REG_D(24); - PUT_FP_REG_D(25); - PUT_FP_REG_D(26); - PUT_FP_REG_D(27); - PUT_FP_REG_D(28); - PUT_FP_REG_D(29); - PUT_FP_REG_D(30); - PUT_FP_REG_D(31); - default: - panic("bad fp register"); + printk("fpr%c[%x] 
<= ",dp?'d':'s',which); + printk("%lx\n",val); + } + + if(dp || !have_fp) + fp_state.fpr[which] = val; + else + { + // to set an SP value, move the SP value into the FPU + // then move it back out as a DP value. OK to clobber $f0 + // because we'll restore it later. + uint64_t tmp; + GET_FP_REG(0,d,tmp); + PUT_FP_REG(0,s,val); + GET_FP_REG(0,d,fp_state.fpr[which]); + PUT_FP_REG(0,d,tmp); } } -static uint64_t get_fp_reg(unsigned int which, unsigned int dp) +static uint64_t __attribute__((noinline)) +get_fp_reg(unsigned int which, unsigned int dp) { uint64_t val; - switch(which + (!!dp)*32) + if(dp || !have_fp) + val = fp_state.fpr[which]; + else { - GET_FP_REG_S(0); - GET_FP_REG_S(1); - GET_FP_REG_S(2); - GET_FP_REG_S(3); - GET_FP_REG_S(4); - GET_FP_REG_S(5); - GET_FP_REG_S(6); - GET_FP_REG_S(7); - GET_FP_REG_S(8); - GET_FP_REG_S(9); - GET_FP_REG_S(10); - GET_FP_REG_S(11); - GET_FP_REG_S(12); - GET_FP_REG_S(13); - GET_FP_REG_S(14); - GET_FP_REG_S(15); - GET_FP_REG_S(16); - GET_FP_REG_S(17); - GET_FP_REG_S(18); - GET_FP_REG_S(19); - GET_FP_REG_S(20); - GET_FP_REG_S(21); - GET_FP_REG_S(22); - GET_FP_REG_S(23); - GET_FP_REG_S(24); - GET_FP_REG_S(25); - GET_FP_REG_S(26); - GET_FP_REG_S(27); - GET_FP_REG_S(28); - GET_FP_REG_S(29); - GET_FP_REG_S(30); - GET_FP_REG_S(31); - GET_FP_REG_D(0); - GET_FP_REG_D(1); - GET_FP_REG_D(2); - GET_FP_REG_D(3); - GET_FP_REG_D(4); - GET_FP_REG_D(5); - GET_FP_REG_D(6); - GET_FP_REG_D(7); - GET_FP_REG_D(8); - GET_FP_REG_D(9); - GET_FP_REG_D(10); - GET_FP_REG_D(11); - GET_FP_REG_D(12); - GET_FP_REG_D(13); - GET_FP_REG_D(14); - GET_FP_REG_D(15); - GET_FP_REG_D(16); - GET_FP_REG_D(17); - GET_FP_REG_D(18); - GET_FP_REG_D(19); - GET_FP_REG_D(20); - GET_FP_REG_D(21); - GET_FP_REG_D(22); - GET_FP_REG_D(23); - GET_FP_REG_D(24); - GET_FP_REG_D(25); - GET_FP_REG_D(26); - GET_FP_REG_D(27); - GET_FP_REG_D(28); - GET_FP_REG_D(29); - GET_FP_REG_D(30); - GET_FP_REG_D(31); - default: - panic("bad fp register"); + // to get an SP value, move the DP value 
into the FPU + // then move it back out as an SP value. OK to clobber $f0 + // because we'll restore it later. + uint64_t tmp; + GET_FP_REG(0,d,tmp); + PUT_FP_REG(0,d,fp_state.fpr[which]); + GET_FP_REG(0,s,val); + PUT_FP_REG(0,d,tmp); } + + if(noisy) + { + printk("fpr%c[%x] => ",dp?'d':'s',which); + printk("%lx\n",val); + } + return val; } -void init_fpregs() +static void __attribute__((noinline)) get_fp_state() +{ + GET_FP_REG(0, d, fp_state.fpr[0]); + GET_FP_REG(1, d, fp_state.fpr[1]); + GET_FP_REG(2, d, fp_state.fpr[2]); + GET_FP_REG(3, d, fp_state.fpr[3]); + GET_FP_REG(4, d, fp_state.fpr[4]); + GET_FP_REG(5, d, fp_state.fpr[5]); + GET_FP_REG(6, d, fp_state.fpr[6]); + GET_FP_REG(7, d, fp_state.fpr[7]); + GET_FP_REG(8, d, fp_state.fpr[8]); + GET_FP_REG(9, d, fp_state.fpr[9]); + GET_FP_REG(10, d, fp_state.fpr[10]); + GET_FP_REG(11, d, fp_state.fpr[11]); + GET_FP_REG(12, d, fp_state.fpr[12]); + GET_FP_REG(13, d, fp_state.fpr[13]); + GET_FP_REG(14, d, fp_state.fpr[14]); + GET_FP_REG(15, d, fp_state.fpr[15]); + GET_FP_REG(16, d, fp_state.fpr[16]); + GET_FP_REG(17, d, fp_state.fpr[17]); + GET_FP_REG(18, d, fp_state.fpr[18]); + GET_FP_REG(19, d, fp_state.fpr[19]); + GET_FP_REG(20, d, fp_state.fpr[20]); + GET_FP_REG(21, d, fp_state.fpr[21]); + GET_FP_REG(22, d, fp_state.fpr[22]); + GET_FP_REG(23, d, fp_state.fpr[23]); + GET_FP_REG(24, d, fp_state.fpr[24]); + GET_FP_REG(25, d, fp_state.fpr[25]); + GET_FP_REG(26, d, fp_state.fpr[26]); + GET_FP_REG(27, d, fp_state.fpr[27]); + GET_FP_REG(28, d, fp_state.fpr[28]); + GET_FP_REG(29, d, fp_state.fpr[29]); + GET_FP_REG(30, d, fp_state.fpr[30]); + GET_FP_REG(31, d, fp_state.fpr[31]); + + fp_state.fsr = mfcr(CR_FSR); +} + +static void __attribute__((noinline)) put_fp_state() { - PUT_FP_REG(0, 0); - PUT_FP_REG(1, 0); - PUT_FP_REG(2, 0); - PUT_FP_REG(3, 0); - PUT_FP_REG(4, 0); - PUT_FP_REG(5, 0); - PUT_FP_REG(6, 0); - PUT_FP_REG(7, 0); - PUT_FP_REG(8, 0); - PUT_FP_REG(9, 0); - PUT_FP_REG(10, 0); - PUT_FP_REG(11, 0); - PUT_FP_REG(12, 
0); - PUT_FP_REG(13, 0); - PUT_FP_REG(14, 0); - PUT_FP_REG(15, 0); - PUT_FP_REG(16, 0); - PUT_FP_REG(17, 0); - PUT_FP_REG(18, 0); - PUT_FP_REG(19, 0); - PUT_FP_REG(20, 0); - PUT_FP_REG(21, 0); - PUT_FP_REG(22, 0); - PUT_FP_REG(23, 0); - PUT_FP_REG(24, 0); - PUT_FP_REG(25, 0); - PUT_FP_REG(26, 0); - PUT_FP_REG(27, 0); - PUT_FP_REG(28, 0); - PUT_FP_REG(29, 0); - PUT_FP_REG(30, 0); - PUT_FP_REG(31, 0); + PUT_FP_REG(0, d, fp_state.fpr[0]); + PUT_FP_REG(1, d, fp_state.fpr[1]); + PUT_FP_REG(2, d, fp_state.fpr[2]); + PUT_FP_REG(3, d, fp_state.fpr[3]); + PUT_FP_REG(4, d, fp_state.fpr[4]); + PUT_FP_REG(5, d, fp_state.fpr[5]); + PUT_FP_REG(6, d, fp_state.fpr[6]); + PUT_FP_REG(7, d, fp_state.fpr[7]); + PUT_FP_REG(8, d, fp_state.fpr[8]); + PUT_FP_REG(9, d, fp_state.fpr[9]); + PUT_FP_REG(10, d, fp_state.fpr[10]); + PUT_FP_REG(11, d, fp_state.fpr[11]); + PUT_FP_REG(12, d, fp_state.fpr[12]); + PUT_FP_REG(13, d, fp_state.fpr[13]); + PUT_FP_REG(14, d, fp_state.fpr[14]); + PUT_FP_REG(15, d, fp_state.fpr[15]); + PUT_FP_REG(16, d, fp_state.fpr[16]); + PUT_FP_REG(17, d, fp_state.fpr[17]); + PUT_FP_REG(18, d, fp_state.fpr[18]); + PUT_FP_REG(19, d, fp_state.fpr[19]); + PUT_FP_REG(20, d, fp_state.fpr[20]); + PUT_FP_REG(21, d, fp_state.fpr[21]); + PUT_FP_REG(22, d, fp_state.fpr[22]); + PUT_FP_REG(23, d, fp_state.fpr[23]); + PUT_FP_REG(24, d, fp_state.fpr[24]); + PUT_FP_REG(25, d, fp_state.fpr[25]); + PUT_FP_REG(26, d, fp_state.fpr[26]); + PUT_FP_REG(27, d, fp_state.fpr[27]); + PUT_FP_REG(28, d, fp_state.fpr[28]); + PUT_FP_REG(29, d, fp_state.fpr[29]); + PUT_FP_REG(30, d, fp_state.fpr[30]); + PUT_FP_REG(31, d, fp_state.fpr[31]); + + mtcr(fp_state.fsr, CR_FSR); } + +void init_fp_regs() +{ + long sr = mfpcr(PCR_SR); + mtpcr(sr | SR_EF, PCR_SR); + put_fp_state(); + mtpcr(sr, PCR_SR); +} + diff --git a/pk/handlers.c b/pk/handlers.c index 7eee536..25005c8 100644 --- a/pk/handlers.c +++ b/pk/handlers.c @@ -13,7 +13,10 @@ void handle_breakpoint(trapframe_t* tf) void handle_fp_disabled(trapframe_t* 
tf) { if(have_fp) + { + init_fp_regs(); tf->sr |= SR_EF; + } else { if(emulate_fp(tf) != 0) @@ -23,6 +23,9 @@ #define PCR_K0 24 #define PCR_K1 25 +#define CR_FSR 0 +#define CR_TID 29 + #define TIMER_PERIOD 0x1000000 #define ASM_CR(r) _ASM_CR(r) @@ -37,6 +40,13 @@ asm volatile ("mfpcr %0,$cr%1" : "=r"(__tmp) : "i"(reg)); \ __tmp; }) +#define mtcr(val,reg) ({ long __tmp = (long)(val); \ + asm volatile ("mtcr %0,$cr%1"::"r"(__tmp),"i"(reg)); }) + +#define mfcr(reg) ({ long __tmp; \ + asm volatile ("mfcr %0,$cr%1" : "=r"(__tmp) : "i"(reg)); \ + __tmp; }) + #endif #endif @@ -130,25 +130,12 @@ void init_tf(trapframe_t* tf, long pc, long sp) tf->epc = USER_START; } -void init_fp() +static void bss_init() { - if (have_fp) - { - register long sr; - sr = mfpcr(PCR_SR); - mtpcr(sr | SR_EF, PCR_SR); - init_fpregs(); - mtpcr(sr, PCR_SR); - } -} - -void bss_init() -{ - extern char edata,end; - memset(&edata,0,&end-&edata); + // front-end server zeroes the bss automagically } -void mainvars_init() +static void mainvars_init() { sysret_t r = frontend_syscall(SYS_getmainvars, USER_MEM_SIZE-USER_MAINVARS_SIZE, USER_MAINVARS_SIZE, 0, 0); @@ -156,9 +143,8 @@ void mainvars_init() kassert(r.result == 0); } -void jump_usrstart() +static void jump_usrstart() { - init_fp(); trapframe_t tf; init_tf(&tf, USER_START, USER_MEM_SIZE-USER_MAINVARS_SIZE); pop_tf(&tf); @@ -1,6 +1,8 @@ #ifndef _PK_H #define _PK_H +#include <stdint.h> + typedef struct { long gpr[32]; @@ -12,6 +14,12 @@ typedef struct long insn; } trapframe_t; +typedef struct +{ + uint64_t fpr[32]; + uint32_t fsr; +} fp_state_t; + #define USER_MEM_SIZE 0x70000000 #define USER_MAINVARS_SIZE 0x1000 #define USER_START 0x10000 @@ -25,7 +33,7 @@ extern "C" { extern int have_fp; int emulate_fp(trapframe_t*); -void init_fpregs(); +void init_fp_regs(); void printk(const char* s, ...); void init_tf(trapframe_t*, long pc, long sp); diff --git a/pk/riscv-pk.c b/pk/riscv-pk.c index 771982c..ea5d4d4 100644 --- a/pk/riscv-pk.c +++ b/pk/riscv-pk.c 
@@ -22,7 +22,7 @@ void __attribute__((section(".boottext"))) __start() #endif mtpcr(sr0 | SR_EF, PCR_SR); - have_fp = mfpcr(PCR_SR) & SR_EF; + have_fp = 0;//mfpcr(PCR_SR) & SR_EF; mtpcr(sr0, PCR_SR); extern void boot(); |