aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Waterman <waterman@s144.Millennium.Berkeley.EDU>2010-11-05 16:46:36 -0700
committerAndrew Waterman <waterman@s144.Millennium.Berkeley.EDU>2010-11-21 16:54:33 -0800
commit783c0ec83143ad38cae2bf2aa86099fe4b4aa930 (patch)
treed784bce2a3c78ee6500390e868ba29009a960004
parent16a55362caca7f9bd5376eb1af20dbc3581159e2 (diff)
downloadpk-783c0ec83143ad38cae2bf2aa86099fe4b4aa930.zip
pk-783c0ec83143ad38cae2bf2aa86099fe4b4aa930.tar.gz
pk-783c0ec83143ad38cae2bf2aa86099fe4b4aa930.tar.bz2
[pk] various PK cleanups/speedups
-rw-r--r--pk/fp.c97
-rw-r--r--pk/fp.h13
-rw-r--r--pk/fp_asm.S84
-rw-r--r--pk/memset.c39
-rw-r--r--pk/pk.c1
-rw-r--r--pk/pk.h6
-rw-r--r--pk/pk.ld2
-rw-r--r--pk/pk.mk.in4
-rw-r--r--pk/riscv-pk.c2
-rw-r--r--pk/strlen.c26
10 files changed, 177 insertions, 97 deletions
diff --git a/pk/fp.c b/pk/fp.c
index 7ed43cb..a034c5a 100644
--- a/pk/fp.c
+++ b/pk/fp.c
@@ -2,6 +2,7 @@
#include "softfloat.h"
#include "riscv-opc.h"
#include "pk.h"
+#include "fp.h"
#include <stdint.h>
#define noisy 0
@@ -10,8 +11,6 @@ static void set_fp_reg(unsigned int which, unsigned int dp, uint64_t val);
static uint64_t get_fp_reg(unsigned int which, unsigned int dp);
static fp_state_t fp_state;
-static void get_fp_state();
-static void put_fp_state();
static inline void
validate_address(trapframe_t* tf, long addr, int size, int store)
@@ -24,9 +23,8 @@ validate_address(trapframe_t* tf, long addr, int size, int store)
int emulate_fp(trapframe_t* tf)
{
- fp_state.fsr = mfcr(CR_FSR);
if(have_fp)
- get_fp_state();
+ fp_state.fsr = get_fp_state(fp_state.fpr);
if(noisy)
printk("FPU emulation at pc %lx, insn %x\n",tf->epc,(uint32_t)tf->insn);
@@ -45,9 +43,9 @@ int emulate_fp(trapframe_t* tf)
#define XRS2 (tf->gpr[RRS2])
#define XRDR (tf->gpr[RRD])
- uint64_t frs1d = get_fp_reg(RRS1, 1);
- uint64_t frs2d = get_fp_reg(RRS2, 1);
- uint64_t frs3d = get_fp_reg(RRS3, 1);
+ uint64_t frs1d = fp_state.fpr[RRS1];
+ uint64_t frs2d = fp_state.fpr[RRS2];
+ uint64_t frs3d = fp_state.fpr[RRS3];
uint32_t frs1s = get_fp_reg(RRS1, 0);
uint32_t frs2s = get_fp_reg(RRS2, 0);
uint32_t frs3s = get_fp_reg(RRS3, 0);
@@ -217,9 +215,8 @@ int emulate_fp(trapframe_t* tf)
else
return -1;
- mtcr(fp_state.fsr, CR_FSR);
if(have_fp)
- put_fp_state();
+ put_fp_state(fp_state.fpr,fp_state.fsr);
advance_pc(tf);
@@ -231,6 +228,8 @@ int emulate_fp(trapframe_t* tf)
#define PUT_FP_REG(which, type, val) asm("mtf." STR(type) " $f" STR(which) ",%0" : : "r"(val))
#define GET_FP_REG(which, type, val) asm("mff." STR(type) " %0,$f" STR(which) : "=r"(val))
+#define LOAD_FP_REG(which, type, val) asm("l." STR(type) " $f" STR(which) ",%0" : : "m"(val))
+#define STORE_FP_REG(which, type, val) asm("s." STR(type) " $f" STR(which) ",%0" : "=m"(val) : : "memory")
static void __attribute__((noinline))
set_fp_reg(unsigned int which, unsigned int dp, uint64_t val)
@@ -248,11 +247,8 @@ set_fp_reg(unsigned int which, unsigned int dp, uint64_t val)
// to set an SP value, move the SP value into the FPU
// then move it back out as a DP value. OK to clobber $f0
// because we'll restore it later.
- uint64_t tmp;
- GET_FP_REG(0,d,tmp);
PUT_FP_REG(0,s,val);
GET_FP_REG(0,d,fp_state.fpr[which]);
- PUT_FP_REG(0,d,tmp);
}
}
@@ -267,11 +263,8 @@ get_fp_reg(unsigned int which, unsigned int dp)
// to get an SP value, move the DP value into the FPU
// then move it back out as an SP value. OK to clobber $f0
// because we'll restore it later.
- uint64_t tmp;
- GET_FP_REG(0,d,tmp);
PUT_FP_REG(0,d,fp_state.fpr[which]);
GET_FP_REG(0,s,val);
- PUT_FP_REG(0,d,tmp);
}
if(noisy)
@@ -283,83 +276,11 @@ get_fp_reg(unsigned int which, unsigned int dp)
return val;
}
-static void __attribute__((noinline)) get_fp_state()
-{
- GET_FP_REG(0, d, fp_state.fpr[0]);
- GET_FP_REG(1, d, fp_state.fpr[1]);
- GET_FP_REG(2, d, fp_state.fpr[2]);
- GET_FP_REG(3, d, fp_state.fpr[3]);
- GET_FP_REG(4, d, fp_state.fpr[4]);
- GET_FP_REG(5, d, fp_state.fpr[5]);
- GET_FP_REG(6, d, fp_state.fpr[6]);
- GET_FP_REG(7, d, fp_state.fpr[7]);
- GET_FP_REG(8, d, fp_state.fpr[8]);
- GET_FP_REG(9, d, fp_state.fpr[9]);
- GET_FP_REG(10, d, fp_state.fpr[10]);
- GET_FP_REG(11, d, fp_state.fpr[11]);
- GET_FP_REG(12, d, fp_state.fpr[12]);
- GET_FP_REG(13, d, fp_state.fpr[13]);
- GET_FP_REG(14, d, fp_state.fpr[14]);
- GET_FP_REG(15, d, fp_state.fpr[15]);
- GET_FP_REG(16, d, fp_state.fpr[16]);
- GET_FP_REG(17, d, fp_state.fpr[17]);
- GET_FP_REG(18, d, fp_state.fpr[18]);
- GET_FP_REG(19, d, fp_state.fpr[19]);
- GET_FP_REG(20, d, fp_state.fpr[20]);
- GET_FP_REG(21, d, fp_state.fpr[21]);
- GET_FP_REG(22, d, fp_state.fpr[22]);
- GET_FP_REG(23, d, fp_state.fpr[23]);
- GET_FP_REG(24, d, fp_state.fpr[24]);
- GET_FP_REG(25, d, fp_state.fpr[25]);
- GET_FP_REG(26, d, fp_state.fpr[26]);
- GET_FP_REG(27, d, fp_state.fpr[27]);
- GET_FP_REG(28, d, fp_state.fpr[28]);
- GET_FP_REG(29, d, fp_state.fpr[29]);
- GET_FP_REG(30, d, fp_state.fpr[30]);
- GET_FP_REG(31, d, fp_state.fpr[31]);
-}
-
-static void __attribute__((noinline)) put_fp_state()
-{
- PUT_FP_REG(0, d, fp_state.fpr[0]);
- PUT_FP_REG(1, d, fp_state.fpr[1]);
- PUT_FP_REG(2, d, fp_state.fpr[2]);
- PUT_FP_REG(3, d, fp_state.fpr[3]);
- PUT_FP_REG(4, d, fp_state.fpr[4]);
- PUT_FP_REG(5, d, fp_state.fpr[5]);
- PUT_FP_REG(6, d, fp_state.fpr[6]);
- PUT_FP_REG(7, d, fp_state.fpr[7]);
- PUT_FP_REG(8, d, fp_state.fpr[8]);
- PUT_FP_REG(9, d, fp_state.fpr[9]);
- PUT_FP_REG(10, d, fp_state.fpr[10]);
- PUT_FP_REG(11, d, fp_state.fpr[11]);
- PUT_FP_REG(12, d, fp_state.fpr[12]);
- PUT_FP_REG(13, d, fp_state.fpr[13]);
- PUT_FP_REG(14, d, fp_state.fpr[14]);
- PUT_FP_REG(15, d, fp_state.fpr[15]);
- PUT_FP_REG(16, d, fp_state.fpr[16]);
- PUT_FP_REG(17, d, fp_state.fpr[17]);
- PUT_FP_REG(18, d, fp_state.fpr[18]);
- PUT_FP_REG(19, d, fp_state.fpr[19]);
- PUT_FP_REG(20, d, fp_state.fpr[20]);
- PUT_FP_REG(21, d, fp_state.fpr[21]);
- PUT_FP_REG(22, d, fp_state.fpr[22]);
- PUT_FP_REG(23, d, fp_state.fpr[23]);
- PUT_FP_REG(24, d, fp_state.fpr[24]);
- PUT_FP_REG(25, d, fp_state.fpr[25]);
- PUT_FP_REG(26, d, fp_state.fpr[26]);
- PUT_FP_REG(27, d, fp_state.fpr[27]);
- PUT_FP_REG(28, d, fp_state.fpr[28]);
- PUT_FP_REG(29, d, fp_state.fpr[29]);
- PUT_FP_REG(30, d, fp_state.fpr[30]);
- PUT_FP_REG(31, d, fp_state.fpr[31]);
-}
-
void init_fp_regs()
{
long sr = mfpcr(PCR_SR);
mtpcr(sr | SR_EF, PCR_SR);
- put_fp_state();
+ put_fp_state(fp_state.fpr,fp_state.fsr);
mtpcr(sr, PCR_SR);
}
diff --git a/pk/fp.h b/pk/fp.h
new file mode 100644
index 0000000..6cdae46
--- /dev/null
+++ b/pk/fp.h
@@ -0,0 +1,13 @@
+#ifndef _FP_H
+#define _FP_H
+#include <stdint.h> // uint64_t/uint32_t below; keeps this header self-contained
+// Software copy of the FP context handled by the two routines below.
+typedef struct
+{
+  uint64_t fpr[32]; // 64-bit image of each of $f0..$f31
+  uint32_t fsr;     // FP status register (CR_FSR) value
+} fp_state_t;
+// Defined in fp_asm.S.  fp_regs addresses 32 consecutive 8-byte slots.
+void put_fp_state(const void* fp_regs, long fsr); // load $f0-$f31, then set FSR
+long get_fp_state(void* fp_regs); // store $f0-$f31, return the FSR value
+#endif
diff --git a/pk/fp_asm.S b/pk/fp_asm.S
new file mode 100644
index 0000000..4788e84
--- /dev/null
+++ b/pk/fp_asm.S
@@ -0,0 +1,84 @@
+#include "pcr.h"
+
+ .text
+ .globl get_fp_state
+ .ent get_fp_state
+get_fp_state:
+ /* long get_fp_state(void* fp_regs), per fp.h: the CR_FSR value read into $v0 here is what the C caller stores as fsr. */
+ mfcr $v0, ASM_CR(CR_FSR)
+ /* Store $f0..$f31 as doublewords; $f<i> goes to offset 8*i from $a0 (the fp_regs argument). */
+ s.d $f0 , 0($a0)
+ s.d $f1 , 8($a0)
+ s.d $f2 , 16($a0)
+ s.d $f3 , 24($a0)
+ s.d $f4 , 32($a0)
+ s.d $f5 , 40($a0)
+ s.d $f6 , 48($a0)
+ s.d $f7 , 56($a0)
+ s.d $f8 , 64($a0)
+ s.d $f9 , 72($a0)
+ s.d $f10, 80($a0)
+ s.d $f11, 88($a0)
+ s.d $f12, 96($a0)
+ s.d $f13,104($a0)
+ s.d $f14,112($a0)
+ s.d $f15,120($a0)
+ s.d $f16,128($a0)
+ s.d $f17,136($a0)
+ s.d $f18,144($a0)
+ s.d $f19,152($a0)
+ s.d $f20,160($a0)
+ s.d $f21,168($a0)
+ s.d $f22,176($a0)
+ s.d $f23,184($a0)
+ s.d $f24,192($a0)
+ s.d $f25,200($a0)
+ s.d $f26,208($a0)
+ s.d $f27,216($a0)
+ s.d $f28,224($a0)
+ s.d $f29,232($a0)
+ s.d $f30,240($a0)
+ s.d $f31,248($a0)
+ /* NOTE(review): no return instruction is visible before .end, so execution appears to fall through into put_fp_state -- confirm and add the ISA's return here. */
+ .end get_fp_state
+
+ .globl put_fp_state
+ .ent put_fp_state
+put_fp_state:
+ /* void put_fp_state(const void* fp_regs, long fsr), per fp.h: reload $f<i> from the doubleword at offset 8*i from $a0 (the fp_regs argument). */
+ l.d $f0 , 0($a0)
+ l.d $f1 , 8($a0)
+ l.d $f2 , 16($a0)
+ l.d $f3 , 24($a0)
+ l.d $f4 , 32($a0)
+ l.d $f5 , 40($a0)
+ l.d $f6 , 48($a0)
+ l.d $f7 , 56($a0)
+ l.d $f8 , 64($a0)
+ l.d $f9 , 72($a0)
+ l.d $f10, 80($a0)
+ l.d $f11, 88($a0)
+ l.d $f12, 96($a0)
+ l.d $f13,104($a0)
+ l.d $f14,112($a0)
+ l.d $f15,120($a0)
+ l.d $f16,128($a0)
+ l.d $f17,136($a0)
+ l.d $f18,144($a0)
+ l.d $f19,152($a0)
+ l.d $f20,160($a0)
+ l.d $f21,168($a0)
+ l.d $f22,176($a0)
+ l.d $f23,184($a0)
+ l.d $f24,192($a0)
+ l.d $f25,200($a0)
+ l.d $f26,208($a0)
+ l.d $f27,216($a0)
+ l.d $f28,224($a0)
+ l.d $f29,232($a0)
+ l.d $f30,240($a0)
+ l.d $f31,248($a0)
+ /* Write $a1 (the fsr argument) to the FP status register once every register has been reloaded. */
+ mtcr $a1, ASM_CR(CR_FSR)
+ /* NOTE(review): no return instruction is visible before .end, so control appears to run off the end of this routine -- confirm and add the ISA's return here. */
+ .end put_fp_state
diff --git a/pk/memset.c b/pk/memset.c
new file mode 100644
index 0000000..f062ad4
--- /dev/null
+++ b/pk/memset.c
@@ -0,0 +1,39 @@
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+// Fill the s bytes starting at m with the low 8 bits of ch; returns m.
+// s == 0 writes nothing, and m may have any alignment.
+// Leading bytes are stored one at a time until the cursor is long-aligned,
+// the middle is stored a long at a time, and leftover bytes finish the tail.
+void* memset(void* m, int ch, size_t s)
+{
+  unsigned char* mem = (unsigned char*)m;
+  unsigned char byte = (unsigned char)ch;
+
+  // Test the moving cursor rather than m: m never changes, so testing it
+  // sent every unaligned fill through this byte loop in its entirety.
+  while(((unsigned long)mem & (sizeof(long)-1)) && s)
+  {
+    *mem++ = byte;
+    s--;
+  }
+
+  // ~0UL/0xFF is 0x01 in every byte of a long whatever its width, so the
+  // multiply replicates the fill byte without any sizeof special cases.
+  unsigned long l = (~0UL/0xFF) * byte;
+
+  // Store whole longs only.  The count is in longs, not bytes, and is
+  // rounded down, so no store can land beyond m+s (a byte-sized, rounded-up
+  // bound would overrun the buffer).
+  unsigned long* lmem = (unsigned long*)mem;
+  size_t nlongs = s/sizeof(long);
+  for(size_t i = 0; i < nlongs; i++)
+    lmem[i] = l;
+
+  // Fewer than sizeof(long) bytes can remain; store them singly.
+  for(size_t i = nlongs*sizeof(long); i < s; i++)
+    mem[i] = byte;
+
+  return m;
+}
diff --git a/pk/pk.c b/pk/pk.c
index e9e3e71..f4e0424 100644
--- a/pk/pk.c
+++ b/pk/pk.c
@@ -145,6 +145,7 @@ static void mainvars_init()
static void jump_usrstart()
{
+ printk("strlen(\"\") = %d\n",strlen(""));
trapframe_t tf;
init_tf(&tf, USER_START, USER_MEM_SIZE-USER_MAINVARS_SIZE);
pop_tf(&tf);
diff --git a/pk/pk.h b/pk/pk.h
index 0e56fcc..a1fd0da 100644
--- a/pk/pk.h
+++ b/pk/pk.h
@@ -13,12 +13,6 @@ typedef struct
long insn;
} trapframe_t;
-typedef struct
-{
- uint64_t fpr[32];
- uint32_t fsr;
-} fp_state_t;
-
#define USER_MEM_SIZE 0x70000000
#define USER_MAINVARS_SIZE 0x1000
#define USER_START 0x10000
diff --git a/pk/pk.ld b/pk/pk.ld
index 0ccf7ef..577de0f 100644
--- a/pk/pk.ld
+++ b/pk/pk.ld
@@ -2,8 +2,6 @@ OUTPUT_ARCH( "mips:riscv" )
ENTRY( __start )
-GROUP( -lc -lgcc -lgloss )
-
SECTIONS
{
diff --git a/pk/pk.mk.in b/pk/pk.mk.in
index 1de66e8..8d1a06a 100644
--- a/pk/pk.mk.in
+++ b/pk/pk.mk.in
@@ -5,6 +5,7 @@ pk_subproject_deps = \
pk_hdrs = \
pk.h \
pcr.h \
+ fp.h \
atomic.h \
file.h \
frontend.h \
@@ -17,9 +18,12 @@ pk_c_srcs = \
handlers.c \
frontend.c \
fp.c \
+ memset.c \
+ strlen.c \
pk_asm_srcs = \
entry.S \
+ fp_asm.S \
pk_test_srcs =
diff --git a/pk/riscv-pk.c b/pk/riscv-pk.c
index ea5d4d4..771982c 100644
--- a/pk/riscv-pk.c
+++ b/pk/riscv-pk.c
@@ -22,7 +22,7 @@ void __attribute__((section(".boottext"))) __start()
#endif
mtpcr(sr0 | SR_EF, PCR_SR);
- have_fp = 0;//mfpcr(PCR_SR) & SR_EF;
+ have_fp = mfpcr(PCR_SR) & SR_EF;
mtpcr(sr0, PCR_SR);
extern void boot();
diff --git a/pk/strlen.c b/pk/strlen.c
new file mode 100644
index 0000000..f600d47
--- /dev/null
+++ b/pk/strlen.c
@@ -0,0 +1,26 @@
+#include <string.h>
+#include <stdlib.h>
+
+// from http://www-graphics.stanford.edu/~seander/bithacks.html
+// Returns nonzero iff at least one byte of l is 0x00.
+static inline long hasZeroByte(long l)
+{
+  // ~0UL/0xFF is 0x01 in every byte for any long width: no sizeof branch.
+  unsigned long v = (unsigned long)l, ones = ~0UL/0xFF;
+  return (long)((v - ones) & ~v & (ones << 7));
+}
+
+// Returns the number of chars in s that precede its terminating NUL.
+size_t strlen(const char* s)
+{
+  const char* p = s;
+  // Fast path, taken only when s starts on a long boundary: step over
+  // every whole long that hasZeroByte reports as free of zero bytes.
+  if(!((long)s & (sizeof(long)-1)))
+    while(!hasZeroByte(*(long*)p))
+      p += sizeof(long);
+  // Byte scan from wherever the fast path stopped (or from s itself).
+  while(*p)
+    p++;
+  return (size_t)(p - s);
+}