aboutsummaryrefslogtreecommitdiff
path: root/pk
diff options
context:
space:
mode:
author: Andrew Waterman <waterman@cs.berkeley.edu> 2016-03-04 21:02:42 -0800
committer: Andrew Waterman <waterman@cs.berkeley.edu> 2016-03-04 21:02:42 -0800
commit: bbc9a65fed7c85ee058d7188a62f0b904c38b77b (patch)
tree: 004716e7b7a0ff50cd7ca80b0e6e5b035b96df98 /pk
parent: 82dcccf73c7be17415a7e84fc872c9627ee275fc (diff)
download: riscv-pk-bbc9a65fed7c85ee058d7188a62f0b904c38b77b.zip
download: riscv-pk-bbc9a65fed7c85ee058d7188a62f0b904c38b77b.tar.gz
download: riscv-pk-bbc9a65fed7c85ee058d7188a62f0b904c38b77b.tar.bz2
Begin refactoring emulation code
Diffstat (limited to 'pk')
-rw-r--r-- pk/bits.h 3
-rw-r--r-- pk/emulation.c 596
-rw-r--r-- pk/emulation.h 28
-rw-r--r-- pk/fp_emulation.c 449
-rw-r--r-- pk/fp_emulation.h 81
-rw-r--r-- pk/mentry.S 10
-rw-r--r-- pk/minit.c 1
-rw-r--r-- pk/mtrap.c 90
-rw-r--r-- pk/mtrap.h 136
-rw-r--r-- pk/pk.h 3
-rw-r--r-- pk/pk.mk.in 15
-rw-r--r-- pk/unprivileged_memory.h 79
12 files changed, 771 insertions, 720 deletions
diff --git a/pk/bits.h b/pk/bits.h
index e7fd8d3..9947351 100644
--- a/pk/bits.h
+++ b/pk/bits.h
@@ -1,6 +1,9 @@
#ifndef PK_BITS_H
#define PK_BITS_H
+#define likely(x) __builtin_expect((x), 1)
+#define unlikely(x) __builtin_expect((x), 0)
+
#define CONST_POPCOUNT2(x) ((((x) >> 0) & 1) + (((x) >> 1) & 1))
#define CONST_POPCOUNT4(x) (CONST_POPCOUNT2(x) + CONST_POPCOUNT2((x)>>2))
#define CONST_POPCOUNT8(x) (CONST_POPCOUNT4(x) + CONST_POPCOUNT4((x)>>4))
diff --git a/pk/emulation.c b/pk/emulation.c
index 87db2de..4943e0d 100644
--- a/pk/emulation.c
+++ b/pk/emulation.c
@@ -1,24 +1,76 @@
+#include "emulation.h"
+#include "fp_emulation.h"
+#include "config.h"
+#include "unprivileged_memory.h"
#include "mtrap.h"
-#include "softfloat.h"
#include <limits.h>
-void redirect_trap(uintptr_t epc, uintptr_t mstatus)
+void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
- write_csr(sepc, epc);
- write_csr(scause, read_csr(mcause));
- write_csr(mepc, read_csr(stvec));
+ asm (".pushsection .rodata\n"
+ "illegal_insn_trap_table:\n"
+ " .word truly_illegal_insn\n"
+#ifdef PK_ENABLE_FP_EMULATION
+ " .word emulate_float_load\n"
+#else
+ " .word truly_illegal_insn\n"
+#endif
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+#ifdef PK_ENABLE_FP_EMULATION
+ " .word emulate_float_store\n"
+#else
+ " .word truly_illegal_insn\n"
+#endif
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_mul_div\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_mul_div32\n"
+ " .word truly_illegal_insn\n"
+#ifdef PK_ENABLE_FP_EMULATION
+ " .word emulate_fmadd\n"
+ " .word emulate_fmadd\n"
+ " .word emulate_fmadd\n"
+ " .word emulate_fmadd\n"
+ " .word emulate_fp\n"
+#else
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+#endif
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_system\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .popsection");
- uintptr_t prev_priv = EXTRACT_FIELD(mstatus, MSTATUS_MPP);
- uintptr_t prev_ie = EXTRACT_FIELD(mstatus, MSTATUS_MPIE);
- kassert(prev_priv <= PRV_S);
- mstatus = INSERT_FIELD(mstatus, MSTATUS_SPP, prev_priv);
- mstatus = INSERT_FIELD(mstatus, MSTATUS_SPIE, prev_ie);
- mstatus = INSERT_FIELD(mstatus, MSTATUS_MPP, PRV_S);
- mstatus = INSERT_FIELD(mstatus, MSTATUS_MPIE, 0);
- write_csr(mstatus, mstatus);
+ uintptr_t mstatus;
+ insn_t insn = get_insn(mepc, &mstatus);
- extern void __redirect_trap();
- return __redirect_trap();
+ if (unlikely((insn & 3) != 3))
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+ write_csr(mepc, mepc + 4);
+
+ extern int32_t illegal_insn_trap_table[];
+ int32_t* pf = (void*)illegal_insn_trap_table + (insn & 0x7c);
+ emulation_func f = (emulation_func)(uintptr_t)*pf;
+ f(regs, mcause, mepc, mstatus, insn);
}
void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn)
@@ -28,8 +80,14 @@ void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mca
void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
+ union {
+ uint8_t bytes[8];
+ uintptr_t intx;
+ uint64_t int64;
+ } val;
uintptr_t mstatus;
insn_t insn = get_insn(mepc, &mstatus);
+ uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
int shift = 0, fp = 0, len;
if ((insn & MASK_LW) == MATCH_LW)
@@ -51,139 +109,54 @@ void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
- uintptr_t val = 0, tmp, tmp2;
- unpriv_mem_access("add %[tmp2], %[addr], %[len];"
- "1: slli %[val], %[val], 8;"
- "lbu %[tmp], -1(%[tmp2]);"
- "addi %[tmp2], %[tmp2], -1;"
- "or %[val], %[val], %[tmp];"
- "bne %[addr], %[tmp2], 1b;",
- val, tmp, tmp2, addr, len);
-
- if (shift)
- val = (intptr_t)val << shift >> shift;
+ val.int64 = 0;
+ for (intptr_t i = len-1; i >= 0; i--)
+ val.bytes[i] = load_uint8_t((void *)(addr + i), mepc);
if (!fp)
- SET_RD(insn, regs, val);
+ SET_RD(insn, regs, (intptr_t)val.intx << shift >> shift);
else if (len == 8)
- SET_F64_RD(insn, regs, val);
+ SET_F64_RD(insn, regs, val.int64);
else
- SET_F32_RD(insn, regs, val);
+ SET_F32_RD(insn, regs, val.intx);
write_csr(mepc, mepc + 4);
}
void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
+ union {
+ uint8_t bytes[8];
+ uintptr_t intx;
+ uint64_t int64;
+ } val;
uintptr_t mstatus;
insn_t insn = get_insn(mepc, &mstatus);
-
- uintptr_t val = GET_RS2(insn, regs), error;
int len;
+
+ val.intx = GET_RS2(insn, regs);
if ((insn & MASK_SW) == MATCH_SW)
len = 4;
+#ifdef __riscv64
else if ((insn & MASK_SD) == MATCH_SD)
len = 8;
+#endif
else if ((insn & MASK_FSD) == MATCH_FSD)
- len = 8, val = GET_F64_RS2(insn, regs);
+ len = 8, val.int64 = GET_F64_RS2(insn, regs);
else if ((insn & MASK_FSW) == MATCH_FSW)
- len = 4, val = GET_F32_RS2(insn, regs);
+ len = 4, val.intx = GET_F32_RS2(insn, regs);
else if ((insn & MASK_SH) == MATCH_SH)
len = 2;
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn);
- uintptr_t tmp, tmp2, addr_end = addr + len;
- unpriv_mem_access("mv %[tmp], %[val];"
- "mv %[tmp2], %[addr];"
- "1: sb %[tmp], 0(%[tmp2]);"
- "srli %[tmp], %[tmp], 8;"
- "addi %[tmp2], %[tmp2], 1;"
- "bne %[tmp2], %[addr_end], 1b",
- tmp, tmp2, unused1, val, addr, addr_end);
+ for (size_t i = 0; i < len; i++)
+ store_uint8_t((void *)(addr + i), val.bytes[i], mepc);
write_csr(mepc, mepc + 4);
}
-DECLARE_EMULATION_FUNC(emulate_float_load)
-{
- uintptr_t val_lo, val_hi;
- uint64_t val;
- uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
-
- switch (insn & MASK_FUNCT3)
- {
- case MATCH_FLW & MASK_FUNCT3:
- if (addr % 4 != 0)
- return misaligned_load_trap(regs, mcause, mepc);
-
- unpriv_mem_access("lw %[val_lo], (%[addr])",
- val_lo, unused1, unused2, addr, mepc/*X*/);
- SET_F32_RD(insn, regs, val_lo);
- break;
-
- case MATCH_FLD & MASK_FUNCT3:
- if (addr % sizeof(uintptr_t) != 0)
- return misaligned_load_trap(regs, mcause, mepc);
-
-#ifdef __riscv64
- unpriv_mem_access("ld %[val], (%[addr])",
- val, val_hi/*X*/, unused1, addr, mepc/*X*/);
-#else
- unpriv_mem_access("lw %[val_lo], (%[addr]);"
- "lw %[val_hi], 4(%[addr])",
- val_lo, val_hi, unused1, addr, mepc/*X*/);
- val = val_lo | ((uint64_t)val_hi << 32);
-#endif
- SET_F64_RD(insn, regs, val);
- break;
-
- default:
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_float_store)
-{
- uintptr_t val_lo, val_hi;
- uint64_t val;
- uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn);
-
- switch (insn & MASK_FUNCT3)
- {
- case MATCH_FSW & MASK_FUNCT3:
- if (addr % 4 != 0)
- return misaligned_store_trap(regs, mcause, mepc);
-
- val_lo = GET_F32_RS2(insn, regs);
- unpriv_mem_access("sw %[val_lo], (%[addr])",
- unused1, unused2, unused3, val_lo, addr);
- break;
-
- case MATCH_FSD & MASK_FUNCT3:
- if (addr % sizeof(uintptr_t) != 0)
- return misaligned_store_trap(regs, mcause, mepc);
-
- val = GET_F64_RS2(insn, regs);
-#ifdef __riscv64
- unpriv_mem_access("sd %[val], (%[addr])",
- unused1, unused2, unused3, val, addr);
-#else
- val_lo = val;
- val_hi = val >> 32;
- unpriv_mem_access("sw %[val_lo], (%[addr]);"
- "sw %[val_hi], 4(%[addr])",
- unused1, unused2, unused3, val_lo, val_hi, addr);
-#endif
- break;
-
- default:
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
#ifdef __riscv64
typedef __int128 double_int;
#else
@@ -279,7 +252,7 @@ static inline int emulate_read_csr(int num, uintptr_t mstatus, uintptr_t* result
+ HLS()->sinstret_delta) >> 32;
return 0;
#endif
-#ifndef __riscv_hard_float
+#ifdef PK_ENABLE_FP_EMULATION
case CSR_FRM:
if ((mstatus & MSTATUS_FS) == 0) break;
*result = GET_FRM();
@@ -301,7 +274,7 @@ static inline int emulate_write_csr(int num, uintptr_t value, uintptr_t mstatus)
{
switch (num)
{
-#ifndef __riscv_hard_float
+#ifdef PK_ENABLE_FP_EMULATION /* same guard as the FP CSR cases in emulate_read_csr */
case CSR_FRM: SET_FRM(value); return 0;
case CSR_FFLAGS: SET_FFLAGS(value); return 0;
case CSR_FCSR: SET_FCSR(value); return 0;
@@ -338,384 +311,3 @@ DECLARE_EMULATION_FUNC(emulate_system)
SET_RD(insn, regs, csr_val);
}
-
-DECLARE_EMULATION_FUNC(emulate_fp)
-{
- asm (".pushsection .rodata\n"
- "fp_emulation_table:\n"
- " .word emulate_fadd\n"
- " .word emulate_fsub\n"
- " .word emulate_fmul\n"
- " .word emulate_fdiv\n"
- " .word emulate_fsgnj\n"
- " .word emulate_fmin\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fcvt_ff\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fsqrt\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fcmp\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fcvt_if\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fcvt_fi\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fmv_if\n"
- " .word truly_illegal_insn\n"
- " .word emulate_fmv_fi\n"
- " .word truly_illegal_insn\n"
- " .popsection");
-
- // if FPU is disabled, punt back to the OS
- if (unlikely((mstatus & MSTATUS_FS) == 0))
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- extern int32_t fp_emulation_table[];
- int32_t* pf = (void*)fp_emulation_table + ((insn >> 25) & 0x7c);
- emulation_func f = (emulation_func)(uintptr_t)*pf;
-
- SETUP_STATIC_ROUNDING(insn);
- return f(regs, mcause, mepc, mstatus, insn);
-}
-
-void emulate_any_fadd(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn, int32_t neg_b)
-{
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs) ^ neg_b;
- SET_F32_RD(insn, regs, f32_add(rs1, rs2));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs) ^ ((uint64_t)neg_b << 32);
- SET_F64_RD(insn, regs, f64_add(rs1, rs2));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fadd)
-{
- return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, 0);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fsub)
-{
- return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, INT32_MIN);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fmul)
-{
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs);
- SET_F32_RD(insn, regs, f32_mul(rs1, rs2));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs);
- SET_F64_RD(insn, regs, f64_mul(rs1, rs2));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fdiv)
-{
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs);
- SET_F32_RD(insn, regs, f32_div(rs1, rs2));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs);
- SET_F64_RD(insn, regs, f64_div(rs1, rs2));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fsqrt)
-{
- if ((insn >> 20) & 0x1f)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- if (GET_PRECISION(insn) == PRECISION_S) {
- SET_F32_RD(insn, regs, f32_sqrt(GET_F32_RS1(insn, regs)));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- SET_F64_RD(insn, regs, f64_sqrt(GET_F64_RS1(insn, regs)));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fsgnj)
-{
- int rm = GET_RM(insn);
- if (rm >= 3)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- #define DO_FSGNJ(rs1, rs2, rm) ({ \
- typeof(rs1) rs1_sign = (rs1) >> (8*sizeof(rs1)-1); \
- typeof(rs1) rs2_sign = (rs2) >> (8*sizeof(rs1)-1); \
- rs1_sign &= (rm) >> 1; \
- rs1_sign ^= (rm) ^ rs2_sign; \
- ((rs1) << 1 >> 1) | (rs1_sign << (8*sizeof(rs1)-1)); })
-
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs);
- SET_F32_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs);
- SET_F64_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fmin)
-{
- int rm = GET_RM(insn);
- if (rm >= 2)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs);
- uint32_t arg1 = rm ? rs2 : rs1;
- uint32_t arg2 = rm ? rs1 : rs2;
- int use_rs1 = f32_lt_quiet(arg1, arg2) || isNaNF32UI(rs2);
- SET_F32_RD(insn, regs, use_rs1 ? rs1 : rs2);
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs);
- uint64_t arg1 = rm ? rs2 : rs1;
- uint64_t arg2 = rm ? rs1 : rs2;
- int use_rs1 = f64_lt_quiet(arg1, arg2) || isNaNF64UI(rs2);
- SET_F64_RD(insn, regs, use_rs1 ? rs1 : rs2);
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fcvt_ff)
-{
- int rs2_num = (insn >> 20) & 0x1f;
- if (GET_PRECISION(insn) == PRECISION_S) {
- if (rs2_num != 1)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- SET_F32_RD(insn, regs, f64_to_f32(GET_F64_RS1(insn, regs)));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- if (rs2_num != 0)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- SET_F64_RD(insn, regs, f32_to_f64(GET_F32_RS1(insn, regs)));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
-
-DECLARE_EMULATION_FUNC(emulate_fcvt_fi)
-{
- if (GET_PRECISION(insn) != PRECISION_S && GET_PRECISION(insn) != PRECISION_D)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- int negative = 0;
- uint64_t uint_val = GET_RS1(insn, regs);
-
- switch ((insn >> 20) & 0x1f)
- {
- case 0: // int32
- negative = (int32_t)uint_val < 0;
- uint_val = negative ? -(int32_t)uint_val : (int32_t)uint_val;
- break;
- case 1: // uint32
- uint_val = (uint32_t)uint_val;
- break;
-#ifdef __riscv64
- case 2: // int64
- negative = (int64_t)uint_val < 0;
- uint_val = negative ? -uint_val : uint_val;
- case 3: // uint64
- break;
-#endif
- default:
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-
- uint64_t float64 = ui64_to_f64(uint_val);
- if (negative)
- float64 ^= INT64_MIN;
-
- if (GET_PRECISION(insn) == PRECISION_S)
- SET_F32_RD(insn, regs, f64_to_f32(float64));
- else
- SET_F64_RD(insn, regs, float64);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fcvt_if)
-{
- int rs2_num = (insn >> 20) & 0x1f;
-#ifdef __riscv64
- if (rs2_num >= 4)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-#else
- if (rs2_num >= 2)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-#endif
-
- int64_t float64;
- if (GET_PRECISION(insn) == PRECISION_S)
- float64 = f32_to_f64(GET_F32_RS1(insn, regs));
- else if (GET_PRECISION(insn) == PRECISION_D)
- float64 = GET_F64_RS1(insn, regs);
- else
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- int negative = 0;
- if (float64 < 0) {
- negative = 1;
- float64 ^= INT64_MIN;
- }
- uint64_t uint_val = f64_to_ui64(float64, softfloat_roundingMode, true);
- uint64_t result, limit, limit_result;
-
- switch (rs2_num)
- {
- case 0: // int32
- if (negative) {
- result = (int32_t)-uint_val;
- limit_result = limit = (uint32_t)INT32_MIN;
- } else {
- result = (int32_t)uint_val;
- limit_result = limit = INT32_MAX;
- }
- break;
-
- case 1: // uint32
- limit = limit_result = UINT32_MAX;
- if (negative)
- result = limit = 0;
- else
- result = (uint32_t)uint_val;
- break;
-
- case 2: // int32
- if (negative) {
- result = (int64_t)-uint_val;
- limit_result = limit = (uint64_t)INT64_MIN;
- } else {
- result = (int64_t)uint_val;
- limit_result = limit = INT64_MAX;
- }
- break;
-
- case 3: // uint64
- limit = limit_result = UINT64_MAX;
- if (negative)
- result = limit = 0;
- else
- result = (uint64_t)uint_val;
- break;
-
- default:
- __builtin_unreachable();
- }
-
- if (uint_val > limit) {
- result = limit_result;
- softfloat_raiseFlags(softfloat_flag_invalid);
- }
-
- SET_FS_DIRTY();
- SET_RD(insn, regs, result);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fcmp)
-{
- int rm = GET_RM(insn);
- if (rm >= 3)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- uintptr_t result;
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs);
- if (rm != 1)
- result = f32_eq(rs1, rs2);
- if (rm == 1 || (rm == 0 && !result))
- result = f32_lt(rs1, rs2);
- goto success;
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs);
- if (rm != 1)
- result = f64_eq(rs1, rs2);
- if (rm == 1 || (rm == 0 && !result))
- result = f64_lt(rs1, rs2);
- goto success;
- }
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-success:
- SET_RD(insn, regs, result);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fmv_if)
-{
- uintptr_t result;
- if ((insn & MASK_FMV_X_S) == MATCH_FMV_X_S)
- result = GET_F32_RS1(insn, regs);
-#ifdef __riscv64
- else if ((insn & MASK_FMV_X_D) == MATCH_FMV_X_D)
- result = GET_F64_RS1(insn, regs);
-#endif
- else
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- SET_RD(insn, regs, result);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fmv_fi)
-{
- uintptr_t rs1 = GET_RS1(insn, regs);
-
- if ((insn & MASK_FMV_S_X) == MATCH_FMV_S_X)
- SET_F32_RD(insn, regs, rs1);
- else if ((insn & MASK_FMV_D_X) == MATCH_FMV_D_X)
- SET_F64_RD(insn, regs, rs1);
- else
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-}
-
-DECLARE_EMULATION_FUNC(emulate_fmadd)
-{
- // if FPU is disabled, punt back to the OS
- if (unlikely((mstatus & MSTATUS_FS) == 0))
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
-
- int op = (insn >> 2) & 3;
- SETUP_STATIC_ROUNDING(insn);
- if (GET_PRECISION(insn) == PRECISION_S) {
- uint32_t rs1 = GET_F32_RS1(insn, regs);
- uint32_t rs2 = GET_F32_RS2(insn, regs);
- uint32_t rs3 = GET_F32_RS3(insn, regs);
- SET_F32_RD(insn, regs, softfloat_mulAddF32(op, rs1, rs2, rs3));
- } else if (GET_PRECISION(insn) == PRECISION_D) {
- uint64_t rs1 = GET_F64_RS1(insn, regs);
- uint64_t rs2 = GET_F64_RS2(insn, regs);
- uint64_t rs3 = GET_F64_RS3(insn, regs);
- SET_F64_RD(insn, regs, softfloat_mulAddF64(op, rs1, rs2, rs3));
- } else {
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- }
-}
diff --git a/pk/emulation.h b/pk/emulation.h
new file mode 100644
index 0000000..f1a71ec
--- /dev/null
+++ b/pk/emulation.h
@@ -0,0 +1,28 @@
+#ifndef _RISCV_EMULATION_H
+#define _RISCV_EMULATION_H
+
+#include "encoding.h"
+#include "bits.h"
+#include <stdint.h>
+
+typedef uint32_t insn_t;
+typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t);
+#define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn)
+
+void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc);
+void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc);
+void redirect_trap(uintptr_t epc, uintptr_t mstatus);
+DECLARE_EMULATION_FUNC(truly_illegal_insn);
+
+/* Yield a pointer to the saved register selected by the 5-bit field that
+ * starts at bit `pos` of `insn`.  The field is shifted so that it lands
+ * already scaled by the register size (1 << LOG_REGBYTES) and is then used
+ * as a byte offset from `regs`.  `regs` is parenthesized so that an
+ * expression argument is cast as a whole. */
+#define GET_REG(insn, pos, regs) ({ \
+  int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \
+  (uintptr_t*)((uintptr_t)(regs) + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \
+})
+#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs))
+#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs))
+#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val))
+#define IMM_I(insn) ((int32_t)(insn) >> 20)
+#define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 0x1f))
+#define MASK_FUNCT3 0x7000
+
+#endif
diff --git a/pk/fp_emulation.c b/pk/fp_emulation.c
new file mode 100644
index 0000000..536967f
--- /dev/null
+++ b/pk/fp_emulation.c
@@ -0,0 +1,449 @@
+#include "fp_emulation.h"
+#include "unprivileged_memory.h"
+#include "softfloat.h"
+#include "config.h"
+
+// Emulate a floating-point load: read 4 or 8 bytes from rs1 + I-immediate
+// and write them to FP register rd.  The width is selected by funct3
+// (FLW or FLD encoding).  Misaligned addresses are handed to
+// misaligned_load_trap; any other funct3 is reported as illegal.
+DECLARE_EMULATION_FUNC(emulate_float_load)
+{
+  uint64_t val;
+  uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
+
+  switch (insn & MASK_FUNCT3)
+  {
+    case MATCH_FLW & MASK_FUNCT3:
+      if (addr % 4 != 0)
+        return misaligned_load_trap(regs, mcause, mepc);
+
+      SET_F32_RD(insn, regs, load_int32_t((void *)addr, mepc));
+      break;
+
+    case MATCH_FLD & MASK_FUNCT3:
+      // Only register-width alignment is required: without __riscv64 the
+      // value is fetched as two 32-bit words.
+      if (addr % sizeof(uintptr_t) != 0)
+        return misaligned_load_trap(regs, mcause, mepc);
+
+#ifdef __riscv64
+      val = load_uint64_t((void *)addr, mepc);
+#else
+      // Low word first, then the high word; the address is cast to a
+      // pointer exactly as at every other load_*/store_* call site.
+      val = load_uint32_t((void *)addr, mepc);
+      val += (uint64_t)load_uint32_t((void *)(addr + 4), mepc) << 32;
+#endif
+      SET_F64_RD(insn, regs, val);
+      break;
+
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// Emulate a floating-point store: write FP register rs2 (4 or 8 bytes,
+// chosen by funct3) to the address rs1 + S-immediate.  Misaligned addresses
+// are handed to misaligned_store_trap; any other funct3 is illegal.
+DECLARE_EMULATION_FUNC(emulate_float_store)
+{
+  const insn_t width_sel = insn & MASK_FUNCT3;
+  const uintptr_t dest = GET_RS1(insn, regs) + IMM_S(insn);
+
+  if (width_sel == (MATCH_FSW & MASK_FUNCT3)) {
+    if (dest % 4 != 0)
+      return misaligned_store_trap(regs, mcause, mepc);
+
+    store_uint32_t((void *)dest, GET_F32_RS2(insn, regs), mepc);
+    return;
+  }
+
+  if (width_sel == (MATCH_FSD & MASK_FUNCT3)) {
+    if (dest % sizeof(uintptr_t) != 0)
+      return misaligned_store_trap(regs, mcause, mepc);
+
+    uint64_t raw = GET_F64_RS2(insn, regs);
+#ifdef __riscv64
+    store_uint64_t((void *)dest, raw, mepc);
+#else
+    // Without 64-bit stores, write the low word and then the high word.
+    store_uint32_t((void *)dest, raw, mepc);
+    store_uint32_t((void *)(dest + 4), raw >> 32, mepc);
+#endif
+    return;
+  }
+
+  return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+}
+
+// Dispatch an FP instruction to its handler.  The handler is looked up in
+// fp_emulation_table using the top five bits of the instruction
+// (insn[31:27]); unassigned slots point at truly_illegal_insn.
+DECLARE_EMULATION_FUNC(emulate_fp)
+{
+ // 32 handler entries, one per value of insn[31:27], emitted into .rodata.
+ // Each entry is read back below as an int32_t.
+ asm (".pushsection .rodata\n"
+ "fp_emulation_table:\n"
+ " .word emulate_fadd\n"
+ " .word emulate_fsub\n"
+ " .word emulate_fmul\n"
+ " .word emulate_fdiv\n"
+ " .word emulate_fsgnj\n"
+ " .word emulate_fmin\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fcvt_ff\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fsqrt\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fcmp\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fcvt_if\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fcvt_fi\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fmv_if\n"
+ " .word truly_illegal_insn\n"
+ " .word emulate_fmv_fi\n"
+ " .word truly_illegal_insn\n"
+ " .popsection");
+
+ // if FPU is disabled, punt back to the OS
+ if (unlikely((mstatus & MSTATUS_FS) == 0))
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+ // (insn >> 25) & 0x7c is insn[31:27] * 4, i.e. a byte offset into the
+ // table of 4-byte entries.  The int32_t entry is widened to uintptr_t
+ // before being used as a function pointer, so handler addresses are
+ // assumed to be representable as sign-extended 32-bit values.
+ extern int32_t fp_emulation_table[];
+ int32_t* pf = (void*)fp_emulation_table + ((insn >> 25) & 0x7c);
+ emulation_func f = (emulation_func)(uintptr_t)*pf;
+
+ // NOTE(review): presumably selects the rounding mode the handlers will
+ // use; the macro body is not visible here -- confirm in fp_emulation.h.
+ SETUP_STATIC_ROUNDING(insn);
+ return f(regs, mcause, mepc, mstatus, insn);
+}
+
+// Shared body of the FP add and subtract handlers.  neg_b is XORed into the
+// top 32 bits of the second operand before the addition: 0 leaves it alone,
+// INT32_MIN flips its sign bit (for doubles the mask is moved up by 32
+// bits).  Precisions other than S and D are reported as illegal.
+void emulate_any_fadd(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn, int32_t neg_b)
+{
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S: {
+      uint32_t lhs = GET_F32_RS1(insn, regs);
+      uint32_t rhs = GET_F32_RS2(insn, regs) ^ neg_b;
+      SET_F32_RD(insn, regs, f32_add(lhs, rhs));
+      break;
+    }
+    case PRECISION_D: {
+      uint64_t lhs = GET_F64_RS1(insn, regs);
+      uint64_t rhs = GET_F64_RS2(insn, regs) ^ ((uint64_t)neg_b << 32);
+      SET_F64_RD(insn, regs, f64_add(lhs, rhs));
+      break;
+    }
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// FP add: emulate_any_fadd with the second operand left unchanged (mask 0).
+DECLARE_EMULATION_FUNC(emulate_fadd)
+{
+ return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, 0);
+}
+
+// FP subtract: emulate_any_fadd with INT32_MIN as the mask, which flips the
+// sign bit of the second operand before the addition.
+DECLARE_EMULATION_FUNC(emulate_fsub)
+{
+ return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, INT32_MIN);
+}
+
+// FP multiply: rd = rs1 * rs2 in the precision encoded in the instruction.
+// Precisions other than S and D are reported as illegal.
+DECLARE_EMULATION_FUNC(emulate_fmul)
+{
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S: {
+      uint32_t lhs = GET_F32_RS1(insn, regs);
+      uint32_t rhs = GET_F32_RS2(insn, regs);
+      SET_F32_RD(insn, regs, f32_mul(lhs, rhs));
+      break;
+    }
+    case PRECISION_D: {
+      uint64_t lhs = GET_F64_RS1(insn, regs);
+      uint64_t rhs = GET_F64_RS2(insn, regs);
+      SET_F64_RD(insn, regs, f64_mul(lhs, rhs));
+      break;
+    }
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// FP divide: rd = rs1 / rs2 in the precision encoded in the instruction.
+// Precisions other than S and D are reported as illegal.
+DECLARE_EMULATION_FUNC(emulate_fdiv)
+{
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S: {
+      uint32_t dividend = GET_F32_RS1(insn, regs);
+      uint32_t divisor = GET_F32_RS2(insn, regs);
+      SET_F32_RD(insn, regs, f32_div(dividend, divisor));
+      break;
+    }
+    case PRECISION_D: {
+      uint64_t dividend = GET_F64_RS1(insn, regs);
+      uint64_t divisor = GET_F64_RS2(insn, regs);
+      SET_F64_RD(insn, regs, f64_div(dividend, divisor));
+      break;
+    }
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// FP square root: rd = sqrt(rs1).  The encoding is rejected as illegal when
+// the rs2 field (insn[24:20]) is non-zero or the precision is not S or D.
+DECLARE_EMULATION_FUNC(emulate_fsqrt)
+{
+  if (((insn >> 20) & 0x1f) != 0)
+    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S:
+      SET_F32_RD(insn, regs, f32_sqrt(GET_F32_RS1(insn, regs)));
+      break;
+    case PRECISION_D:
+      SET_F64_RD(insn, regs, f64_sqrt(GET_F64_RS1(insn, regs)));
+      break;
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// Sign-injection: rd takes the magnitude bits of rs1 and a sign bit computed
+// from rs1, rs2 and the rm field (values 0..2 accepted, anything else is
+// illegal):
+//   rm == 0: sign of rs2
+//   rm == 1: inverted sign of rs2
+//   rm == 2: sign of rs1 XOR sign of rs2
+DECLARE_EMULATION_FUNC(emulate_fsgnj)
+{
+ int rm = GET_RM(insn);
+ if (rm >= 3)
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+ // rs1_sign is kept only when rm == 2 (rm >> 1), then XORed with rm and
+ // rs2's sign.  Only bit 0 survives the final shift to the top bit, so the
+ // extra bit contributed by rm == 2 is discarded.
+ // Note: the macro stays defined for the rest of the translation unit.
+ #define DO_FSGNJ(rs1, rs2, rm) ({ \
+ typeof(rs1) rs1_sign = (rs1) >> (8*sizeof(rs1)-1); \
+ typeof(rs1) rs2_sign = (rs2) >> (8*sizeof(rs1)-1); \
+ rs1_sign &= (rm) >> 1; \
+ rs1_sign ^= (rm) ^ rs2_sign; \
+ ((rs1) << 1 >> 1) | (rs1_sign << (8*sizeof(rs1)-1)); })
+
+ if (GET_PRECISION(insn) == PRECISION_S) {
+ uint32_t rs1 = GET_F32_RS1(insn, regs);
+ uint32_t rs2 = GET_F32_RS2(insn, regs);
+ SET_F32_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
+ } else if (GET_PRECISION(insn) == PRECISION_D) {
+ uint64_t rs1 = GET_F64_RS1(insn, regs);
+ uint64_t rs2 = GET_F64_RS2(insn, regs);
+ SET_F64_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
+ } else {
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+ }
+}
+
+// FP minimum / maximum.  The rm field selects the operation: 0 keeps rs1
+// when rs1 < rs2, 1 keeps rs1 when rs2 < rs1; larger values are illegal.
+// rs1 is also kept whenever rs2 is a NaN.  The comparison is the quiet
+// less-than.
+DECLARE_EMULATION_FUNC(emulate_fmin)
+{
+  const int pick_larger = GET_RM(insn);
+  if (pick_larger > 1)
+    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S: {
+      uint32_t first = GET_F32_RS1(insn, regs);
+      uint32_t second = GET_F32_RS2(insn, regs);
+      int keep_first;
+      if (pick_larger)
+        keep_first = f32_lt_quiet(second, first) || isNaNF32UI(second);
+      else
+        keep_first = f32_lt_quiet(first, second) || isNaNF32UI(second);
+      SET_F32_RD(insn, regs, keep_first ? first : second);
+      break;
+    }
+    case PRECISION_D: {
+      uint64_t first = GET_F64_RS1(insn, regs);
+      uint64_t second = GET_F64_RS2(insn, regs);
+      int keep_first;
+      if (pick_larger)
+        keep_first = f64_lt_quiet(second, first) || isNaNF64UI(second);
+      else
+        keep_first = f64_lt_quiet(first, second) || isNaNF64UI(second);
+      SET_F64_RD(insn, regs, keep_first ? first : second);
+      break;
+    }
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// Float-to-float conversion.  The precision field names the destination
+// format and the rs2 field (insn[24:20]) must name the source format:
+// destination S requires source code 1 (double), destination D requires
+// source code 0 (single).  Every other combination is illegal.
+DECLARE_EMULATION_FUNC(emulate_fcvt_ff)
+{
+  const int src_fmt = (insn >> 20) & 0x1f;
+
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S:
+      if (src_fmt != 1)
+        return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+      SET_F32_RD(insn, regs, f64_to_f32(GET_F64_RS1(insn, regs)));
+      break;
+    case PRECISION_D:
+      if (src_fmt != 0)
+        return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+      SET_F64_RD(insn, regs, f32_to_f64(GET_F32_RS1(insn, regs)));
+      break;
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
+
+// Integer-to-float conversion: convert integer register rs1 to the FP
+// format named by the precision field and write it to FP register rd.
+// The rs2 field (insn[24:20]) selects the source integer type:
+// 0 = int32, 1 = uint32, and with __riscv64 also 2 = int64, 3 = uint64.
+// The value is converted as an unsigned magnitude and the sign bit is
+// applied afterwards.  Other precisions or source types are illegal.
+DECLARE_EMULATION_FUNC(emulate_fcvt_fi)
+{
+  if (GET_PRECISION(insn) != PRECISION_S && GET_PRECISION(insn) != PRECISION_D)
+    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+  int negative = 0;
+  uint64_t uint_val = GET_RS1(insn, regs);
+
+  switch ((insn >> 20) & 0x1f)
+  {
+    case 0: // int32
+      negative = (int32_t)uint_val < 0;
+      // Negate in unsigned arithmetic and keep only the low 32 bits.
+      // Negating as int32_t overflows for INT32_MIN, and the sign-extended
+      // result would then be converted as a huge 64-bit magnitude.
+      uint_val = (uint32_t)(negative ? -uint_val : uint_val);
+      break;
+    case 1: // uint32
+      uint_val = (uint32_t)uint_val;
+      break;
+#ifdef __riscv64
+    case 2: // int64
+      negative = (int64_t)uint_val < 0;
+      uint_val = negative ? -uint_val : uint_val;
+      // fallthrough: the magnitude is now handled like a uint64
+    case 3: // uint64
+      break;
+#endif
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+
+  // Convert the magnitude, then restore the sign by flipping the top bit.
+  uint64_t float64 = ui64_to_f64(uint_val);
+  if (negative)
+    float64 ^= INT64_MIN;
+
+  // Single-precision results go through the double value first.
+  if (GET_PRECISION(insn) == PRECISION_S)
+    SET_F32_RD(insn, regs, f64_to_f32(float64));
+  else
+    SET_F64_RD(insn, regs, float64);
+}
+
+// Float-to-integer conversion: convert FP register rs1 (precision S or D)
+// to the integer type selected by the rs2 field (insn[24:20]) and write it
+// to integer register rd.  Type codes: 0 = int32, 1 = uint32, and with
+// __riscv64 also 2 = int64, 3 = uint64.  Results whose magnitude exceeds
+// the per-type limit are replaced by limit_result and raise the invalid
+// flag.
+DECLARE_EMULATION_FUNC(emulate_fcvt_if)
+{
+ int rs2_num = (insn >> 20) & 0x1f;
+#ifdef __riscv64
+ if (rs2_num >= 4)
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+#else
+ if (rs2_num >= 2)
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+#endif
+
+ // Work on the raw bits of a double; singles are widened first.
+ int64_t float64;
+ if (GET_PRECISION(insn) == PRECISION_S)
+ float64 = f32_to_f64(GET_F32_RS1(insn, regs));
+ else if (GET_PRECISION(insn) == PRECISION_D)
+ float64 = GET_F64_RS1(insn, regs);
+ else
+ return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+ // A set top bit makes the raw bits negative as int64_t; strip the sign
+ // and convert the magnitude as an unsigned value.
+ int negative = 0;
+ if (float64 < 0) {
+ negative = 1;
+ float64 ^= INT64_MIN;
+ }
+ uint64_t uint_val = f64_to_ui64(float64, softfloat_roundingMode, true);
+ // limit: largest magnitude accepted; limit_result: value stored instead
+ // when uint_val exceeds limit.
+ uint64_t result, limit, limit_result;
+
+ switch (rs2_num)
+ {
+ case 0: // int32
+ if (negative) {
+ result = (int32_t)-uint_val;
+ limit_result = limit = (uint32_t)INT32_MIN;
+ } else {
+ result = (int32_t)uint_val;
+ limit_result = limit = INT32_MAX;
+ }
+ break;
+
+ case 1: // uint32
+ // NOTE(review): for negative inputs limit becomes 0 but limit_result
+ // stays UINT32_MAX, so a negative value with non-zero magnitude yields
+ // UINT32_MAX -- confirm this is the intended saturation value.
+ limit = limit_result = UINT32_MAX;
+ if (negative)
+ result = limit = 0;
+ else
+ result = (uint32_t)uint_val;
+ break;
+
+ case 2: // int64
+ if (negative) {
+ result = (int64_t)-uint_val;
+ limit_result = limit = (uint64_t)INT64_MIN;
+ } else {
+ result = (int64_t)uint_val;
+ limit_result = limit = INT64_MAX;
+ }
+ break;
+
+ case 3: // uint64
+ // NOTE(review): same pattern as the uint32 case -- negative inputs with
+ // non-zero magnitude yield UINT64_MAX; confirm.
+ limit = limit_result = UINT64_MAX;
+ if (negative)
+ result = limit = 0;
+ else
+ result = (uint64_t)uint_val;
+ break;
+
+ default:
+ // rs2_num was range-checked above.
+ __builtin_unreachable();
+ }
+
+ if (uint_val > limit) {
+ result = limit_result;
+ softfloat_raiseFlags(softfloat_flag_invalid);
+ }
+
+ SET_FS_DIRTY();
+ SET_RD(insn, regs, result);
+}
+
+// FP compare: write 0 or 1 to integer register rd.  The rm field selects
+// the predicate: 2 = equal, 1 = less-than, 0 = equal, falling back to
+// less-than when the operands are not equal.  Other rm values and
+// precisions other than S and D are illegal.
+DECLARE_EMULATION_FUNC(emulate_fcmp)
+{
+  const int pred = GET_RM(insn);
+  if (pred > 2)
+    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+  uintptr_t outcome;
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S: {
+      uint32_t lhs = GET_F32_RS1(insn, regs);
+      uint32_t rhs = GET_F32_RS2(insn, regs);
+      if (pred == 1) {
+        outcome = f32_lt(lhs, rhs);
+      } else {
+        outcome = f32_eq(lhs, rhs);
+        if (pred == 0 && !outcome)
+          outcome = f32_lt(lhs, rhs);
+      }
+      break;
+    }
+    case PRECISION_D: {
+      uint64_t lhs = GET_F64_RS1(insn, regs);
+      uint64_t rhs = GET_F64_RS2(insn, regs);
+      if (pred == 1) {
+        outcome = f64_lt(lhs, rhs);
+      } else {
+        outcome = f64_eq(lhs, rhs);
+        if (pred == 0 && !outcome)
+          outcome = f64_lt(lhs, rhs);
+      }
+      break;
+    }
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+
+  SET_RD(insn, regs, outcome);
+}
+
+// Move the raw bits of FP register rs1 into integer register rd.  The
+// single-precision form is always accepted; the double-precision form only
+// when built with __riscv64.  Any other encoding is illegal.
+DECLARE_EMULATION_FUNC(emulate_fmv_if)
+{
+  uintptr_t moved;
+
+  if ((insn & MASK_FMV_X_S) == MATCH_FMV_X_S) {
+    moved = GET_F32_RS1(insn, regs);
+#ifdef __riscv64
+  } else if ((insn & MASK_FMV_X_D) == MATCH_FMV_X_D) {
+    moved = GET_F64_RS1(insn, regs);
+#endif
+  } else {
+    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+
+  SET_RD(insn, regs, moved);
+}
+
+// Move the raw bits of integer register rs1 into FP register rd, as a
+// single or a double depending on which encoding matches.  Any other
+// encoding is illegal.
+DECLARE_EMULATION_FUNC(emulate_fmv_fi)
+{
+  const uintptr_t src_bits = GET_RS1(insn, regs);
+
+  if ((insn & MASK_FMV_S_X) == MATCH_FMV_S_X) {
+    SET_F32_RD(insn, regs, src_bits);
+    return;
+  }
+  if ((insn & MASK_FMV_D_X) == MATCH_FMV_D_X) {
+    SET_F64_RD(insn, regs, src_bits);
+    return;
+  }
+
+  return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+}
+
+// Fused multiply-add family.  insn[3:2] is passed straight through to the
+// softfloat mulAdd routine as its operation selector; operands are rs1,
+// rs2 and rs3, and the result goes to FP register rd.  The instruction is
+// reported as illegal when the FPU is disabled in mstatus or the precision
+// is not S or D.
+DECLARE_EMULATION_FUNC(emulate_fmadd)
+{
+  // if FPU is disabled, punt back to the OS
+  if (unlikely((mstatus & MSTATUS_FS) == 0))
+    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+
+  const int variant = (insn >> 2) & 3;
+  SETUP_STATIC_ROUNDING(insn);
+
+  switch (GET_PRECISION(insn)) {
+    case PRECISION_S: {
+      uint32_t mul_a = GET_F32_RS1(insn, regs);
+      uint32_t mul_b = GET_F32_RS2(insn, regs);
+      uint32_t addend = GET_F32_RS3(insn, regs);
+      SET_F32_RD(insn, regs, softfloat_mulAddF32(variant, mul_a, mul_b, addend));
+      break;
+    }
+    case PRECISION_D: {
+      uint64_t mul_a = GET_F64_RS1(insn, regs);
+      uint64_t mul_b = GET_F64_RS2(insn, regs);
+      uint64_t addend = GET_F64_RS3(insn, regs);
+      SET_F64_RD(insn, regs, softfloat_mulAddF64(variant, mul_a, mul_b, addend));
+      break;
+    }
+    default:
+      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
+  }
+}
diff --git a/pk/fp_emulation.h b/pk/fp_emulation.h
new file mode 100644
index 0000000..d2357b7
--- /dev/null
+++ b/pk/fp_emulation.h
@@ -0,0 +1,81 @@
+#ifndef _RISCV_FP_EMULATION_H
+#define _RISCV_FP_EMULATION_H
+
+#include "emulation.h"
+
+#define GET_PRECISION(insn) (((insn) >> 25) & 3) // two-bit field at instruction bits 26:25; compared against PRECISION_S / PRECISION_D
+#define GET_RM(insn) (((insn) >> 12) & 7) // three-bit field at instruction bits 14:12; SETUP_STATIC_ROUNDING treats it as the rounding-mode selector
+#define PRECISION_S 0 // GET_PRECISION value for which the emulation code uses the F32 accessors
+#define PRECISION_D 1 // GET_PRECISION value for which the emulation code uses the F64 accessors
+
+#ifdef __riscv_hard_float // hard-float build: FP registers are reached through the get_/put_ assembly entry points and FP state through the real fcsr/frm/fflags CSRs
+# define GET_F32_REG(insn, pos, regs) ({ /* reads the FP register whose 5-bit number sits at bit `pos` of insn; `regs` is unused in this variant */ \
+ register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; /* register number * 8, placed in a0; a0 also carries the result back */ \
+ uintptr_t tmp; \
+ asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); /* jumps to get_f32_reg + a0, linking through t0; the target code is defined elsewhere */ \
+ value; })
+# define SET_F32_REG(insn, pos, regs, val) ({ /* writes val to the FP register whose number sits at bit `pos` of insn */ \
+ register uint32_t value asm("a0") = (val); /* value to store is passed in a0 */ \
+ uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; /* register number * 8 */ \
+ uintptr_t tmp; \
+ asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) // jumps to put_f32_reg + offset, linking through t0
+# define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0) // stores 0 via put_f32_reg at offset 8*i, i.e. zeroes FP register i
+# define GET_F64_REG(insn, pos, regs) ({ /* 64-bit counterpart of GET_F32_REG, going through get_f64_reg */ \
+ register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
+ uintptr_t tmp; \
+ asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
+ sizeof(uintptr_t) == 4 ? *(int64_t*)value : (int64_t)value; }) // with 4-byte registers a0 is dereferenced as a pointer to the 64-bit value; otherwise a0 is the value itself
+# define SET_F64_REG(insn, pos, regs, val) ({ /* 64-bit counterpart of SET_F32_REG, going through put_f64_reg */ \
+ uint64_t __val = (val); \
+ register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? (uintptr_t)&__val : (uintptr_t)__val; /* with 4-byte registers a0 carries the address of the value, otherwise the value */ \
+ uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
+ uintptr_t tmp; \
+ asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
+# define GET_FCSR() read_csr(fcsr)
+# define SET_FCSR(value) write_csr(fcsr, (value))
+# define GET_FRM() read_csr(frm)
+# define SET_FRM(value) write_csr(frm, (value))
+# define GET_FFLAGS() read_csr(fflags)
+# define SET_FFLAGS(value) write_csr(fflags, (value))
+
+# define SETUP_STATIC_ROUNDING(insn) ({ /* loads the effective rounding mode into tp; may `return` from the enclosing function, which must have regs/mcause/mepc/mstatus in scope */ \
+ register long tp asm("tp") = read_csr(frm); /* start from the frm CSR value */ \
+ if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) ; /* all funct3 bits set: keep the frm value */ \
+ else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); /* rm values 5 and 6 are rejected */ \
+ else tp = GET_RM(insn); /* otherwise the instruction's own rm field is used */ \
+ asm volatile ("":"+r"(tp)); }) // empty asm using tp as an in/out operand so the assignment to the tp register is not dropped
+# define softfloat_raiseFlags(which) set_csr(fflags, which) // exception flags are ORed straight into the fflags CSR
+# define softfloat_roundingMode ({ register int tp asm("tp"); tp; }) // reads back the mode that SETUP_STATIC_ROUNDING left in tp
+#else // soft-float build: FP registers are 8-byte slots in memory after the 32 integer slots of regs, and the FCSR is kept in the low 8 bits of tp
+# define GET_F64_REG(insn, pos, regs) (*(int64_t*)((void*)((regs) + 32) + (((insn) >> ((pos)-3)) & 0xf8))) // lvalue at byte offset 8*regnum past regs[32]
+# define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val))
+# define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs)) // 32-bit view of the first four bytes of the same slot
+# define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val))
+# define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; }) // emulated FCSR = low 8 bits of tp
+# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); }) // overwrites tp with the 8-bit value
+# define GET_FRM() (GET_FCSR() >> 5) // rounding mode occupies FCSR bits 7:5
+# define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5))
+# define GET_FFLAGS() (GET_FCSR() & 0x1F) // exception flags occupy FCSR bits 4:0
+# define SET_FFLAGS(value) SET_FCSR((GET_FRM() << 5) | ((value) & 0x1F))
+
+# define SETUP_STATIC_ROUNDING(insn) ({ /* places the effective rounding mode in tp bits 15:13 while keeping the emulated FCSR in bits 7:0; may `return` from the enclosing function */ \
+ register int tp asm("tp"); tp &= 0xFF; /* drop anything above the 8-bit FCSR */ \
+ if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) tp |= tp << 8; /* all funct3 bits set: copy the FCSR up by 8 so its frm field lands in bits 15:13 */ \
+ else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); /* rm values 5 and 6 are rejected */ \
+ else tp |= GET_RM(insn) << 13; /* otherwise the instruction's rm field goes to bits 15:13 */ \
+ asm volatile ("":"+r"(tp)); }) // empty asm using tp as an in/out operand so the update of the tp register is not dropped
+# define softfloat_raiseFlags(which) ({ asm volatile ("or tp, tp, %0" :: "rI"(which)); }) // ORs the flags into tp, where the emulated FCSR lives
+# define softfloat_roundingMode ({ register int tp asm("tp"); tp >> 13; }) // mode stored in tp bits 15:13 by SETUP_STATIC_ROUNDING
+#endif // __riscv_hard_float
+
+#define GET_F32_RS1(insn, regs) (GET_F32_REG(insn, 15, regs)) // FP source register whose number field starts at instruction bit 15
+#define GET_F32_RS2(insn, regs) (GET_F32_REG(insn, 20, regs)) // FP source register whose number field starts at instruction bit 20
+#define GET_F32_RS3(insn, regs) (GET_F32_REG(insn, 27, regs)) // FP source register whose number field starts at instruction bit 27
+#define GET_F64_RS1(insn, regs) (GET_F64_REG(insn, 15, regs)) // 64-bit variant of GET_F32_RS1
+#define GET_F64_RS2(insn, regs) (GET_F64_REG(insn, 20, regs)) // 64-bit variant of GET_F32_RS2
+#define GET_F64_RS3(insn, regs) (GET_F64_REG(insn, 27, regs)) // 64-bit variant of GET_F32_RS3
+#define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY()) // writes the FP destination (number field at bit 7), then marks FP state dirty
+#define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY()) // 64-bit variant of SET_F32_RD
+#define SET_FS_DIRTY() set_csr(mstatus, MSTATUS_FS) // sets every bit of the MSTATUS_FS field in the mstatus CSR
+
+#endif
diff --git a/pk/mentry.S b/pk/mentry.S
index ee3a146..2ded375 100644
--- a/pk/mentry.S
+++ b/pk/mentry.S
@@ -86,15 +86,15 @@ trap_vector:
STORE t1, 6*REGBYTES(sp)
sll t1, a1, 2 # t1 <- mcause << 2
STORE t2, 7*REGBYTES(sp)
- add t0, t0, t1 # t0 <- %hi(trap_table)[mcause]
+ add t1, t0, t1 # t1 <- %hi(trap_table)[mcause]
STORE s0, 8*REGBYTES(sp)
- lw t0, %pcrel_lo(1b)(t0) # t0 <- trap_table[mcause]
+ lw t1, %pcrel_lo(1b)(t1) # t1 <- trap_table[mcause]
STORE s1, 9*REGBYTES(sp)
mv a0, sp # a0 <- regs
STORE a2,12*REGBYTES(sp)
csrr a2, mepc # a2 <- mepc
STORE a3,13*REGBYTES(sp)
- csrrw t1, mscratch, x0 # t1 <- user sp
+ csrrw t0, mscratch, x0 # t0 <- user sp
STORE a4,14*REGBYTES(sp)
STORE a5,15*REGBYTES(sp)
STORE a6,16*REGBYTES(sp)
@@ -113,7 +113,7 @@ trap_vector:
STORE t4,29*REGBYTES(sp)
STORE t5,30*REGBYTES(sp)
STORE t6,31*REGBYTES(sp)
- STORE t1, 2*REGBYTES(sp) # sp
+ STORE t0, 2*REGBYTES(sp) # sp
#ifndef __riscv_hard_float
lw tp, (sp) # Move the emulated FCSR from x0's save slot into tp.
@@ -121,7 +121,7 @@ trap_vector:
STORE x0, (sp) # Zero x0's save slot.
# Invoke the handler.
- jalr t0
+ jalr t1
#ifndef __riscv_hard_float
sw tp, (sp) # Move the emulated FCSR from tp into x0's save slot.
diff --git a/pk/minit.c b/pk/minit.c
index 741445b..42cdfe8 100644
--- a/pk/minit.c
+++ b/pk/minit.c
@@ -1,5 +1,6 @@
#include "vm.h"
#include "mtrap.h"
+#include "fp_emulation.h"
uintptr_t mem_size;
uint32_t num_harts;
diff --git a/pk/mtrap.c b/pk/mtrap.c
index a6a2fdb..52b85d8 100644
--- a/pk/mtrap.c
+++ b/pk/mtrap.c
@@ -4,77 +4,6 @@
#include "vm.h"
#include <errno.h>
-void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
-{
- asm (".pushsection .rodata\n"
- "illegal_insn_trap_table:\n"
- " .word truly_illegal_insn\n"
-#ifdef PK_ENABLE_FP_EMULATION
- " .word emulate_float_load\n"
-#else
- " .word truly_illegal_insn\n"
-#endif
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
-#ifdef PK_ENABLE_FP_EMULATION
- " .word emulate_float_store\n"
-#else
- " .word truly_illegal_insn\n"
-#endif
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word emulate_mul_div\n"
- " .word truly_illegal_insn\n"
- " .word emulate_mul_div32\n"
- " .word truly_illegal_insn\n"
-#ifdef PK_ENABLE_FP_EMULATION
- " .word emulate_fmadd\n"
- " .word emulate_fmadd\n"
- " .word emulate_fmadd\n"
- " .word emulate_fmadd\n"
- " .word emulate_fp\n"
-#else
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
-#endif
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
-#ifdef PK_ENABLE_FP_EMULATION
- " .word emulate_system\n"
-#else
- " .word truly_illegal_insn\n"
-#endif
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .word truly_illegal_insn\n"
- " .popsection");
-
- uintptr_t mstatus;
- insn_t insn = get_insn(mepc, &mstatus);
-
- if ((insn & 3) != 3)
- return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
- write_csr(mepc, mepc + 4);
-
- extern int32_t illegal_insn_trap_table[];
- int32_t* pf = (void*)illegal_insn_trap_table + (insn & 0x7c);
- emulation_func f = (emulation_func)(uintptr_t)*pf;
- f(regs, mcause, mepc, mstatus, insn);
-}
-
void __attribute__((noreturn)) bad_trap()
{
panic("machine mode: unhandlable trap %d @ %p", read_csr(mcause), read_csr(mepc));
@@ -301,6 +230,25 @@ void mcall_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
write_csr(mepc, mepc + 4);
}
+// Re-targets the current machine-mode trap at the supervisor trap vector.
+// CSR effects visible here: sepc <- epc, scause <- mcause, mepc <- stvec, and
+// mstatus <- the supplied mstatus with SPP/SPIE loaded from its MPP/MPIE
+// fields, MPP set to PRV_S and MPIE cleared.  Control then passes to the
+// external __redirect_trap routine, whose body is not part of this file.
+void redirect_trap(uintptr_t epc, uintptr_t mstatus)
+{
+  write_csr(sepc, epc);
+  write_csr(scause, read_csr(mcause));
+  write_csr(mepc, read_csr(stvec));
+
+  const uintptr_t prev_priv = EXTRACT_FIELD(mstatus, MSTATUS_MPP);
+  const uintptr_t trapped_ie = EXTRACT_FIELD(mstatus, MSTATUS_MPIE);
+  // The privilege level recorded in MPP must not exceed PRV_S.
+  kassert(prev_priv <= PRV_S);
+
+  // Build the new status word step by step before committing it to the CSR.
+  uintptr_t next_status = mstatus;
+  next_status = INSERT_FIELD(next_status, MSTATUS_SPP, prev_priv);
+  next_status = INSERT_FIELD(next_status, MSTATUS_SPIE, trapped_ie);
+  next_status = INSERT_FIELD(next_status, MSTATUS_MPP, PRV_S);
+  next_status = INSERT_FIELD(next_status, MSTATUS_MPIE, 0);
+  write_csr(mstatus, next_status);
+
+  extern void __redirect_trap();
+  return __redirect_trap();
+}
+
static void machine_page_fault(uintptr_t* regs, uintptr_t mepc)
{
// MPRV=1 iff this trap occurred while emulating an instruction on behalf
diff --git a/pk/mtrap.h b/pk/mtrap.h
index 7b2a181..fe44c58 100644
--- a/pk/mtrap.h
+++ b/pk/mtrap.h
@@ -9,142 +9,6 @@
#include "sbi.h"
-#define GET_MACRO(_1,_2,_3,_4,NAME,...) NAME
-
-#define unpriv_mem_access(a, b, c, ...) GET_MACRO(__VA_ARGS__, unpriv_mem_access3, unpriv_mem_access2, unpriv_mem_access1, unpriv_mem_access0)(a, b, c, __VA_ARGS__)
-#define unpriv_mem_access0(a, b, c, d) ({ uintptr_t z = 0, z1 = 0, z2 = 0; unpriv_mem_access_base(a, b, c, d, z, z1, z2); })
-#define unpriv_mem_access1(a, b, c, d, e) ({ uintptr_t z = 0, z1 = 0; unpriv_mem_access_base(a, b, c, d, e, z, z1); })
-#define unpriv_mem_access2(a, b, c, d, e, f) ({ uintptr_t z = 0; unpriv_mem_access_base(a, b, c, d, e, f, z); })
-#define unpriv_mem_access3(a, b, c, d, e, f, g) unpriv_mem_access_base(a, b, c, d, e, f, g)
-#define unpriv_mem_access_base(code, o0, o1, o2, i0, i1, i2) ({ \
- register uintptr_t mstatus asm ("a3") = MSTATUS_MPRV; \
- register uintptr_t __mepc asm ("a2") = mepc; \
- uintptr_t unused1, unused2, unused3 __attribute__((unused)); \
- asm volatile ("csrrs %[mstatus], mstatus, %[mstatus]\n" \
- code "\n" \
- "csrw mstatus, %[mstatus]\n" \
- : [o0] "=&r"(o0), [o1] "=&r"(o1), [o2] "=&r"(o2), \
- [mstatus] "+&r"(mstatus) \
- : [i0] "rJ"(i0), [i1] "rJ"(i1), [i2] "rJ"(i2), \
- "r"(__mepc)); \
- (mstatus); \
-})
-
-typedef uint32_t insn_t;
-typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t);
-#define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn)
-
-void truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn);
-void redirect_trap(uintptr_t epc, uintptr_t mstatus);
-
-#define GET_REG(insn, pos, regs) ({ \
- int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \
- (uintptr_t*)((uintptr_t)regs + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \
-})
-#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs))
-#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs))
-#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val))
-#define IMM_I(insn) ((int32_t)(insn) >> 20)
-#define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 0x1f))
-#define MASK_FUNCT3 0x7000
-
-#define GET_PRECISION(insn) (((insn) >> 25) & 3)
-#define GET_RM(insn) (((insn) >> 12) & 7)
-#define PRECISION_S 0
-#define PRECISION_D 1
-
-#ifdef __riscv_hard_float
-# define GET_F32_REG(insn, pos, regs) ({ \
- register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
- uintptr_t tmp; \
- asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
- value; })
-# define SET_F32_REG(insn, pos, regs, val) ({ \
- register uint32_t value asm("a0") = (val); \
- uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
- uintptr_t tmp; \
- asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
-# define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0)
-# define GET_F64_REG(insn, pos, regs) ({ \
- register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
- uintptr_t tmp; \
- asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
- sizeof(uintptr_t) == 4 ? *(int64_t*)value : (int64_t)value; })
-# define SET_F64_REG(insn, pos, regs, val) ({ \
- uint64_t __val = (val); \
- register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? (uintptr_t)&__val : (uintptr_t)__val; \
- uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
- uintptr_t tmp; \
- asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
-# define GET_FCSR() read_csr(fcsr)
-# define SET_FCSR(value) write_csr(fcsr, (value))
-# define GET_FRM() read_csr(frm)
-# define SET_FRM(value) write_csr(frm, (value))
-# define GET_FFLAGS() read_csr(fflags)
-# define SET_FFLAGS(value) write_csr(fflags, (value))
-
-# define SETUP_STATIC_ROUNDING(insn) ({ \
- register long tp asm("tp") = read_csr(frm); \
- if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) ; \
- else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \
- else tp = GET_RM(insn); \
- asm volatile ("":"+r"(tp)); })
-# define softfloat_raiseFlags(which) set_csr(fflags, which)
-# define softfloat_roundingMode ({ register int tp asm("tp"); tp; })
-#else
-# define GET_F64_REG(insn, pos, regs) (((int64_t*)(&(regs)[32]))[((insn) >> (pos)) & 0x1f])
-# define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val))
-# define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs))
-# define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val))
-# define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; })
-# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); })
-# define GET_FRM() (GET_FCSR() >> 5)
-# define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5))
-# define GET_FFLAGS() (GET_FCSR() & 0x1F)
-# define SET_FFLAGS(value) SET_FCSR((GET_FRM() << 5) | ((value) & 0x1F))
-
-# define SETUP_STATIC_ROUNDING(insn) ({ \
- register int tp asm("tp"); tp &= 0xFF; \
- if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) tp |= tp << 8; \
- else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \
- else tp |= GET_RM(insn) << 13; \
- asm volatile ("":"+r"(tp)); })
-# define softfloat_raiseFlags(which) ({ asm volatile ("or tp, tp, %0" :: "rI"(which)); })
-# define softfloat_roundingMode ({ register int tp asm("tp"); tp >> 13; })
-#endif
-
-#define GET_F32_RS1(insn, regs) (GET_F32_REG(insn, 15, regs))
-#define GET_F32_RS2(insn, regs) (GET_F32_REG(insn, 20, regs))
-#define GET_F32_RS3(insn, regs) (GET_F32_REG(insn, 27, regs))
-#define GET_F64_RS1(insn, regs) (GET_F64_REG(insn, 15, regs))
-#define GET_F64_RS2(insn, regs) (GET_F64_REG(insn, 20, regs))
-#define GET_F64_RS3(insn, regs) (GET_F64_REG(insn, 27, regs))
-#define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY())
-#define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY())
-#define SET_FS_DIRTY() set_csr(mstatus, MSTATUS_FS)
-
-static insn_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus)
-{
- insn_t insn;
-
-#ifdef __riscv_compressed
- int rvc_mask = 3, insn_hi;
- *mstatus = unpriv_mem_access("lhu %[insn], 0(%[mepc]);"
- "and %[insn_hi], %[insn], %[rvc_mask];"
- "bne %[insn_hi], %[rvc_mask], 1f;"
- "lh %[insn_hi], 2(%[mepc]);"
- "sll %[insn_hi], %[insn_hi], 16;"
- "or %[insn], %[insn], %[insn_hi];"
- "1:",
- insn, insn_hi, unused1, mepc, rvc_mask);
-#else
- *mstatus = unpriv_mem_access("lw %[insn], 0(%[mepc])",
- insn, unused1, unused2, mepc);
-#endif
-
- return insn;
-}
-
#define read_const_csr(reg) ({ unsigned long __tmp; \
asm ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; })
diff --git a/pk/pk.h b/pk/pk.h
index 7785ca9..917b841 100644
--- a/pk/pk.h
+++ b/pk/pk.h
@@ -41,9 +41,6 @@ void kassert_fail(const char* s) __attribute__((noreturn));
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi)
-#define likely(x) __builtin_expect((x), 1)
-#define unlikely(x) __builtin_expect((x), 0)
-
#define EXTRACT_FIELD(val, which) (((val) & (which)) / ((which) & ~((which)-1)))
#define INSERT_FIELD(val, which, fieldval) (((val) & ~(which)) | ((fieldval) * ((which) & ~((which)-1))))
diff --git a/pk/pk.mk.in b/pk/pk.mk.in
index 7ada789..0189666 100644
--- a/pk/pk.mk.in
+++ b/pk/pk.mk.in
@@ -2,18 +2,27 @@ pk_subproject_deps = \
softfloat \
pk_hdrs = \
- mtrap.h \
- encoding.h \
atomic.h \
+ bits.h \
+ elf.h \
+ emulation.h \
+ encoding.h \
file.h \
+ fp_emulation.h \
frontend.h \
- elf.h \
+ mcall.h \
+ mtrap.h \
+ pk.h \
+ sbi.h \
+ syscall.h \
+ unprivileged_memory.h \
vm.h \
pk_c_srcs = \
mtrap.c \
minit.c \
emulation.c \
+ fp_emulation.c \
sbi_impl.c \
init.c \
file.c \
diff --git a/pk/unprivileged_memory.h b/pk/unprivileged_memory.h
new file mode 100644
index 0000000..d03cc5e
--- /dev/null
+++ b/pk/unprivileged_memory.h
@@ -0,0 +1,79 @@
+#ifndef _RISCV_MISALIGNED_H
+#define _RISCV_MISALIGNED_H
+
+#include "encoding.h"
+#include <stdint.h>
+
+#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) /* defines load_<type>(addr, mepc): executes load instruction `insn` on *addr with the MSTATUS_MPRV bits set in mstatus, then restores mstatus */ \
+ static inline type load_##type(const type* addr, uintptr_t mepc) \
+ { \
+ register uintptr_t __mepc asm ("a2") = mepc; /* pinned to a2 and only passed as an asm input; presumably consumed by a fault handler -- TODO confirm against machine_page_fault */ \
+ register uintptr_t __mstatus asm ("a3"); /* pinned to a3; receives the mstatus value read by csrrs */ \
+ type val; \
+ asm ("csrrs %0, mstatus, %3\n" /* set the MPRV bits, old mstatus -> %0 */ \
+ #insn " %1, %2\n" /* the actual load into val */ \
+ "csrw mstatus, %0" /* write the saved mstatus back */ \
+ : "+&r" (__mstatus), "=&r" (val) \
+ : "m" (*addr), "r" (MSTATUS_MPRV), "r" (__mepc)); \
+ return val; \
+ }
+
+#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) /* defines store_<type>(addr, val, mepc): executes store instruction `insn` of val to *addr with the MSTATUS_MPRV bits set in mstatus, then restores mstatus */ \
+ static inline void store_##type(type* addr, type val, uintptr_t mepc) \
+ { \
+ register uintptr_t __mepc asm ("a2") = mepc; /* pinned to a2 and only passed as an asm input; presumably consumed by a fault handler -- TODO confirm against machine_page_fault */ \
+ register uintptr_t __mstatus asm ("a3"); /* pinned to a3; receives the mstatus value read by csrrs */ \
+ asm volatile ("csrrs %0, mstatus, %3\n" /* set the MPRV bits, old mstatus -> %0 */ \
+ #insn " %1, %2\n" /* the actual store of val */ \
+ "csrw mstatus, %0" /* write the saved mstatus back */ \
+ : "+&r" (__mstatus) \
+ : "r" (val), "m" (*addr), "r" (MSTATUS_MPRV), \
+ "r" (__mepc)); \
+ }
+
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint8_t, lbu) // load_uint8_t
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint16_t, lhu) // load_uint16_t
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int8_t, lb) // load_int8_t
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int16_t, lh) // load_int16_t
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int32_t, lw) // load_int32_t
+DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint8_t, sb) // store_uint8_t
+DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint16_t, sh) // store_uint16_t
+DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint32_t, sw) // store_uint32_t
+#ifdef __riscv64 // 64-bit builds: load_uint32_t uses lwu, and the 64-bit load/store helpers are defined
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lwu) // load_uint32_t
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint64_t, ld) // load_uint64_t
+DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint64_t, sd) // store_uint64_t
+#else // other builds: load_uint32_t uses plain lw and no 64-bit helpers are defined
+DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lw) // load_uint32_t
+#endif // __riscv64
+
+static uint32_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus) // fetches the instruction at address mepc with the MSTATUS_MPRV bits set; returns it and stores the mstatus value read by csrrs in *mstatus
+{
+ register uintptr_t __mepc asm ("a2") = mepc; // fetch address, pinned to a2
+ register uintptr_t __mstatus asm ("a3"); // pinned to a3; receives the mstatus value read by csrrs
+ uint32_t val;
+#ifndef __riscv_compressed // build without compressed instructions: a single 32-bit load
+ asm ("csrrs %[mstatus], mstatus, %[mprv]\n" // set the MPRV bits, old mstatus -> __mstatus
+ "lw %[insn], (%[addr])\n"
+ "csrw mstatus, %[mstatus]" // write the saved mstatus back
+ : [mstatus] "+&r" (__mstatus), [insn] "=&r" (val)
+ : [mprv] "r" (MSTATUS_MPRV), [addr] "r" (__mepc));
+#else // compressed build: load the first halfword, and the second only when needed
+ uintptr_t rvc_mask = 3, tmp;
+ asm ("csrrs %[mstatus], mstatus, %[mprv]\n" // set the MPRV bits, old mstatus -> __mstatus
+ "lhu %[insn], (%[addr])\n" // low 16 bits of the instruction
+ "and %[tmp], %[insn], %[rvc_mask]\n" // isolate the two lowest bits
+ "bne %[tmp], %[rvc_mask], 1f\n" // lowest two bits != 3: keep only the halfword and skip to the restore
+ "lh %[tmp], 2(%[addr])\n" // otherwise load the following halfword
+ "sll %[tmp], %[tmp], 16\n" // move it into bits 31:16
+ "add %[insn], %[insn], %[tmp]\n" // combine the two halves
+ "1: csrw mstatus, %[mstatus]" // write the saved mstatus back
+ : [mstatus] "+&r" (__mstatus), [insn] "=&r" (val), [tmp] "=&r" (tmp)
+ : [mprv] "r" (MSTATUS_MPRV), [addr] "r" (__mepc),
+ [rvc_mask] "r" (rvc_mask));
+#endif
+ *mstatus = __mstatus; // hand the saved mstatus to the caller
+ return val;
+}
+
+#endif