From bbc9a65fed7c85ee058d7188a62f0b904c38b77b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 4 Mar 2016 21:02:42 -0800 Subject: Begin refactoring emulation code --- pk/bits.h | 3 + pk/emulation.c | 596 ++++++++--------------------------------------- pk/emulation.h | 28 +++ pk/fp_emulation.c | 449 +++++++++++++++++++++++++++++++++++ pk/fp_emulation.h | 81 +++++++ pk/mentry.S | 10 +- pk/minit.c | 1 + pk/mtrap.c | 90 ++----- pk/mtrap.h | 136 ----------- pk/pk.h | 3 - pk/pk.mk.in | 15 +- pk/unprivileged_memory.h | 79 +++++++ 12 files changed, 771 insertions(+), 720 deletions(-) create mode 100644 pk/emulation.h create mode 100644 pk/fp_emulation.c create mode 100644 pk/fp_emulation.h create mode 100644 pk/unprivileged_memory.h (limited to 'pk') diff --git a/pk/bits.h b/pk/bits.h index e7fd8d3..9947351 100644 --- a/pk/bits.h +++ b/pk/bits.h @@ -1,6 +1,9 @@ #ifndef PK_BITS_H #define PK_BITS_H +#define likely(x) __builtin_expect((x), 1) +#define unlikely(x) __builtin_expect((x), 0) + #define CONST_POPCOUNT2(x) ((((x) >> 0) & 1) + (((x) >> 1) & 1)) #define CONST_POPCOUNT4(x) (CONST_POPCOUNT2(x) + CONST_POPCOUNT2((x)>>2)) #define CONST_POPCOUNT8(x) (CONST_POPCOUNT4(x) + CONST_POPCOUNT4((x)>>4)) diff --git a/pk/emulation.c b/pk/emulation.c index 87db2de..4943e0d 100644 --- a/pk/emulation.c +++ b/pk/emulation.c @@ -1,24 +1,76 @@ +#include "emulation.h" +#include "fp_emulation.h" +#include "config.h" +#include "unprivileged_memory.h" #include "mtrap.h" -#include "softfloat.h" #include -void redirect_trap(uintptr_t epc, uintptr_t mstatus) +void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) { - write_csr(sepc, epc); - write_csr(scause, read_csr(mcause)); - write_csr(mepc, read_csr(stvec)); + asm (".pushsection .rodata\n" + "illegal_insn_trap_table:\n" + " .word truly_illegal_insn\n" +#ifdef PK_ENABLE_FP_EMULATION + " .word emulate_float_load\n" +#else + " .word truly_illegal_insn\n" +#endif + " .word truly_illegal_insn\n" + " .word 
truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" +#ifdef PK_ENABLE_FP_EMULATION + " .word emulate_float_store\n" +#else + " .word truly_illegal_insn\n" +#endif + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word emulate_mul_div\n" + " .word truly_illegal_insn\n" + " .word emulate_mul_div32\n" + " .word truly_illegal_insn\n" +#ifdef PK_ENABLE_FP_EMULATION + " .word emulate_fmadd\n" + " .word emulate_fmadd\n" + " .word emulate_fmadd\n" + " .word emulate_fmadd\n" + " .word emulate_fp\n" +#else + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" +#endif + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word emulate_system\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .popsection"); - uintptr_t prev_priv = EXTRACT_FIELD(mstatus, MSTATUS_MPP); - uintptr_t prev_ie = EXTRACT_FIELD(mstatus, MSTATUS_MPIE); - kassert(prev_priv <= PRV_S); - mstatus = INSERT_FIELD(mstatus, MSTATUS_SPP, prev_priv); - mstatus = INSERT_FIELD(mstatus, MSTATUS_SPIE, prev_ie); - mstatus = INSERT_FIELD(mstatus, MSTATUS_MPP, PRV_S); - mstatus = INSERT_FIELD(mstatus, MSTATUS_MPIE, 0); - write_csr(mstatus, mstatus); + uintptr_t mstatus; + insn_t insn = get_insn(mepc, &mstatus); - extern void __redirect_trap(); - return __redirect_trap(); + if (unlikely((insn & 3) != 3)) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + write_csr(mepc, mepc + 4); + + extern int32_t illegal_insn_trap_table[]; + int32_t* pf = (void*)illegal_insn_trap_table + (insn & 0x7c); + emulation_func f = 
(emulation_func)(uintptr_t)*pf; + f(regs, mcause, mepc, mstatus, insn); } void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn) @@ -28,8 +80,14 @@ void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mca void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) { + union { + uint8_t bytes[8]; + uintptr_t intx; + uint64_t int64; + } val; uintptr_t mstatus; insn_t insn = get_insn(mepc, &mstatus); + uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn); int shift = 0, fp = 0, len; if ((insn & MASK_LW) == MATCH_LW) @@ -51,139 +109,54 @@ void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) else return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn); - uintptr_t val = 0, tmp, tmp2; - unpriv_mem_access("add %[tmp2], %[addr], %[len];" - "1: slli %[val], %[val], 8;" - "lbu %[tmp], -1(%[tmp2]);" - "addi %[tmp2], %[tmp2], -1;" - "or %[val], %[val], %[tmp];" - "bne %[addr], %[tmp2], 1b;", - val, tmp, tmp2, addr, len); - - if (shift) - val = (intptr_t)val << shift >> shift; + val.int64 = 0; + for (intptr_t i = len-1; i >= 0; i--) + val.bytes[i] = load_uint8_t((void *)(addr + i), mepc); if (!fp) - SET_RD(insn, regs, val); + SET_RD(insn, regs, (intptr_t)val.intx << shift >> shift); else if (len == 8) - SET_F64_RD(insn, regs, val); + SET_F64_RD(insn, regs, val.int64); else - SET_F32_RD(insn, regs, val); + SET_F32_RD(insn, regs, val.intx); write_csr(mepc, mepc + 4); } void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) { + union { + uint8_t bytes[8]; + uintptr_t intx; + uint64_t int64; + } val; uintptr_t mstatus; insn_t insn = get_insn(mepc, &mstatus); - - uintptr_t val = GET_RS2(insn, regs), error; int len; + + val.intx = GET_RS2(insn, regs); if ((insn & MASK_SW) == MATCH_SW) len = 4; +#ifdef __riscv64 else if ((insn & MASK_SD) == MATCH_SD) len = 8; 
+#endif else if ((insn & MASK_FSD) == MATCH_FSD) - len = 8, val = GET_F64_RS2(insn, regs); + len = 8, val.int64 = GET_F64_RS2(insn, regs); else if ((insn & MASK_FSW) == MATCH_FSW) - len = 4, val = GET_F32_RS2(insn, regs); + len = 4, val.intx = GET_F32_RS2(insn, regs); else if ((insn & MASK_SH) == MATCH_SH) len = 2; else return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn); - uintptr_t tmp, tmp2, addr_end = addr + len; - unpriv_mem_access("mv %[tmp], %[val];" - "mv %[tmp2], %[addr];" - "1: sb %[tmp], 0(%[tmp2]);" - "srli %[tmp], %[tmp], 8;" - "addi %[tmp2], %[tmp2], 1;" - "bne %[tmp2], %[addr_end], 1b", - tmp, tmp2, unused1, val, addr, addr_end); + for (size_t i = 0; i < len; i++) + store_uint8_t((void *)(addr + i), val.bytes[i], mepc); write_csr(mepc, mepc + 4); } -DECLARE_EMULATION_FUNC(emulate_float_load) -{ - uintptr_t val_lo, val_hi; - uint64_t val; - uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn); - - switch (insn & MASK_FUNCT3) - { - case MATCH_FLW & MASK_FUNCT3: - if (addr % 4 != 0) - return misaligned_load_trap(regs, mcause, mepc); - - unpriv_mem_access("lw %[val_lo], (%[addr])", - val_lo, unused1, unused2, addr, mepc/*X*/); - SET_F32_RD(insn, regs, val_lo); - break; - - case MATCH_FLD & MASK_FUNCT3: - if (addr % sizeof(uintptr_t) != 0) - return misaligned_load_trap(regs, mcause, mepc); - -#ifdef __riscv64 - unpriv_mem_access("ld %[val], (%[addr])", - val, val_hi/*X*/, unused1, addr, mepc/*X*/); -#else - unpriv_mem_access("lw %[val_lo], (%[addr]);" - "lw %[val_hi], 4(%[addr])", - val_lo, val_hi, unused1, addr, mepc/*X*/); - val = val_lo | ((uint64_t)val_hi << 32); -#endif - SET_F64_RD(insn, regs, val); - break; - - default: - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_float_store) -{ - uintptr_t val_lo, val_hi; - uint64_t val; - uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn); - - switch (insn & MASK_FUNCT3) - { - case MATCH_FSW & 
MASK_FUNCT3: - if (addr % 4 != 0) - return misaligned_store_trap(regs, mcause, mepc); - - val_lo = GET_F32_RS2(insn, regs); - unpriv_mem_access("sw %[val_lo], (%[addr])", - unused1, unused2, unused3, val_lo, addr); - break; - - case MATCH_FSD & MASK_FUNCT3: - if (addr % sizeof(uintptr_t) != 0) - return misaligned_store_trap(regs, mcause, mepc); - - val = GET_F64_RS2(insn, regs); -#ifdef __riscv64 - unpriv_mem_access("sd %[val], (%[addr])", - unused1, unused2, unused3, val, addr); -#else - val_lo = val; - val_hi = val >> 32; - unpriv_mem_access("sw %[val_lo], (%[addr]);" - "sw %[val_hi], 4(%[addr])", - unused1, unused2, unused3, val_lo, val_hi, addr); -#endif - break; - - default: - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - #ifdef __riscv64 typedef __int128 double_int; #else @@ -279,7 +252,7 @@ static inline int emulate_read_csr(int num, uintptr_t mstatus, uintptr_t* result + HLS()->sinstret_delta) >> 32; return 0; #endif -#ifndef __riscv_hard_float +#ifdef PK_ENABLE_FP_EMULATION case CSR_FRM: if ((mstatus & MSTATUS_FS) == 0) break; *result = GET_FRM(); @@ -301,7 +274,7 @@ static inline int emulate_write_csr(int num, uintptr_t value, uintptr_t mstatus) { switch (num) { -#ifndef __riscv_hard_float +#ifndef PK_ENABLE_FP_EMULATION case CSR_FRM: SET_FRM(value); return 0; case CSR_FFLAGS: SET_FFLAGS(value); return 0; case CSR_FCSR: SET_FCSR(value); return 0; @@ -338,384 +311,3 @@ DECLARE_EMULATION_FUNC(emulate_system) SET_RD(insn, regs, csr_val); } - -DECLARE_EMULATION_FUNC(emulate_fp) -{ - asm (".pushsection .rodata\n" - "fp_emulation_table:\n" - " .word emulate_fadd\n" - " .word emulate_fsub\n" - " .word emulate_fmul\n" - " .word emulate_fdiv\n" - " .word emulate_fsgnj\n" - " .word emulate_fmin\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word emulate_fcvt_ff\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word emulate_fsqrt\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" 
- " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word emulate_fcmp\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word emulate_fcvt_if\n" - " .word truly_illegal_insn\n" - " .word emulate_fcvt_fi\n" - " .word truly_illegal_insn\n" - " .word emulate_fmv_if\n" - " .word truly_illegal_insn\n" - " .word emulate_fmv_fi\n" - " .word truly_illegal_insn\n" - " .popsection"); - - // if FPU is disabled, punt back to the OS - if (unlikely((mstatus & MSTATUS_FS) == 0)) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - extern int32_t fp_emulation_table[]; - int32_t* pf = (void*)fp_emulation_table + ((insn >> 25) & 0x7c); - emulation_func f = (emulation_func)(uintptr_t)*pf; - - SETUP_STATIC_ROUNDING(insn); - return f(regs, mcause, mepc, mstatus, insn); -} - -void emulate_any_fadd(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn, int32_t neg_b) -{ - if (GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs) ^ neg_b; - SET_F32_RD(insn, regs, f32_add(rs1, rs2)); - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs) ^ ((uint64_t)neg_b << 32); - SET_F64_RD(insn, regs, f64_add(rs1, rs2)); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fadd) -{ - return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, 0); -} - -DECLARE_EMULATION_FUNC(emulate_fsub) -{ - return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, INT32_MIN); -} - -DECLARE_EMULATION_FUNC(emulate_fmul) -{ - if (GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs); - SET_F32_RD(insn, regs, 
f32_mul(rs1, rs2)); - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs); - SET_F64_RD(insn, regs, f64_mul(rs1, rs2)); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fdiv) -{ - if (GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs); - SET_F32_RD(insn, regs, f32_div(rs1, rs2)); - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs); - SET_F64_RD(insn, regs, f64_div(rs1, rs2)); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fsqrt) -{ - if ((insn >> 20) & 0x1f) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - if (GET_PRECISION(insn) == PRECISION_S) { - SET_F32_RD(insn, regs, f32_sqrt(GET_F32_RS1(insn, regs))); - } else if (GET_PRECISION(insn) == PRECISION_D) { - SET_F64_RD(insn, regs, f64_sqrt(GET_F64_RS1(insn, regs))); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fsgnj) -{ - int rm = GET_RM(insn); - if (rm >= 3) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - #define DO_FSGNJ(rs1, rs2, rm) ({ \ - typeof(rs1) rs1_sign = (rs1) >> (8*sizeof(rs1)-1); \ - typeof(rs1) rs2_sign = (rs2) >> (8*sizeof(rs1)-1); \ - rs1_sign &= (rm) >> 1; \ - rs1_sign ^= (rm) ^ rs2_sign; \ - ((rs1) << 1 >> 1) | (rs1_sign << (8*sizeof(rs1)-1)); }) - - if (GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs); - SET_F32_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm)); - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs); - SET_F64_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm)); - } else { - return 
truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fmin) -{ - int rm = GET_RM(insn); - if (rm >= 2) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - if (GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs); - uint32_t arg1 = rm ? rs2 : rs1; - uint32_t arg2 = rm ? rs1 : rs2; - int use_rs1 = f32_lt_quiet(arg1, arg2) || isNaNF32UI(rs2); - SET_F32_RD(insn, regs, use_rs1 ? rs1 : rs2); - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs); - uint64_t arg1 = rm ? rs2 : rs1; - uint64_t arg2 = rm ? rs1 : rs2; - int use_rs1 = f64_lt_quiet(arg1, arg2) || isNaNF64UI(rs2); - SET_F64_RD(insn, regs, use_rs1 ? rs1 : rs2); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fcvt_ff) -{ - int rs2_num = (insn >> 20) & 0x1f; - if (GET_PRECISION(insn) == PRECISION_S) { - if (rs2_num != 1) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - SET_F32_RD(insn, regs, f64_to_f32(GET_F64_RS1(insn, regs))); - } else if (GET_PRECISION(insn) == PRECISION_D) { - if (rs2_num != 0) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - SET_F64_RD(insn, regs, f32_to_f64(GET_F32_RS1(insn, regs))); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} - -DECLARE_EMULATION_FUNC(emulate_fcvt_fi) -{ - if (GET_PRECISION(insn) != PRECISION_S && GET_PRECISION(insn) != PRECISION_D) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - int negative = 0; - uint64_t uint_val = GET_RS1(insn, regs); - - switch ((insn >> 20) & 0x1f) - { - case 0: // int32 - negative = (int32_t)uint_val < 0; - uint_val = negative ? 
-(int32_t)uint_val : (int32_t)uint_val; - break; - case 1: // uint32 - uint_val = (uint32_t)uint_val; - break; -#ifdef __riscv64 - case 2: // int64 - negative = (int64_t)uint_val < 0; - uint_val = negative ? -uint_val : uint_val; - case 3: // uint64 - break; -#endif - default: - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } - - uint64_t float64 = ui64_to_f64(uint_val); - if (negative) - float64 ^= INT64_MIN; - - if (GET_PRECISION(insn) == PRECISION_S) - SET_F32_RD(insn, regs, f64_to_f32(float64)); - else - SET_F64_RD(insn, regs, float64); -} - -DECLARE_EMULATION_FUNC(emulate_fcvt_if) -{ - int rs2_num = (insn >> 20) & 0x1f; -#ifdef __riscv64 - if (rs2_num >= 4) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); -#else - if (rs2_num >= 2) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); -#endif - - int64_t float64; - if (GET_PRECISION(insn) == PRECISION_S) - float64 = f32_to_f64(GET_F32_RS1(insn, regs)); - else if (GET_PRECISION(insn) == PRECISION_D) - float64 = GET_F64_RS1(insn, regs); - else - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - int negative = 0; - if (float64 < 0) { - negative = 1; - float64 ^= INT64_MIN; - } - uint64_t uint_val = f64_to_ui64(float64, softfloat_roundingMode, true); - uint64_t result, limit, limit_result; - - switch (rs2_num) - { - case 0: // int32 - if (negative) { - result = (int32_t)-uint_val; - limit_result = limit = (uint32_t)INT32_MIN; - } else { - result = (int32_t)uint_val; - limit_result = limit = INT32_MAX; - } - break; - - case 1: // uint32 - limit = limit_result = UINT32_MAX; - if (negative) - result = limit = 0; - else - result = (uint32_t)uint_val; - break; - - case 2: // int32 - if (negative) { - result = (int64_t)-uint_val; - limit_result = limit = (uint64_t)INT64_MIN; - } else { - result = (int64_t)uint_val; - limit_result = limit = INT64_MAX; - } - break; - - case 3: // uint64 - limit = limit_result = UINT64_MAX; - if (negative) - result = limit = 0; - 
else - result = (uint64_t)uint_val; - break; - - default: - __builtin_unreachable(); - } - - if (uint_val > limit) { - result = limit_result; - softfloat_raiseFlags(softfloat_flag_invalid); - } - - SET_FS_DIRTY(); - SET_RD(insn, regs, result); -} - -DECLARE_EMULATION_FUNC(emulate_fcmp) -{ - int rm = GET_RM(insn); - if (rm >= 3) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - uintptr_t result; - if (GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs); - if (rm != 1) - result = f32_eq(rs1, rs2); - if (rm == 1 || (rm == 0 && !result)) - result = f32_lt(rs1, rs2); - goto success; - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs); - if (rm != 1) - result = f64_eq(rs1, rs2); - if (rm == 1 || (rm == 0 && !result)) - result = f64_lt(rs1, rs2); - goto success; - } - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); -success: - SET_RD(insn, regs, result); -} - -DECLARE_EMULATION_FUNC(emulate_fmv_if) -{ - uintptr_t result; - if ((insn & MASK_FMV_X_S) == MATCH_FMV_X_S) - result = GET_F32_RS1(insn, regs); -#ifdef __riscv64 - else if ((insn & MASK_FMV_X_D) == MATCH_FMV_X_D) - result = GET_F64_RS1(insn, regs); -#endif - else - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - SET_RD(insn, regs, result); -} - -DECLARE_EMULATION_FUNC(emulate_fmv_fi) -{ - uintptr_t rs1 = GET_RS1(insn, regs); - - if ((insn & MASK_FMV_S_X) == MATCH_FMV_S_X) - SET_F32_RD(insn, regs, rs1); - else if ((insn & MASK_FMV_D_X) == MATCH_FMV_D_X) - SET_F64_RD(insn, regs, rs1); - else - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); -} - -DECLARE_EMULATION_FUNC(emulate_fmadd) -{ - // if FPU is disabled, punt back to the OS - if (unlikely((mstatus & MSTATUS_FS) == 0)) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - - int op = (insn >> 2) & 3; - SETUP_STATIC_ROUNDING(insn); - if 
(GET_PRECISION(insn) == PRECISION_S) { - uint32_t rs1 = GET_F32_RS1(insn, regs); - uint32_t rs2 = GET_F32_RS2(insn, regs); - uint32_t rs3 = GET_F32_RS3(insn, regs); - SET_F32_RD(insn, regs, softfloat_mulAddF32(op, rs1, rs2, rs3)); - } else if (GET_PRECISION(insn) == PRECISION_D) { - uint64_t rs1 = GET_F64_RS1(insn, regs); - uint64_t rs2 = GET_F64_RS2(insn, regs); - uint64_t rs3 = GET_F64_RS3(insn, regs); - SET_F64_RD(insn, regs, softfloat_mulAddF64(op, rs1, rs2, rs3)); - } else { - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - } -} diff --git a/pk/emulation.h b/pk/emulation.h new file mode 100644 index 0000000..f1a71ec --- /dev/null +++ b/pk/emulation.h @@ -0,0 +1,28 @@ +#ifndef _RISCV_EMULATION_H +#define _RISCV_EMULATION_H + +#include "encoding.h" +#include "bits.h" +#include + +typedef uint32_t insn_t; +typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t); +#define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn) + +void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc); +void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc); +void redirect_trap(uintptr_t epc, uintptr_t mstatus); +DECLARE_EMULATION_FUNC(truly_illegal_insn); + +#define GET_REG(insn, pos, regs) ({ \ + int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \ + (uintptr_t*)((uintptr_t)regs + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \ +}) +#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs)) +#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs)) +#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val)) +#define IMM_I(insn) ((int32_t)(insn) >> 20) +#define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 0x1f)) +#define MASK_FUNCT3 0x7000 + +#endif diff --git a/pk/fp_emulation.c b/pk/fp_emulation.c new file mode 100644 index 0000000..536967f --- /dev/null +++ b/pk/fp_emulation.c @@ -0,0 +1,449 @@ 
+#include "fp_emulation.h" +#include "unprivileged_memory.h" +#include "softfloat.h" +#include "config.h" + +DECLARE_EMULATION_FUNC(emulate_float_load) +{ + uint64_t val; + uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn); + + switch (insn & MASK_FUNCT3) + { + case MATCH_FLW & MASK_FUNCT3: + if (addr % 4 != 0) + return misaligned_load_trap(regs, mcause, mepc); + + SET_F32_RD(insn, regs, load_int32_t((void *)addr, mepc)); + break; + + case MATCH_FLD & MASK_FUNCT3: + if (addr % sizeof(uintptr_t) != 0) + return misaligned_load_trap(regs, mcause, mepc); + +#ifdef __riscv64 + val = load_uint64_t((void *)addr, mepc); +#else + val = load_uint32_t(addr, mepc); + val += (uint64_t)load_uint32_t((void *)(addr + 4), mepc) << 32; +#endif + SET_F64_RD(insn, regs, val); + break; + + default: + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_float_store) +{ + uint64_t val; + uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn); + + switch (insn & MASK_FUNCT3) + { + case MATCH_FSW & MASK_FUNCT3: + if (addr % 4 != 0) + return misaligned_store_trap(regs, mcause, mepc); + + store_uint32_t((void *)addr, GET_F32_RS2(insn, regs), mepc); + break; + + case MATCH_FSD & MASK_FUNCT3: + if (addr % sizeof(uintptr_t) != 0) + return misaligned_store_trap(regs, mcause, mepc); + + val = GET_F64_RS2(insn, regs); +#ifdef __riscv64 + store_uint64_t((void *)addr, val, mepc); +#else + store_uint32_t((void *)addr, val, mepc); + store_uint32_t((void *)(addr + 4), val >> 32, mepc); +#endif + break; + + default: + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fp) +{ + asm (".pushsection .rodata\n" + "fp_emulation_table:\n" + " .word emulate_fadd\n" + " .word emulate_fsub\n" + " .word emulate_fmul\n" + " .word emulate_fdiv\n" + " .word emulate_fsgnj\n" + " .word emulate_fmin\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word emulate_fcvt_ff\n" + " .word 
truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word emulate_fsqrt\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word emulate_fcmp\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word truly_illegal_insn\n" + " .word emulate_fcvt_if\n" + " .word truly_illegal_insn\n" + " .word emulate_fcvt_fi\n" + " .word truly_illegal_insn\n" + " .word emulate_fmv_if\n" + " .word truly_illegal_insn\n" + " .word emulate_fmv_fi\n" + " .word truly_illegal_insn\n" + " .popsection"); + + // if FPU is disabled, punt back to the OS + if (unlikely((mstatus & MSTATUS_FS) == 0)) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + extern int32_t fp_emulation_table[]; + int32_t* pf = (void*)fp_emulation_table + ((insn >> 25) & 0x7c); + emulation_func f = (emulation_func)(uintptr_t)*pf; + + SETUP_STATIC_ROUNDING(insn); + return f(regs, mcause, mepc, mstatus, insn); +} + +void emulate_any_fadd(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn, int32_t neg_b) +{ + if (GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs) ^ neg_b; + SET_F32_RD(insn, regs, f32_add(rs1, rs2)); + } else if (GET_PRECISION(insn) == PRECISION_D) { + uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs) ^ ((uint64_t)neg_b << 32); + SET_F64_RD(insn, regs, f64_add(rs1, rs2)); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fadd) +{ + return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, 0); +} + +DECLARE_EMULATION_FUNC(emulate_fsub) +{ + return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, INT32_MIN); +} + +DECLARE_EMULATION_FUNC(emulate_fmul) +{ + if 
(GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs); + SET_F32_RD(insn, regs, f32_mul(rs1, rs2)); + } else if (GET_PRECISION(insn) == PRECISION_D) { + uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs); + SET_F64_RD(insn, regs, f64_mul(rs1, rs2)); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fdiv) +{ + if (GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs); + SET_F32_RD(insn, regs, f32_div(rs1, rs2)); + } else if (GET_PRECISION(insn) == PRECISION_D) { + uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs); + SET_F64_RD(insn, regs, f64_div(rs1, rs2)); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fsqrt) +{ + if ((insn >> 20) & 0x1f) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + if (GET_PRECISION(insn) == PRECISION_S) { + SET_F32_RD(insn, regs, f32_sqrt(GET_F32_RS1(insn, regs))); + } else if (GET_PRECISION(insn) == PRECISION_D) { + SET_F64_RD(insn, regs, f64_sqrt(GET_F64_RS1(insn, regs))); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fsgnj) +{ + int rm = GET_RM(insn); + if (rm >= 3) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + #define DO_FSGNJ(rs1, rs2, rm) ({ \ + typeof(rs1) rs1_sign = (rs1) >> (8*sizeof(rs1)-1); \ + typeof(rs1) rs2_sign = (rs2) >> (8*sizeof(rs1)-1); \ + rs1_sign &= (rm) >> 1; \ + rs1_sign ^= (rm) ^ rs2_sign; \ + ((rs1) << 1 >> 1) | (rs1_sign << (8*sizeof(rs1)-1)); }) + + if (GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs); + SET_F32_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm)); + } else if (GET_PRECISION(insn) == PRECISION_D) { + 
uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs); + SET_F64_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm)); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fmin) +{ + int rm = GET_RM(insn); + if (rm >= 2) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + if (GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs); + uint32_t arg1 = rm ? rs2 : rs1; + uint32_t arg2 = rm ? rs1 : rs2; + int use_rs1 = f32_lt_quiet(arg1, arg2) || isNaNF32UI(rs2); + SET_F32_RD(insn, regs, use_rs1 ? rs1 : rs2); + } else if (GET_PRECISION(insn) == PRECISION_D) { + uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs); + uint64_t arg1 = rm ? rs2 : rs1; + uint64_t arg2 = rm ? rs1 : rs2; + int use_rs1 = f64_lt_quiet(arg1, arg2) || isNaNF64UI(rs2); + SET_F64_RD(insn, regs, use_rs1 ? rs1 : rs2); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fcvt_ff) +{ + int rs2_num = (insn >> 20) & 0x1f; + if (GET_PRECISION(insn) == PRECISION_S) { + if (rs2_num != 1) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + SET_F32_RD(insn, regs, f64_to_f32(GET_F64_RS1(insn, regs))); + } else if (GET_PRECISION(insn) == PRECISION_D) { + if (rs2_num != 0) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + SET_F64_RD(insn, regs, f32_to_f64(GET_F32_RS1(insn, regs))); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} + +DECLARE_EMULATION_FUNC(emulate_fcvt_fi) +{ + if (GET_PRECISION(insn) != PRECISION_S && GET_PRECISION(insn) != PRECISION_D) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + int negative = 0; + uint64_t uint_val = GET_RS1(insn, regs); + + switch ((insn >> 20) & 0x1f) + { + case 0: // int32 + negative = (int32_t)uint_val < 0; + uint_val = negative ? 
-(int32_t)uint_val : (int32_t)uint_val; + break; + case 1: // uint32 + uint_val = (uint32_t)uint_val; + break; +#ifdef __riscv64 + case 2: // int64 + negative = (int64_t)uint_val < 0; + uint_val = negative ? -uint_val : uint_val; + case 3: // uint64 + break; +#endif + default: + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } + + uint64_t float64 = ui64_to_f64(uint_val); + if (negative) + float64 ^= INT64_MIN; + + if (GET_PRECISION(insn) == PRECISION_S) + SET_F32_RD(insn, regs, f64_to_f32(float64)); + else + SET_F64_RD(insn, regs, float64); +} + +DECLARE_EMULATION_FUNC(emulate_fcvt_if) +{ + int rs2_num = (insn >> 20) & 0x1f; +#ifdef __riscv64 + if (rs2_num >= 4) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); +#else + if (rs2_num >= 2) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); +#endif + + int64_t float64; + if (GET_PRECISION(insn) == PRECISION_S) + float64 = f32_to_f64(GET_F32_RS1(insn, regs)); + else if (GET_PRECISION(insn) == PRECISION_D) + float64 = GET_F64_RS1(insn, regs); + else + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + int negative = 0; + if (float64 < 0) { + negative = 1; + float64 ^= INT64_MIN; + } + uint64_t uint_val = f64_to_ui64(float64, softfloat_roundingMode, true); + uint64_t result, limit, limit_result; + + switch (rs2_num) + { + case 0: // int32 + if (negative) { + result = (int32_t)-uint_val; + limit_result = limit = (uint32_t)INT32_MIN; + } else { + result = (int32_t)uint_val; + limit_result = limit = INT32_MAX; + } + break; + + case 1: // uint32 + limit = limit_result = UINT32_MAX; + if (negative) + result = limit = 0; + else + result = (uint32_t)uint_val; + break; + + case 2: // int32 + if (negative) { + result = (int64_t)-uint_val; + limit_result = limit = (uint64_t)INT64_MIN; + } else { + result = (int64_t)uint_val; + limit_result = limit = INT64_MAX; + } + break; + + case 3: // uint64 + limit = limit_result = UINT64_MAX; + if (negative) + result = limit = 0; + 
else + result = (uint64_t)uint_val; + break; + + default: + __builtin_unreachable(); + } + + if (uint_val > limit) { + result = limit_result; + softfloat_raiseFlags(softfloat_flag_invalid); + } + + SET_FS_DIRTY(); + SET_RD(insn, regs, result); +} + +DECLARE_EMULATION_FUNC(emulate_fcmp) +{ + int rm = GET_RM(insn); + if (rm >= 3) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + uintptr_t result; + if (GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs); + if (rm != 1) + result = f32_eq(rs1, rs2); + if (rm == 1 || (rm == 0 && !result)) + result = f32_lt(rs1, rs2); + goto success; + } else if (GET_PRECISION(insn) == PRECISION_D) { + uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs); + if (rm != 1) + result = f64_eq(rs1, rs2); + if (rm == 1 || (rm == 0 && !result)) + result = f64_lt(rs1, rs2); + goto success; + } + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); +success: + SET_RD(insn, regs, result); +} + +DECLARE_EMULATION_FUNC(emulate_fmv_if) +{ + uintptr_t result; + if ((insn & MASK_FMV_X_S) == MATCH_FMV_X_S) + result = GET_F32_RS1(insn, regs); +#ifdef __riscv64 + else if ((insn & MASK_FMV_X_D) == MATCH_FMV_X_D) + result = GET_F64_RS1(insn, regs); +#endif + else + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + SET_RD(insn, regs, result); +} + +DECLARE_EMULATION_FUNC(emulate_fmv_fi) +{ + uintptr_t rs1 = GET_RS1(insn, regs); + + if ((insn & MASK_FMV_S_X) == MATCH_FMV_S_X) + SET_F32_RD(insn, regs, rs1); + else if ((insn & MASK_FMV_D_X) == MATCH_FMV_D_X) + SET_F64_RD(insn, regs, rs1); + else + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); +} + +DECLARE_EMULATION_FUNC(emulate_fmadd) +{ + // if FPU is disabled, punt back to the OS + if (unlikely((mstatus & MSTATUS_FS) == 0)) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + int op = (insn >> 2) & 3; + SETUP_STATIC_ROUNDING(insn); + if 
(GET_PRECISION(insn) == PRECISION_S) { + uint32_t rs1 = GET_F32_RS1(insn, regs); + uint32_t rs2 = GET_F32_RS2(insn, regs); + uint32_t rs3 = GET_F32_RS3(insn, regs); + SET_F32_RD(insn, regs, softfloat_mulAddF32(op, rs1, rs2, rs3)); + } else if (GET_PRECISION(insn) == PRECISION_D) { + uint64_t rs1 = GET_F64_RS1(insn, regs); + uint64_t rs2 = GET_F64_RS2(insn, regs); + uint64_t rs3 = GET_F64_RS3(insn, regs); + SET_F64_RD(insn, regs, softfloat_mulAddF64(op, rs1, rs2, rs3)); + } else { + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + } +} diff --git a/pk/fp_emulation.h b/pk/fp_emulation.h new file mode 100644 index 0000000..d2357b7 --- /dev/null +++ b/pk/fp_emulation.h @@ -0,0 +1,81 @@ +#ifndef _RISCV_FP_EMULATION_H +#define _RISCV_FP_EMULATION_H + +#include "emulation.h" + +#define GET_PRECISION(insn) (((insn) >> 25) & 3) +#define GET_RM(insn) (((insn) >> 12) & 7) +#define PRECISION_S 0 +#define PRECISION_D 1 + +#ifdef __riscv_hard_float +# define GET_F32_REG(insn, pos, regs) ({ \ + register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \ + uintptr_t tmp; \ + asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \ + value; }) +# define SET_F32_REG(insn, pos, regs, val) ({ \ + register uint32_t value asm("a0") = (val); \ + uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \ + uintptr_t tmp; \ + asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) +# define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0) +# define GET_F64_REG(insn, pos, regs) ({ \ + register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \ + uintptr_t tmp; \ + asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \ + sizeof(uintptr_t) == 4 ? 
*(int64_t*)value : (int64_t)value; }) +# define SET_F64_REG(insn, pos, regs, val) ({ \ + uint64_t __val = (val); \ + register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? (uintptr_t)&__val : (uintptr_t)__val; \ + uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \ + uintptr_t tmp; \ + asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) +# define GET_FCSR() read_csr(fcsr) +# define SET_FCSR(value) write_csr(fcsr, (value)) +# define GET_FRM() read_csr(frm) +# define SET_FRM(value) write_csr(frm, (value)) +# define GET_FFLAGS() read_csr(fflags) +# define SET_FFLAGS(value) write_csr(fflags, (value)) + +# define SETUP_STATIC_ROUNDING(insn) ({ \ + register long tp asm("tp") = read_csr(frm); \ + if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) ; \ + else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \ + else tp = GET_RM(insn); \ + asm volatile ("":"+r"(tp)); }) +# define softfloat_raiseFlags(which) set_csr(fflags, which) +# define softfloat_roundingMode ({ register int tp asm("tp"); tp; }) +#else +# define GET_F64_REG(insn, pos, regs) (*(int64_t*)((void*)((regs) + 32) + (((insn) >> ((pos)-3)) & 0xf8))) +# define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val)) +# define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs)) +# define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val)) +# define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; }) +# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); }) +# define GET_FRM() (GET_FCSR() >> 5) +# define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5)) +# define GET_FFLAGS() (GET_FCSR() & 0x1F) +# define SET_FFLAGS(value) SET_FCSR((GET_FRM() << 5) | ((value) & 0x1F)) + +# define SETUP_STATIC_ROUNDING(insn) ({ \ + register int tp asm("tp"); tp &= 0xFF; \ + if (likely(((insn) & 
MASK_FUNCT3) == MASK_FUNCT3)) tp |= tp << 8; \ + else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \ + else tp |= GET_RM(insn) << 13; \ + asm volatile ("":"+r"(tp)); }) +# define softfloat_raiseFlags(which) ({ asm volatile ("or tp, tp, %0" :: "rI"(which)); }) +# define softfloat_roundingMode ({ register int tp asm("tp"); tp >> 13; }) +#endif + +#define GET_F32_RS1(insn, regs) (GET_F32_REG(insn, 15, regs)) +#define GET_F32_RS2(insn, regs) (GET_F32_REG(insn, 20, regs)) +#define GET_F32_RS3(insn, regs) (GET_F32_REG(insn, 27, regs)) +#define GET_F64_RS1(insn, regs) (GET_F64_REG(insn, 15, regs)) +#define GET_F64_RS2(insn, regs) (GET_F64_REG(insn, 20, regs)) +#define GET_F64_RS3(insn, regs) (GET_F64_REG(insn, 27, regs)) +#define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY()) +#define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY()) +#define SET_FS_DIRTY() set_csr(mstatus, MSTATUS_FS) + +#endif diff --git a/pk/mentry.S b/pk/mentry.S index ee3a146..2ded375 100644 --- a/pk/mentry.S +++ b/pk/mentry.S @@ -86,15 +86,15 @@ trap_vector: STORE t1, 6*REGBYTES(sp) sll t1, a1, 2 # t1 <- mcause << 2 STORE t2, 7*REGBYTES(sp) - add t0, t0, t1 # t0 <- %hi(trap_table)[mcause] + add t1, t0, t1 # t1 <- %hi(trap_table)[mcause] STORE s0, 8*REGBYTES(sp) - lw t0, %pcrel_lo(1b)(t0) # t0 <- trap_table[mcause] + lw t1, %pcrel_lo(1b)(t1) # t1 <- trap_table[mcause] STORE s1, 9*REGBYTES(sp) mv a0, sp # a0 <- regs STORE a2,12*REGBYTES(sp) csrr a2, mepc # a2 <- mepc STORE a3,13*REGBYTES(sp) - csrrw t1, mscratch, x0 # t1 <- user sp + csrrw t0, mscratch, x0 # t0 <- user sp STORE a4,14*REGBYTES(sp) STORE a5,15*REGBYTES(sp) STORE a6,16*REGBYTES(sp) @@ -113,7 +113,7 @@ trap_vector: STORE t4,29*REGBYTES(sp) STORE t5,30*REGBYTES(sp) STORE t6,31*REGBYTES(sp) - STORE t1, 2*REGBYTES(sp) # sp + STORE t0, 2*REGBYTES(sp) # sp #ifndef __riscv_hard_float lw tp, (sp) # Move the emulated FCSR from x0's save slot into tp. 
@@ -121,7 +121,7 @@ trap_vector: STORE x0, (sp) # Zero x0's save slot. # Invoke the handler. - jalr t0 + jalr t1 #ifndef __riscv_hard_float sw tp, (sp) # Move the emulated FCSR from tp into x0's save slot. diff --git a/pk/minit.c b/pk/minit.c index 741445b..42cdfe8 100644 --- a/pk/minit.c +++ b/pk/minit.c @@ -1,5 +1,6 @@ #include "vm.h" #include "mtrap.h" +#include "fp_emulation.h" uintptr_t mem_size; uint32_t num_harts; diff --git a/pk/mtrap.c b/pk/mtrap.c index a6a2fdb..52b85d8 100644 --- a/pk/mtrap.c +++ b/pk/mtrap.c @@ -4,77 +4,6 @@ #include "vm.h" #include -void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) -{ - asm (".pushsection .rodata\n" - "illegal_insn_trap_table:\n" - " .word truly_illegal_insn\n" -#ifdef PK_ENABLE_FP_EMULATION - " .word emulate_float_load\n" -#else - " .word truly_illegal_insn\n" -#endif - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" -#ifdef PK_ENABLE_FP_EMULATION - " .word emulate_float_store\n" -#else - " .word truly_illegal_insn\n" -#endif - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word emulate_mul_div\n" - " .word truly_illegal_insn\n" - " .word emulate_mul_div32\n" - " .word truly_illegal_insn\n" -#ifdef PK_ENABLE_FP_EMULATION - " .word emulate_fmadd\n" - " .word emulate_fmadd\n" - " .word emulate_fmadd\n" - " .word emulate_fmadd\n" - " .word emulate_fp\n" -#else - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" -#endif - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" -#ifdef PK_ENABLE_FP_EMULATION - " .word emulate_system\n" 
-#else - " .word truly_illegal_insn\n" -#endif - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .word truly_illegal_insn\n" - " .popsection"); - - uintptr_t mstatus; - insn_t insn = get_insn(mepc, &mstatus); - - if ((insn & 3) != 3) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); - write_csr(mepc, mepc + 4); - - extern int32_t illegal_insn_trap_table[]; - int32_t* pf = (void*)illegal_insn_trap_table + (insn & 0x7c); - emulation_func f = (emulation_func)(uintptr_t)*pf; - f(regs, mcause, mepc, mstatus, insn); -} - void __attribute__((noreturn)) bad_trap() { panic("machine mode: unhandlable trap %d @ %p", read_csr(mcause), read_csr(mepc)); @@ -301,6 +230,25 @@ void mcall_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) write_csr(mepc, mepc + 4); } +void redirect_trap(uintptr_t epc, uintptr_t mstatus) +{ + write_csr(sepc, epc); + write_csr(scause, read_csr(mcause)); + write_csr(mepc, read_csr(stvec)); + + uintptr_t prev_priv = EXTRACT_FIELD(mstatus, MSTATUS_MPP); + uintptr_t prev_ie = EXTRACT_FIELD(mstatus, MSTATUS_MPIE); + kassert(prev_priv <= PRV_S); + mstatus = INSERT_FIELD(mstatus, MSTATUS_SPP, prev_priv); + mstatus = INSERT_FIELD(mstatus, MSTATUS_SPIE, prev_ie); + mstatus = INSERT_FIELD(mstatus, MSTATUS_MPP, PRV_S); + mstatus = INSERT_FIELD(mstatus, MSTATUS_MPIE, 0); + write_csr(mstatus, mstatus); + + extern void __redirect_trap(); + return __redirect_trap(); +} + static void machine_page_fault(uintptr_t* regs, uintptr_t mepc) { // MPRV=1 iff this trap occurred while emulating an instruction on behalf diff --git a/pk/mtrap.h b/pk/mtrap.h index 7b2a181..fe44c58 100644 --- a/pk/mtrap.h +++ b/pk/mtrap.h @@ -9,142 +9,6 @@ #include "sbi.h" -#define GET_MACRO(_1,_2,_3,_4,NAME,...) NAME - -#define unpriv_mem_access(a, b, c, ...) 
GET_MACRO(__VA_ARGS__, unpriv_mem_access3, unpriv_mem_access2, unpriv_mem_access1, unpriv_mem_access0)(a, b, c, __VA_ARGS__) -#define unpriv_mem_access0(a, b, c, d) ({ uintptr_t z = 0, z1 = 0, z2 = 0; unpriv_mem_access_base(a, b, c, d, z, z1, z2); }) -#define unpriv_mem_access1(a, b, c, d, e) ({ uintptr_t z = 0, z1 = 0; unpriv_mem_access_base(a, b, c, d, e, z, z1); }) -#define unpriv_mem_access2(a, b, c, d, e, f) ({ uintptr_t z = 0; unpriv_mem_access_base(a, b, c, d, e, f, z); }) -#define unpriv_mem_access3(a, b, c, d, e, f, g) unpriv_mem_access_base(a, b, c, d, e, f, g) -#define unpriv_mem_access_base(code, o0, o1, o2, i0, i1, i2) ({ \ - register uintptr_t mstatus asm ("a3") = MSTATUS_MPRV; \ - register uintptr_t __mepc asm ("a2") = mepc; \ - uintptr_t unused1, unused2, unused3 __attribute__((unused)); \ - asm volatile ("csrrs %[mstatus], mstatus, %[mstatus]\n" \ - code "\n" \ - "csrw mstatus, %[mstatus]\n" \ - : [o0] "=&r"(o0), [o1] "=&r"(o1), [o2] "=&r"(o2), \ - [mstatus] "+&r"(mstatus) \ - : [i0] "rJ"(i0), [i1] "rJ"(i1), [i2] "rJ"(i2), \ - "r"(__mepc)); \ - (mstatus); \ -}) - -typedef uint32_t insn_t; -typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t); -#define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn) - -void truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn); -void redirect_trap(uintptr_t epc, uintptr_t mstatus); - -#define GET_REG(insn, pos, regs) ({ \ - int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \ - (uintptr_t*)((uintptr_t)regs + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \ -}) -#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs)) -#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs)) -#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val)) -#define IMM_I(insn) ((int32_t)(insn) >> 20) -#define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 
0x1f)) -#define MASK_FUNCT3 0x7000 - -#define GET_PRECISION(insn) (((insn) >> 25) & 3) -#define GET_RM(insn) (((insn) >> 12) & 7) -#define PRECISION_S 0 -#define PRECISION_D 1 - -#ifdef __riscv_hard_float -# define GET_F32_REG(insn, pos, regs) ({ \ - register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \ - uintptr_t tmp; \ - asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \ - value; }) -# define SET_F32_REG(insn, pos, regs, val) ({ \ - register uint32_t value asm("a0") = (val); \ - uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \ - uintptr_t tmp; \ - asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) -# define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0) -# define GET_F64_REG(insn, pos, regs) ({ \ - register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \ - uintptr_t tmp; \ - asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \ - sizeof(uintptr_t) == 4 ? *(int64_t*)value : (int64_t)value; }) -# define SET_F64_REG(insn, pos, regs, val) ({ \ - uint64_t __val = (val); \ - register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? 
(uintptr_t)&__val : (uintptr_t)__val; \ - uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \ - uintptr_t tmp; \ - asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) -# define GET_FCSR() read_csr(fcsr) -# define SET_FCSR(value) write_csr(fcsr, (value)) -# define GET_FRM() read_csr(frm) -# define SET_FRM(value) write_csr(frm, (value)) -# define GET_FFLAGS() read_csr(fflags) -# define SET_FFLAGS(value) write_csr(fflags, (value)) - -# define SETUP_STATIC_ROUNDING(insn) ({ \ - register long tp asm("tp") = read_csr(frm); \ - if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) ; \ - else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \ - else tp = GET_RM(insn); \ - asm volatile ("":"+r"(tp)); }) -# define softfloat_raiseFlags(which) set_csr(fflags, which) -# define softfloat_roundingMode ({ register int tp asm("tp"); tp; }) -#else -# define GET_F64_REG(insn, pos, regs) (((int64_t*)(&(regs)[32]))[((insn) >> (pos)) & 0x1f]) -# define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val)) -# define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs)) -# define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val)) -# define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; }) -# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); }) -# define GET_FRM() (GET_FCSR() >> 5) -# define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5)) -# define GET_FFLAGS() (GET_FCSR() & 0x1F) -# define SET_FFLAGS(value) SET_FCSR((GET_FRM() << 5) | ((value) & 0x1F)) - -# define SETUP_STATIC_ROUNDING(insn) ({ \ - register int tp asm("tp"); tp &= 0xFF; \ - if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) tp |= tp << 8; \ - else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \ - else tp |= GET_RM(insn) << 13; \ - asm volatile ("":"+r"(tp)); 
}) -# define softfloat_raiseFlags(which) ({ asm volatile ("or tp, tp, %0" :: "rI"(which)); }) -# define softfloat_roundingMode ({ register int tp asm("tp"); tp >> 13; }) -#endif - -#define GET_F32_RS1(insn, regs) (GET_F32_REG(insn, 15, regs)) -#define GET_F32_RS2(insn, regs) (GET_F32_REG(insn, 20, regs)) -#define GET_F32_RS3(insn, regs) (GET_F32_REG(insn, 27, regs)) -#define GET_F64_RS1(insn, regs) (GET_F64_REG(insn, 15, regs)) -#define GET_F64_RS2(insn, regs) (GET_F64_REG(insn, 20, regs)) -#define GET_F64_RS3(insn, regs) (GET_F64_REG(insn, 27, regs)) -#define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY()) -#define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY()) -#define SET_FS_DIRTY() set_csr(mstatus, MSTATUS_FS) - -static insn_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus) -{ - insn_t insn; - -#ifdef __riscv_compressed - int rvc_mask = 3, insn_hi; - *mstatus = unpriv_mem_access("lhu %[insn], 0(%[mepc]);" - "and %[insn_hi], %[insn], %[rvc_mask];" - "bne %[insn_hi], %[rvc_mask], 1f;" - "lh %[insn_hi], 2(%[mepc]);" - "sll %[insn_hi], %[insn_hi], 16;" - "or %[insn], %[insn], %[insn_hi];" - "1:", - insn, insn_hi, unused1, mepc, rvc_mask); -#else - *mstatus = unpriv_mem_access("lw %[insn], 0(%[mepc])", - insn, unused1, unused2, mepc); -#endif - - return insn; -} - #define read_const_csr(reg) ({ unsigned long __tmp; \ asm ("csrr %0, " #reg : "=r"(__tmp)); \ __tmp; }) diff --git a/pk/pk.h b/pk/pk.h index 7785ca9..917b841 100644 --- a/pk/pk.h +++ b/pk/pk.h @@ -41,9 +41,6 @@ void kassert_fail(const char* s) __attribute__((noreturn)); #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) #define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi) -#define likely(x) __builtin_expect((x), 1) -#define unlikely(x) __builtin_expect((x), 0) - #define EXTRACT_FIELD(val, which) (((val) & (which)) / ((which) & ~((which)-1))) #define INSERT_FIELD(val, which, fieldval) (((val) & ~(which)) | ((fieldval) * ((which) & ~((which)-1)))) diff --git a/pk/pk.mk.in b/pk/pk.mk.in index 7ada789..0189666 100644 --- a/pk/pk.mk.in +++ b/pk/pk.mk.in @@ -2,18 +2,27 @@ pk_subproject_deps = \ softfloat \ pk_hdrs = \ - mtrap.h \ - encoding.h \ atomic.h \ + bits.h \ + elf.h \ + emulation.h \ + encoding.h \ file.h \ + fp_emulation.h \ frontend.h \ - elf.h \ + mcall.h \ + mtrap.h \ + pk.h \ + sbi.h \ + syscall.h \ + unprivileged_memory.h \ vm.h \ pk_c_srcs = \ mtrap.c \ minit.c \ emulation.c \ + fp_emulation.c \ sbi_impl.c \ init.c \ file.c \ diff --git a/pk/unprivileged_memory.h b/pk/unprivileged_memory.h new file mode 100644 index 0000000..d03cc5e --- /dev/null +++ b/pk/unprivileged_memory.h @@ -0,0 +1,79 @@ +#ifndef _RISCV_MISALIGNED_H +#define _RISCV_MISALIGNED_H + +#include "encoding.h" +#include <stdint.h> + +#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ + static inline type load_##type(const type* addr, uintptr_t mepc) \ + { \ + register uintptr_t __mepc asm ("a2") = mepc; \ + register uintptr_t __mstatus asm ("a3"); \ + type val; \ + asm ("csrrs %0, mstatus, %3\n" \ + #insn " %1, %2\n" \ + "csrw mstatus, %0" \ + : "+&r" (__mstatus), "=&r" (val) \ + : "m" (*addr), "r" (MSTATUS_MPRV), "r" (__mepc)); \ + return val; \ + } + +#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ + static inline void store_##type(type* addr, type val, uintptr_t mepc) \ + { \ + register uintptr_t __mepc asm ("a2") = mepc; \ + register uintptr_t __mstatus asm ("a3"); \ + asm volatile ("csrrs %0, mstatus, %3\n" \ + #insn " %1, %2\n" \ + "csrw mstatus, %0" \ + : "+&r" (__mstatus) \ + : "r" (val), "m" (*addr), "r" (MSTATUS_MPRV), \ + "r" (__mepc)); \ + } + +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint8_t, 
lbu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint16_t, lhu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int8_t, lb) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int16_t, lh) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int32_t, lw) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint8_t, sb) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint16_t, sh) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint32_t, sw) +#ifdef __riscv64 +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lwu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint64_t, ld) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint64_t, sd) +#else +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lw) +#endif + +static uint32_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus) +{ + register uintptr_t __mepc asm ("a2") = mepc; + register uintptr_t __mstatus asm ("a3"); + uint32_t val; +#ifndef __riscv_compressed + asm ("csrrs %[mstatus], mstatus, %[mprv]\n" + "lw %[insn], (%[addr])\n" + "csrw mstatus, %[mstatus]" + : [mstatus] "+&r" (__mstatus), [insn] "=&r" (val) + : [mprv] "r" (MSTATUS_MPRV), [addr] "r" (__mepc)); +#else + uintptr_t rvc_mask = 3, tmp; + asm ("csrrs %[mstatus], mstatus, %[mprv]\n" + "lhu %[insn], (%[addr])\n" + "and %[tmp], %[insn], %[rvc_mask]\n" + "bne %[tmp], %[rvc_mask], 1f\n" + "lh %[tmp], 2(%[addr])\n" + "sll %[tmp], %[tmp], 16\n" + "add %[insn], %[insn], %[tmp]\n" + "1: csrw mstatus, %[mstatus]" + : [mstatus] "+&r" (__mstatus), [insn] "=&r" (val), [tmp] "=&r" (tmp) + : [mprv] "r" (MSTATUS_MPRV), [addr] "r" (__mepc), + [rvc_mask] "r" (rvc_mask)); +#endif + *mstatus = __mstatus; + return val; +} + +#endif -- cgit v1.1