diff options
-rw-r--r-- | machine/emulation.c | 70 | ||||
-rw-r--r-- | machine/emulation.h | 37 | ||||
-rw-r--r-- | machine/fp_emulation.c | 2 | ||||
-rw-r--r-- | machine/fp_emulation.h | 17 | ||||
-rw-r--r-- | machine/fp_ldst.c | 46 | ||||
-rw-r--r-- | machine/misaligned_ldst.c | 49 | ||||
-rw-r--r-- | machine/unprivileged_memory.h | 16 |
7 files changed, 190 insertions, 47 deletions
diff --git a/machine/emulation.c b/machine/emulation.c index 882daa1..696467d 100644 --- a/machine/emulation.c +++ b/machine/emulation.c @@ -5,6 +5,67 @@ #include "mtrap.h" #include <limits.h> +static DECLARE_EMULATION_FUNC(emulate_rvc) +{ +#ifdef __riscv_compressed + // the only emulable RVC instructions are FP loads and stores. +# if !defined(__riscv_flen) && defined(PK_ENABLE_FP_EMULATION) + write_csr(mepc, mepc + 2); + + // if FPU is disabled, punt back to the OS + if (unlikely((mstatus & MSTATUS_FS) == 0)) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + + if ((insn & MASK_C_FLD) == MATCH_C_FLD) { + uintptr_t addr = GET_RS1S(insn, regs) + RVC_LD_IMM(insn); + if (unlikely(addr % sizeof(uintptr_t))) /* publish addr in mbadaddr before punting, as punt_to_misaligned_handler in fp_ldst.c does */ + return write_csr(mbadaddr, addr), misaligned_load_trap(regs, mcause, mepc); + SET_F64_RD(RVC_RS2S(insn) << SH_RD, regs, load_uint64_t((void *)addr, mepc)); + } else if ((insn & MASK_C_FLDSP) == MATCH_C_FLDSP) { + uintptr_t addr = GET_SP(regs) + RVC_LDSP_IMM(insn); + if (unlikely(addr % sizeof(uintptr_t))) + return write_csr(mbadaddr, addr), misaligned_load_trap(regs, mcause, mepc); + SET_F64_RD(insn, regs, load_uint64_t((void *)addr, mepc)); + } else if ((insn & MASK_C_FSD) == MATCH_C_FSD) { + uintptr_t addr = GET_RS1S(insn, regs) + RVC_LD_IMM(insn); + if (unlikely(addr % sizeof(uintptr_t))) + return write_csr(mbadaddr, addr), misaligned_store_trap(regs, mcause, mepc); + store_uint64_t((void *)addr, GET_F64_RS2(RVC_RS2S(insn) << SH_RS2, regs), mepc); + } else if ((insn & MASK_C_FSDSP) == MATCH_C_FSDSP) { + uintptr_t addr = GET_SP(regs) + RVC_SDSP_IMM(insn); + if (unlikely(addr % sizeof(uintptr_t))) + return write_csr(mbadaddr, addr), misaligned_store_trap(regs, mcause, mepc); + store_uint64_t((void *)addr, GET_F64_RS2(RVC_RS2(insn) << SH_RS2, regs), mepc); + } else +# if __riscv_xlen == 32 + if ((insn & MASK_C_FLW) == MATCH_C_FLW) { + uintptr_t addr = GET_RS1S(insn, regs) + RVC_LW_IMM(insn); + if (unlikely(addr % 4)) + return write_csr(mbadaddr, addr), misaligned_load_trap(regs, mcause, mepc); + SET_F32_RD(RVC_RS2S(insn) << SH_RD, regs, load_int32_t((void *)addr, mepc)); + } else 
if ((insn & MASK_C_FLWSP) == MATCH_C_FLWSP) { + uintptr_t addr = GET_SP(regs) + RVC_LWSP_IMM(insn); + if (unlikely(addr % 4)) + return write_csr(mbadaddr, addr), misaligned_load_trap(regs, mcause, mepc); + SET_F32_RD(insn, regs, load_int32_t((void *)addr, mepc)); + } else if ((insn & MASK_C_FSW) == MATCH_C_FSW) { + uintptr_t addr = GET_RS1S(insn, regs) + RVC_LW_IMM(insn); + if (unlikely(addr % 4)) + return write_csr(mbadaddr, addr), misaligned_store_trap(regs, mcause, mepc); + store_uint32_t((void *)addr, GET_F32_RS2(RVC_RS2S(insn) << SH_RS2, regs), mepc); + } else if ((insn & MASK_C_FSWSP) == MATCH_C_FSWSP) { + uintptr_t addr = GET_SP(regs) + RVC_SWSP_IMM(insn); + if (unlikely(addr % 4)) + return write_csr(mbadaddr, addr), misaligned_store_trap(regs, mcause, mepc); + store_uint32_t((void *)addr, GET_F32_RS2(RVC_RS2(insn) << SH_RS2, regs), mepc); + } else +# endif +# endif +#endif + + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); +} + void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) { asm (".pushsection .rodata\n" @@ -70,8 +131,8 @@ void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) uintptr_t mstatus; insn_t insn = get_insn(mepc, &mstatus); - if (unlikely((insn & 3) != 3)) - return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); + if ((insn & 3) != 3) + return emulate_rvc(regs, mcause, mepc, mstatus, insn); write_csr(mepc, mepc + 4); @@ -81,9 +142,10 @@ void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) f(regs, mcause, mepc, mstatus, insn); } -void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn) +__attribute__((noinline)) +DECLARE_EMULATION_FUNC(truly_illegal_insn) { - redirect_trap(mepc, mstatus); + return redirect_trap(mepc, mstatus); } static inline int emulate_read_csr(int num, uintptr_t mstatus, uintptr_t* result) diff --git a/machine/emulation.h b/machine/emulation.h index b8712b5..156d847 100644 --- a/machine/emulation.h +++ b/machine/emulation.h @@ -5,22 +5,45 @@ 
#include "bits.h" #include <stdint.h> -typedef uint32_t insn_t; +typedef uintptr_t insn_t; typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t); #define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn) void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc); void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc); -void redirect_trap(uintptr_t epc, uintptr_t mstatus) __attribute__((noreturn)); -DECLARE_EMULATION_FUNC(truly_illegal_insn) __attribute__((noreturn)); +void redirect_trap(uintptr_t epc, uintptr_t mstatus); +DECLARE_EMULATION_FUNC(truly_illegal_insn); +DECLARE_EMULATION_FUNC(emulate_rvc_0); +DECLARE_EMULATION_FUNC(emulate_rvc_2); +#define SH_RD 7 +#define SH_RS1 15 +#define SH_RS2 20 +#define SH_RS2C 2 + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | (RV_X(x, 10, 3) << 3) | (RV_X(x, 5, 1) << 6)) +#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | (RV_X(x, 5, 2) << 6)) +#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | (RV_X(x, 12, 1) << 5) | (RV_X(x, 2, 2) << 6)) +#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | (RV_X(x, 12, 1) << 5) | (RV_X(x, 2, 3) << 6)) +#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | (RV_X(x, 7, 2) << 6)) +#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | (RV_X(x, 7, 3) << 6)) +#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) +#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) +#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) + +#define SHIFT_RIGHT(x, y) ((y) < 0 ? 
((x) << -(y)) : ((x) >> (y))) #define GET_REG(insn, pos, regs) ({ \ int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \ - (uintptr_t*)((uintptr_t)regs + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \ + (uintptr_t*)((uintptr_t)regs + (SHIFT_RIGHT(insn, (pos) - LOG_REGBYTES) & (mask))); \ }) -#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs)) -#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs)) -#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val)) +#define GET_RS1(insn, regs) (*GET_REG(insn, SH_RS1, regs)) +#define GET_RS2(insn, regs) (*GET_REG(insn, SH_RS2, regs)) +#define GET_RS1S(insn, regs) (*GET_REG(RVC_RS1S(insn), 0, regs)) +#define GET_RS2S(insn, regs) (*GET_REG(RVC_RS2S(insn), 0, regs)) +#define GET_RS2C(insn, regs) (*GET_REG(insn, SH_RS2C, regs)) +#define GET_SP(regs) (*GET_REG(2, 0, regs)) +#define SET_RD(insn, regs, val) (*GET_REG(insn, SH_RD, regs) = (val)) #define IMM_I(insn) ((int32_t)(insn) >> 20) #define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 0x1f)) #define MASK_FUNCT3 0x7000 diff --git a/machine/fp_emulation.c b/machine/fp_emulation.c index 182567a..0bbe83c 100644 --- a/machine/fp_emulation.c +++ b/machine/fp_emulation.c @@ -331,6 +331,7 @@ DECLARE_EMULATION_FUNC(emulate_fcmp) } return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); success: + SET_FS_DIRTY(); SET_RD(insn, regs, result); } @@ -358,6 +359,7 @@ DECLARE_EMULATION_FUNC(emulate_fmv_if) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); } + SET_FS_DIRTY(); SET_RD(insn, regs, result); } diff --git a/machine/fp_emulation.h b/machine/fp_emulation.h index c2177a2..97950df 100644 --- a/machine/fp_emulation.h +++ b/machine/fp_emulation.h @@ -10,25 +10,25 @@ #ifdef __riscv_flen # define GET_F32_REG(insn, pos, regs) ({ \ - register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \ + register int32_t value asm("a0") = SHIFT_RIGHT(insn, (pos)-3) & 0xf8; \ uintptr_t tmp; \ asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); 
add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \ value; }) # define SET_F32_REG(insn, pos, regs, val) ({ \ register uint32_t value asm("a0") = (val); \ - uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \ + uintptr_t offset = SHIFT_RIGHT(insn, (pos)-3) & 0xf8; \ uintptr_t tmp; \ asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) # define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0) # define GET_F64_REG(insn, pos, regs) ({ \ - register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \ + register uintptr_t value asm("a0") = SHIFT_RIGHT(insn, (pos)-3) & 0xf8; \ uintptr_t tmp; \ asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \ sizeof(uintptr_t) == 4 ? *(int64_t*)value : (int64_t)value; }) # define SET_F64_REG(insn, pos, regs, val) ({ \ uint64_t __val = (val); \ register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? 
(uintptr_t)&__val : (uintptr_t)__val; \ - uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \ + uintptr_t offset = SHIFT_RIGHT(insn, (pos)-3) & 0xf8; \ uintptr_t tmp; \ asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); }) # define GET_FCSR() read_csr(fcsr) @@ -48,12 +48,12 @@ # define softfloat_roundingMode ({ register int tp asm("tp"); tp; }) # define SET_FS_DIRTY() ((void) 0) #else -# define GET_F64_REG(insn, pos, regs) (*(int64_t*)((void*)((regs) + 32) + (((insn) >> ((pos)-3)) & 0xf8))) +# define GET_F64_REG(insn, pos, regs) (*(int64_t*)((void*)((regs) + 32) + (SHIFT_RIGHT(insn, (pos)-3) & 0xf8))) # define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val)) # define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs)) # define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val)) # define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; }) -# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); }) +# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); SET_FS_DIRTY(); }) # define GET_FRM() (GET_FCSR() >> 5) # define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5)) # define GET_FFLAGS() (GET_FCSR() & 0x1F) @@ -79,4 +79,9 @@ #define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY()) #define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY()) +#define GET_F32_RS2C(insn, regs) (GET_F32_REG(insn, 2, regs)) +#define GET_F32_RS2S(insn, regs) (GET_F32_REG(RVC_RS2S(insn), 0, regs)) +#define GET_F64_RS2C(insn, regs) (GET_F64_REG(insn, 2, regs)) +#define GET_F64_RS2S(insn, regs) (GET_F64_REG(RVC_RS2S(insn), 0, regs)) + #endif diff --git a/machine/fp_ldst.c b/machine/fp_ldst.c index 93a4844..e11900c 100644 --- a/machine/fp_ldst.c +++ b/machine/fp_ldst.c @@ -1,31 +1,28 @@ #include "fp_emulation.h" #include 
"unprivileged_memory.h" +#define punt_to_misaligned_handler(align, handler) \ + if (addr % (align) != 0) \ + return write_csr(mbadaddr, addr), (handler)(regs, mcause, mepc) + DECLARE_EMULATION_FUNC(emulate_float_load) { - uint64_t val; uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn); + + // if FPU is disabled, punt back to the OS + if (unlikely((mstatus & MSTATUS_FS) == 0)) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); switch (insn & MASK_FUNCT3) { case MATCH_FLW & MASK_FUNCT3: - if (addr % 4 != 0) - return misaligned_load_trap(regs, mcause, mepc); - + punt_to_misaligned_handler(4, misaligned_load_trap); SET_F32_RD(insn, regs, load_int32_t((void *)addr, mepc)); break; case MATCH_FLD & MASK_FUNCT3: - if (addr % sizeof(uintptr_t) != 0) - return misaligned_load_trap(regs, mcause, mepc); - -#if __riscv_xlen == 64 - val = load_uint64_t((void *)addr, mepc); -#else - val = load_uint32_t((void *)addr, mepc); - val += (uint64_t)load_uint32_t((void *)(addr + 4), mepc) << 32; -#endif - SET_F64_RD(insn, regs, val); + punt_to_misaligned_handler(sizeof(uintptr_t), misaligned_load_trap); + SET_F64_RD(insn, regs, load_uint64_t((void *)addr, mepc)); break; default: @@ -35,29 +32,22 @@ DECLARE_EMULATION_FUNC(emulate_float_load) DECLARE_EMULATION_FUNC(emulate_float_store) { - uint64_t val; uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn); + + // if FPU is disabled, punt back to the OS + if (unlikely((mstatus & MSTATUS_FS) == 0)) + return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); switch (insn & MASK_FUNCT3) { case MATCH_FSW & MASK_FUNCT3: - if (addr % 4 != 0) - return misaligned_store_trap(regs, mcause, mepc); - + punt_to_misaligned_handler(4, misaligned_store_trap); store_uint32_t((void *)addr, GET_F32_RS2(insn, regs), mepc); break; case MATCH_FSD & MASK_FUNCT3: - if (addr % sizeof(uintptr_t) != 0) - return misaligned_store_trap(regs, mcause, mepc); - - val = GET_F64_RS2(insn, regs); -#if __riscv_xlen == 64 - store_uint64_t((void *)addr, val, 
mepc); -#else - store_uint32_t((void *)addr, val, mepc); - store_uint32_t((void *)(addr + 4), val >> 32, mepc); -#endif + punt_to_misaligned_handler(sizeof(uintptr_t), misaligned_store_trap); + store_uint64_t((void *)addr, GET_F64_RS2(insn, regs), mepc); break; default: diff --git a/machine/misaligned_ldst.c b/machine/misaligned_ldst.c index 980aeba..8c96c18 100644 --- a/machine/misaligned_ldst.c +++ b/machine/misaligned_ldst.c @@ -2,6 +2,7 @@ #include "fp_emulation.h" #include "unprivileged_memory.h" #include "mtrap.h" +#include "config.h" union byte_array { uint8_t bytes[8]; @@ -35,6 +36,30 @@ void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) len = 2, shift = 8*(sizeof(uintptr_t) - len); else if ((insn & MASK_LHU) == MATCH_LHU) len = 2; +#ifdef __riscv_compressed +# if __riscv_xlen >= 64 + else if ((insn & MASK_C_LD) == MATCH_C_LD) + len = 8, shift = 8*(sizeof(uintptr_t) - len), insn = RVC_RS2S(insn) << SH_RD; + else if ((insn & MASK_C_LDSP) == MATCH_C_LDSP && ((insn >> SH_RD) & 0x1f)) + len = 8, shift = 8*(sizeof(uintptr_t) - len); +# endif + else if ((insn & MASK_C_LW) == MATCH_C_LW) + len = 4, shift = 8*(sizeof(uintptr_t) - len), insn = RVC_RS2S(insn) << SH_RD; + else if ((insn & MASK_C_LWSP) == MATCH_C_LWSP && ((insn >> SH_RD) & 0x1f)) + len = 4, shift = 8*(sizeof(uintptr_t) - len); +# ifdef PK_ENABLE_FP_EMULATION + else if ((insn & MASK_C_FLD) == MATCH_C_FLD) + fp = 1, len = 8, insn = RVC_RS2S(insn) << SH_RD; + else if ((insn & MASK_C_FLDSP) == MATCH_C_FLDSP) + fp = 1, len = 8; +# if __riscv_xlen == 32 + else if ((insn & MASK_C_FLW) == MATCH_C_FLW) + fp = 1, len = 4, insn = RVC_RS2S(insn) << SH_RD; + else if ((insn & MASK_C_FLWSP) == MATCH_C_FLWSP) + fp = 1, len = 4; +# endif +# endif +#endif else return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); @@ -74,6 +99,30 @@ void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc) #endif else if ((insn & MASK_SH) == MATCH_SH) len = 2; +#ifdef 
__riscv_compressed +# if __riscv_xlen >= 64 + else if ((insn & MASK_C_SD) == MATCH_C_SD) + len = 8, val.intx = GET_RS2S(insn, regs); + else if ((insn & MASK_C_SDSP) == MATCH_C_SDSP) /* CSS format has no rd; bits 11:7 are immediate, so no rd != 0 test */ + len = 8, val.intx = GET_RS2C(insn, regs); +# endif + else if ((insn & MASK_C_SW) == MATCH_C_SW) + len = 4, val.intx = GET_RS2S(insn, regs); + else if ((insn & MASK_C_SWSP) == MATCH_C_SWSP) /* CSS format has no rd; bits 11:7 are immediate, so no rd != 0 test */ + len = 4, val.intx = GET_RS2C(insn, regs); +# ifdef PK_ENABLE_FP_EMULATION + else if ((insn & MASK_C_FSD) == MATCH_C_FSD) + len = 8, val.int64 = GET_F64_RS2S(insn, regs); + else if ((insn & MASK_C_FSDSP) == MATCH_C_FSDSP) + len = 8, val.int64 = GET_F64_RS2C(insn, regs); +# if __riscv_xlen == 32 + else if ((insn & MASK_C_FSW) == MATCH_C_FSW) + len = 4, val.intx = GET_F32_RS2S(insn, regs); + else if ((insn & MASK_C_FSWSP) == MATCH_C_FSWSP) + len = 4, val.intx = GET_F32_RS2C(insn, regs); +# endif +# endif +#endif else return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); diff --git a/machine/unprivileged_memory.h b/machine/unprivileged_memory.h index 03f1e27..39a7722 100644 --- a/machine/unprivileged_memory.h +++ b/machine/unprivileged_memory.h @@ -45,13 +45,25 @@ DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint64_t, ld) DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint64_t, sd) #else DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lw) + +static inline uint64_t load_uint64_t(const uint64_t* addr, uintptr_t mepc) +{ + return load_uint32_t((uint32_t*)addr, mepc) + + ((uint64_t)load_uint32_t((uint32_t*)addr + 1, mepc) << 32); +} + +static inline void store_uint64_t(uint64_t* addr, uint64_t val, uintptr_t mepc) +{ + store_uint32_t((uint32_t*)addr, val, mepc); + store_uint32_t((uint32_t*)addr + 1, val >> 32, mepc); +} #endif -static uint32_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus) +static uintptr_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus) { register uintptr_t __mepc asm ("a2") = mepc; register 
uintptr_t __mstatus asm ("a3"); - uint32_t val; + uintptr_t val; #ifndef __riscv_compressed asm ("csrrs %[mstatus], mstatus, %[mprv]\n" "lw %[insn], (%[addr])\n" |