aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Waterman <waterman@eecs.berkeley.edu>2013-12-17 10:18:47 -0800
committerAndrew Waterman <waterman@eecs.berkeley.edu>2013-12-17 10:18:47 -0800
commit7f457c47b339cc7c79f56bb277ed8ed989e88ae1 (patch)
tree4a3cc949c4a5631699c69443b1a24049ca01fbb4
parente85cb99c5e042ebce78f64213677a48ee7ba0491 (diff)
downloadspike-7f457c47b339cc7c79f56bb277ed8ed989e88ae1.zip
spike-7f457c47b339cc7c79f56bb277ed8ed989e88ae1.tar.gz
spike-7f457c47b339cc7c79f56bb277ed8ed989e88ae1.tar.bz2
Speed things up quite a bit
-rw-r--r--hwacha/decode_hwacha.h2
-rw-r--r--riscv/decode.h22
-rw-r--r--riscv/memtracer.h1
-rw-r--r--riscv/mmu.cc9
-rw-r--r--riscv/mmu.h71
-rw-r--r--riscv/processor.cc76
-rw-r--r--riscv/processor.h16
7 files changed, 118 insertions, 79 deletions
diff --git a/hwacha/decode_hwacha.h b/hwacha/decode_hwacha.h
index b7069fa..fa94b72 100644
--- a/hwacha/decode_hwacha.h
+++ b/hwacha/decode_hwacha.h
@@ -25,7 +25,7 @@
#define INSN_RS2 (insn.rs2())
#define INSN_RS3 (insn.rs3())
#define INSN_RD (insn.rd())
-#define INSN_SEG ((insn.i_imm() >> 9)+1)
+#define INSN_SEG (((reg_t)insn.i_imm() >> 9)+1)
static inline reg_t read_xpr(hwacha_t* h, insn_t insn, uint32_t idx, size_t src)
{
diff --git a/riscv/decode.h b/riscv/decode.h
index 8e506e2..6c26a68 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -52,21 +52,22 @@ class insn_t
{
public:
uint32_t bits() { return b; }
- reg_t i_imm() { return int64_t(int32_t(b) >> 20); }
- reg_t s_imm() { return x(7, 5) | (x(25, 7) << 5) | (imm_sign() << 12); }
- reg_t sb_imm() { return (x(8, 4) << 1) | (x(25,6) << 5) | (x(7,1) << 11) | (imm_sign() << 12); }
- reg_t u_imm() { return int64_t(int32_t(b) >> 12 << 12); }
- reg_t uj_imm() { return (x(21, 10) << 1) | (x(20, 1) << 11) | (x(12, 8) << 12) | (imm_sign() << 20); }
+ int32_t i_imm() { return int32_t(b) >> 20; }
+ int32_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); }
+ int32_t sb_imm() { return (x(8, 4) << 1) + (x(25,6) << 5) + (x(7,1) << 11) + (imm_sign() << 12); }
+ int32_t u_imm() { return int32_t(b) >> 12 << 12; }
+ int32_t uj_imm() { return (x(21, 10) << 1) + (x(20, 1) << 11) + (x(12, 8) << 12) + (imm_sign() << 20); }
uint32_t rd() { return x(7, 5); }
uint32_t rs1() { return x(15, 5); }
uint32_t rs2() { return x(20, 5); }
uint32_t rs3() { return x(27, 5); }
uint32_t rm() { return x(12, 3); }
- reg_t csr() { return x(20, 12); }
+ uint32_t csr() { return x(20, 12); }
private:
uint32_t b;
- reg_t x(int lo, int len) { return b << (32-lo-len) >> (32-len); }
- reg_t imm_sign() { return int64_t(int32_t(b) >> 31); }
+ uint32_t x(int lo, int len) { return b << (32-lo-len) >> (32-len); }
+ uint32_t xs(int lo, int len) { return int32_t(b) << (32-lo-len) >> (32-len); }
+ uint32_t imm_sign() { return xs(31, 1); }
};
template <class T, size_t N, bool zero_reg>
@@ -79,12 +80,11 @@ public:
}
void write(size_t i, T value)
{
- data[i] = value;
+ if (!(zero_reg && i == 0))
+ data[i] = value;
}
const T& operator [] (size_t i) const
{
- if (zero_reg)
- const_cast<T&>(data[0]) = 0;
return data[i];
}
private:
diff --git a/riscv/memtracer.h b/riscv/memtracer.h
index 127a641..e223c43 100644
--- a/riscv/memtracer.h
+++ b/riscv/memtracer.h
@@ -20,6 +20,7 @@ class memtracer_t
class memtracer_list_t : public memtracer_t
{
public:
+ bool empty() { return list.empty(); }
bool interested_in_range(uint64_t begin, uint64_t end, bool store, bool fetch)
{
for (std::vector<memtracer_t*>::iterator it = list.begin(); it != list.end(); ++it)
diff --git a/riscv/mmu.cc b/riscv/mmu.cc
index 96884d6..f8efd5a 100644
--- a/riscv/mmu.cc
+++ b/riscv/mmu.cc
@@ -16,7 +16,8 @@ mmu_t::~mmu_t()
void mmu_t::flush_icache()
{
- memset(icache_tag, -1, sizeof(icache_tag));
+ for (size_t i = 0; i < ICACHE_ENTRIES; i++)
+ icache[i].tag = -1;
}
void mmu_t::flush_tlb()
@@ -28,7 +29,7 @@ void mmu_t::flush_tlb()
flush_icache();
}
-reg_t mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch)
+void* mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch)
{
reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES;
reg_t expected_tag = addr & ~(PGSIZE-1);
@@ -62,10 +63,10 @@ reg_t mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch)
tlb_load_tag[idx] = (pte_perm & PTE_UR) ? expected_tag : -1;
tlb_store_tag[idx] = (pte_perm & PTE_UW) ? expected_tag : -1;
tlb_insn_tag[idx] = (pte_perm & PTE_UX) ? expected_tag : -1;
- tlb_data[idx] = pgbase;
+ tlb_data[idx] = mem + pgbase - (addr & ~(PGSIZE-1));
}
- return paddr;
+ return mem + paddr;
}
pte_t mmu_t::walk(reg_t addr)
diff --git a/riscv/mmu.h b/riscv/mmu.h
index 227d5c7..551fa46 100644
--- a/riscv/mmu.h
+++ b/riscv/mmu.h
@@ -31,11 +31,11 @@ public:
// template for functions that load an aligned value from memory
#define load_func(type) \
- type##_t load_##type(reg_t addr) { \
+ type##_t load_##type(reg_t addr) __attribute__((always_inline)) { \
if(unlikely(addr % sizeof(type##_t))) \
throw trap_load_address_misaligned(addr); \
- reg_t paddr = translate(addr, sizeof(type##_t), false, false); \
- return *(type##_t*)(mem + paddr); \
+ void* paddr = translate(addr, sizeof(type##_t), false, false); \
+ return *(type##_t*)paddr; \
}
// load value from memory at aligned address; zero extend to register width
@@ -55,8 +55,8 @@ public:
void store_##type(reg_t addr, type##_t val) { \
if(unlikely(addr % sizeof(type##_t))) \
throw trap_store_address_misaligned(addr); \
- reg_t paddr = translate(addr, sizeof(type##_t), true, false); \
- *(type##_t*)(mem + paddr) = val; \
+ void* paddr = translate(addr, sizeof(type##_t), true, false); \
+ *(type##_t*)paddr = val; \
}
// store value to memory at aligned address
@@ -77,25 +77,28 @@ public:
// load instruction from memory at aligned address.
inline insn_fetch_t load_insn(reg_t addr)
{
- reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES;
- if (unlikely(icache_tag[idx] != addr))
+ reg_t offset = addr & (sizeof(insn_t) * (ICACHE_ENTRIES-1));
+ offset *= sizeof(icache_entry_t) / sizeof(insn_t);
+ icache_entry_t* entry = (icache_entry_t*)((char*)icache + offset);
+ insn_fetch_t data = entry->data;
+ if (likely(entry->tag == addr))
+ return data;
+
+ void* iaddr = translate(addr, sizeof(insn_t), false, true);
+ insn_fetch_t fetch;
+ fetch.insn.pad = *(decltype(fetch.insn.insn.bits())*)iaddr;
+ fetch.func = proc->decode_insn(fetch.insn.insn);
+
+ entry->tag = addr;
+ entry->data = fetch;
+
+ reg_t paddr = (char*)iaddr - mem;
+ if (!tracer.empty() && tracer.interested_in_range(paddr, paddr + sizeof(insn_t), false, true))
{
- reg_t paddr = translate(addr, sizeof(insn_t), false, true);
- insn_fetch_t fetch;
- fetch.insn.insn = *(insn_t*)(mem + paddr);
- fetch.func = proc->decode_insn(fetch.insn.insn);
-
- reg_t idx = (paddr/sizeof(insn_t)) % ICACHE_ENTRIES;
- icache_tag[idx] = addr;
- icache_data[idx] = fetch;
-
- if (tracer.interested_in_range(paddr, paddr + sizeof(insn_t), false, true))
- {
- icache_tag[idx] = -1;
- tracer.trace(paddr, sizeof(insn_t), false, true);
- }
+ entry->tag = -1;
+ tracer.trace(paddr, sizeof(insn_t), false, true);
}
- return icache_data[idx];
+ return entry->data;
}
void set_processor(processor_t* p) { proc = p; flush_tlb(); }
@@ -112,32 +115,38 @@ private:
memtracer_list_t tracer;
// implement an instruction cache for simulator performance
- static const reg_t ICACHE_ENTRIES = 256;
- insn_fetch_t icache_data[ICACHE_ENTRIES];
+ static const reg_t ICACHE_ENTRIES = 2048;
+ struct icache_entry_t {
+ reg_t tag;
+ reg_t pad;
+ insn_fetch_t data;
+ };
+ icache_entry_t icache[ICACHE_ENTRIES];
// implement a TLB for simulator performance
static const reg_t TLB_ENTRIES = 256;
- reg_t tlb_data[TLB_ENTRIES];
+ char* tlb_data[TLB_ENTRIES];
reg_t tlb_insn_tag[TLB_ENTRIES];
reg_t tlb_load_tag[TLB_ENTRIES];
reg_t tlb_store_tag[TLB_ENTRIES];
- reg_t icache_tag[ICACHE_ENTRIES];
// finish translation on a TLB miss and update the TLB
- reg_t refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch);
+ void* refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch);
// perform a page table walk for a given virtual address
pte_t walk(reg_t addr);
// translate a virtual address to a physical address
- reg_t translate(reg_t addr, reg_t bytes, bool store, bool fetch)
+ void* translate(reg_t addr, reg_t bytes, bool store, bool fetch)
+ __attribute__((always_inline))
{
reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES;
+ reg_t expected_tag = addr & ~(PGSIZE-1);
reg_t* tlb_tag = fetch ? tlb_insn_tag : store ? tlb_store_tag :tlb_load_tag;
- reg_t expected_tag = addr & ~(PGSIZE-1);
- if(likely(tlb_tag[idx] == expected_tag))
- return ((uintptr_t)addr & (PGSIZE-1)) + tlb_data[idx];
+ void* data = tlb_data[idx] + addr;
+ if (likely(tlb_tag[idx] == expected_tag))
+ return data;
return refill_tlb(addr, bytes, store, fetch);
}
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 5e2910f..b12a8e0 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -13,10 +13,11 @@
#include <assert.h>
#include <limits.h>
#include <stdexcept>
+#include <algorithm>
processor_t::processor_t(sim_t* _sim, mmu_t* _mmu, uint32_t _id)
- : sim(_sim), mmu(_mmu), ext(NULL), id(_id), run(false), debug(false),
- opcode_bits(0)
+ : sim(_sim), mmu(_mmu), ext(NULL), disassembler(new disassembler_t),
+ id(_id), run(false), debug(false)
{
reset(true);
mmu->set_processor(this);
@@ -24,6 +25,7 @@ processor_t::processor_t(sim_t* _sim, mmu_t* _mmu, uint32_t _id)
#define DECLARE_INSN(name, match, mask) REGISTER_INSN(this, name, match, mask)
#include "encoding.h"
#undef DECLARE_INSN
+ build_opcode_map();
}
processor_t::~processor_t()
@@ -35,10 +37,7 @@ void state_t::reset()
// the ISA guarantees on boot that the PC is 0x2000 and the processor
// is in supervisor mode, and in 64-bit mode, if supported, with traps
// and virtual memory disabled.
- sr = SR_S;
-#ifdef RISCV_ENABLE_64BIT
- sr |= SR_S64;
-#endif
+ sr = SR_S | SR_S64;
pc = 0x2000;
// the following state is undefined upon boot-up,
@@ -74,6 +73,8 @@ void processor_t::reset(bool value)
run = !value;
state.reset(); // reset the core
+ set_pcr(CSR_STATUS, state.sr);
+
if (ext)
ext->reset(); // reset the extension
}
@@ -185,7 +186,7 @@ void processor_t::disasm(insn_t insn)
{
// the disassembler is stateless, so we share it
fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx32 ") %s\n",
- id, state.pc, insn.bits(), disassembler.disassemble(insn).c_str());
+ id, state.pc, insn.bits(), disassembler->disassemble(insn).c_str());
}
reg_t processor_t::set_pcr(int which, reg_t val)
@@ -215,6 +216,7 @@ reg_t processor_t::set_pcr(int which, reg_t val)
if (!ext)
state.sr &= ~SR_EA;
state.sr &= ~SR_ZERO;
+ rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64);
mmu->flush_tlb();
break;
case CSR_EPC:
@@ -328,42 +330,64 @@ reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc)
insn_func_t processor_t::decode_insn(insn_t insn)
{
- bool rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64);
+ size_t mask = opcode_map.size()-1;
+ insn_desc_t* desc = opcode_map[insn.bits() & mask];
- auto key = insn.bits() & ((1L << opcode_bits)-1);
- for (auto it = opcode_map.find(key); it != opcode_map.end() && it->first == key; ++it)
- if ((insn.bits() & it->second.mask) == it->second.match)
- return rv64 ? it->second.rv64 : it->second.rv32;
+ while ((insn.bits() & desc->mask) != desc->match)
+ desc++;
- return &illegal_instruction;
+ return rv64 ? desc->rv64 : desc->rv32;
}
void processor_t::register_insn(insn_desc_t desc)
{
assert(desc.mask & 1);
- if (opcode_bits == 0 || (desc.mask & ((1L << opcode_bits)-1)) != ((1L << opcode_bits)-1))
+ instructions.push_back(desc);
+}
+
+void processor_t::build_opcode_map()
+{
+ size_t buckets = -1;
+ for (auto& inst : instructions)
+ while ((inst.mask & buckets) != buckets)
+ buckets /= 2;
+ buckets++;
+
+ struct cmp {
+ decltype(insn_desc_t::match) mask;
+ cmp(decltype(mask) mask) : mask(mask) {}
+ bool operator()(const insn_desc_t& lhs, const insn_desc_t& rhs) {
+ if ((lhs.match & mask) != (rhs.match & mask))
+ return (lhs.match & mask) < (rhs.match & mask);
+ return lhs.match < rhs.match;
+ }
+ };
+ std::sort(instructions.begin(), instructions.end(), cmp(buckets-1));
+
+ opcode_map.resize(buckets);
+ opcode_store.resize(instructions.size() + 1);
+
+ size_t j = 0;
+ for (size_t b = 0, i = 0; b < buckets; b++)
{
- unsigned x = 0;
- while ((desc.mask & ((1L << (x+1))-1)) == ((1L << (x+1))-1) &&
- (opcode_bits == 0 || x <= opcode_bits))
- x++;
- opcode_bits = x;
-
- decltype(opcode_map) new_map;
- for (auto it = opcode_map.begin(); it != opcode_map.end(); ++it)
- new_map.insert(std::make_pair(it->second.match & ((1L<<x)-1), it->second));
- opcode_map = new_map;
+ opcode_map[b] = &opcode_store[j];
+ while (i < instructions.size() && b == (instructions[i].match & (buckets-1)))
+ opcode_store[j++] = instructions[i++];
}
- opcode_map.insert(std::make_pair(desc.match & ((1L<<opcode_bits)-1), desc));
+ assert(j == opcode_store.size()-1);
+ opcode_store[j].match = opcode_store[j].mask = 0;
+ opcode_store[j].rv32 = &illegal_instruction;
+ opcode_store[j].rv64 = &illegal_instruction;
}
void processor_t::register_extension(extension_t* x)
{
for (auto insn : x->get_instructions())
register_insn(insn);
+ build_opcode_map();
for (auto disasm_insn : x->get_disasms())
- disassembler.add_insn(disasm_insn);
+ disassembler->add_insn(disasm_insn);
if (ext != NULL)
throw std::logic_error("only one extension may be registered");
ext = x;
diff --git a/riscv/processor.h b/riscv/processor.h
index f53b269..e27aa82 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -3,10 +3,10 @@
#define _RISCV_PROCESSOR_H
#include "decode.h"
-#include "disasm.h"
-#include <cstring>
#include "config.h"
-#include <map>
+#include <cstring>
+#include <memory>
+#include <vector>
class processor_t;
class mmu_t;
@@ -14,6 +14,7 @@ typedef reg_t (*insn_func_t)(processor_t*, insn_t, reg_t);
class sim_t;
class trap_t;
class extension_t;
+class disassembler_t;
struct insn_desc_t
{
@@ -78,14 +79,16 @@ private:
sim_t* sim;
mmu_t* mmu; // main memory is always accessed via the mmu
extension_t* ext;
- disassembler_t disassembler;
+ std::unique_ptr<disassembler_t> disassembler;
state_t state;
uint32_t id;
bool run; // !reset
bool debug;
+ bool rv64;
- unsigned opcode_bits;
- std::multimap<uint32_t, insn_desc_t> opcode_map;
+ std::vector<insn_desc_t> instructions;
+ std::vector<insn_desc_t*> opcode_map;
+ std::vector<insn_desc_t> opcode_store;
void take_interrupt(); // take a trap if any interrupts are pending
void take_trap(reg_t pc, trap_t& t); // take an exception
@@ -96,6 +99,7 @@ private:
friend class extension_t;
friend class htif_isasim_t;
+ void build_opcode_map();
insn_func_t decode_insn(insn_t insn);
};