about summary refs log tree commit diff
path: root/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'riscv')
-rw-r--r--  riscv/byteorder.h  30
-rw-r--r--  riscv/decode.h     39
-rw-r--r--  riscv/mmu.cc        4
-rw-r--r--  riscv/mmu.h        35
-rw-r--r--  riscv/processor.h   5
-rw-r--r--  riscv/sim.cc        7
6 files changed, 84 insertions, 36 deletions
diff --git a/riscv/byteorder.h b/riscv/byteorder.h
new file mode 100644
index 0000000..393a70b
--- /dev/null
+++ b/riscv/byteorder.h
@@ -0,0 +1,30 @@
+// See LICENSE for license details.
+
+#ifndef _RISCV_BYTEORDER_H
+#define _RISCV_BYTEORDER_H
+
+#include "config.h"
+#include <stdint.h>
+
+static inline uint8_t swap(uint8_t n) { return n; }
+static inline uint16_t swap(uint16_t n) { return __builtin_bswap16(n); }
+static inline uint32_t swap(uint32_t n) { return __builtin_bswap32(n); }
+static inline uint64_t swap(uint64_t n) { return __builtin_bswap64(n); }
+static inline int8_t swap(int8_t n) { return n; }
+static inline int16_t swap(int16_t n) { return __builtin_bswap16(n); }
+static inline int32_t swap(int32_t n) { return __builtin_bswap32(n); }
+static inline int64_t swap(int64_t n) { return __builtin_bswap64(n); }
+
+#ifdef WORDS_BIGENDIAN
+template<typename T> static inline T from_be(T n) { return n; }
+template<typename T> static inline T to_be(T n) { return n; }
+template<typename T> static inline T from_le(T n) { return swap(n); }
+template<typename T> static inline T to_le(T n) { return swap(n); }
+#else
+template<typename T> static inline T from_le(T n) { return n; }
+template<typename T> static inline T to_le(T n) { return n; }
+template<typename T> static inline T from_be(T n) { return swap(n); }
+template<typename T> static inline T to_be(T n) { return swap(n); }
+#endif
+
+#endif
diff --git a/riscv/decode.h b/riscv/decode.h
index 7a2ffa8..7ecd74f 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -7,10 +7,6 @@
# error spike requires a two''s-complement c++ implementation
#endif
-#ifdef WORDS_BIGENDIAN
-# error spike requires a little-endian host
-#endif
-
#include <algorithm>
#include <cstdint>
#include <string.h>
@@ -75,6 +71,23 @@ const int NCSR = 4096;
#define TAIL_ZEROING false
#endif
+#ifdef WORDS_BIGENDIAN
+ // Elements are stored in opposite order, see comment in processor.h
+ #define TAIL_ZERO(x) \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x) - 1); \
+ memset(tail - (P.VU.vlmax - vl) * (x), 0, (P.VU.vlmax - vl) * (x));
+ #define TAIL_ZERO_REDUCTION(x) \
+ uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
+ memset(tail - ((P.VU.get_vlen() - x) >> 3), 0, (P.VU.get_vlen() - x) >> 3);
+#else
+ #define TAIL_ZERO(x) \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x)); \
+ memset(tail, 0, (P.VU.vlmax - vl) * (x));
+ #define TAIL_ZERO_REDUCTION(x) \
+ uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
+ memset(tail, 0, (P.VU.get_vlen() - x) >> 3);
+#endif
+
typedef uint64_t insn_bits_t;
class insn_t
{
@@ -425,8 +438,7 @@ static inline bool is_overlaped(const int astart, const int asize,
#define VI_TAIL_ZERO(elm) \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING) { \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * elm)); \
- memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * elm)); \
+ TAIL_ZERO((sew >> 3) * elm); \
}
#define VI_TAIL_ZERO_MASK(dst) \
@@ -448,8 +460,7 @@ static inline bool is_overlaped(const int astart, const int asize,
#define VI_LOOP_END \
} \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 1)); \
- memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 1)); \
+ TAIL_ZERO((sew >> 3) * 1); \
}\
P.VU.vstart = 0;
@@ -460,8 +471,7 @@ static inline bool is_overlaped(const int astart, const int asize,
#define VI_LOOP_WIDEN_END \
} \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 2)); \
- memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 2)); \
+ TAIL_ZERO((sew >> 3) * 2); \
}\
P.VU.vstart = 0;
@@ -469,8 +479,7 @@ static inline bool is_overlaped(const int astart, const int asize,
} \
if (vl > 0 && TAIL_ZEROING) { \
vd_0_des = vd_0_res; \
- uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
- memset(tail, 0, (P.VU.get_vlen() - x) >> 3); \
+ TAIL_ZERO_REDUCTION(x); \
} \
P.VU.vstart = 0;
@@ -1558,16 +1567,14 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_VFP_LOOP_END \
} \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 1)); \
- memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 1)); \
+ TAIL_ZERO((P.VU.vsew >> 3) * 1); \
}\
P.VU.vstart = 0; \
#define VI_VFP_LOOP_WIDE_END \
} \
if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 2)); \
- memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 2)); \
+ TAIL_ZERO((P.VU.vsew >> 3) * 2); \
}\
P.VU.vstart = 0; \
set_fp_exceptions;
diff --git a/riscv/mmu.cc b/riscv/mmu.cc
index a0e500b..eca090f 100644
--- a/riscv/mmu.cc
+++ b/riscv/mmu.cc
@@ -288,7 +288,7 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode)
if (!ppte || !pmp_ok(pte_paddr, vm.ptesize, LOAD, PRV_S))
throw_access_exception(addr, type);
- reg_t pte = vm.ptesize == 4 ? *(uint32_t*)ppte : *(uint64_t*)ppte;
+ reg_t pte = vm.ptesize == 4 ? from_le(*(uint32_t*)ppte) : from_le(*(uint64_t*)ppte);
reg_t ppn = pte >> PTE_PPN_SHIFT;
if (PTE_TABLE(pte)) { // next level of page table
@@ -310,7 +310,7 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode)
if ((pte & ad) != ad) {
if (!pmp_ok(pte_paddr, vm.ptesize, STORE, PRV_S))
throw_access_exception(addr, type);
- *(uint32_t*)ppte |= ad;
+ *(uint32_t*)ppte |= to_le((uint32_t)ad);
}
#else
// take exception if access or possibly dirty bit is not set.
diff --git a/riscv/mmu.h b/riscv/mmu.h
index c7e047a..ebacc96 100644
--- a/riscv/mmu.h
+++ b/riscv/mmu.h
@@ -10,6 +10,7 @@
#include "simif.h"
#include "processor.h"
#include "memtracer.h"
+#include "byteorder.h"
#include <stdlib.h>
#include <vector>
@@ -86,9 +87,9 @@ public:
return misaligned_load(addr, sizeof(type##_t)); \
reg_t vpn = addr >> PGSHIFT; \
if (likely(tlb_load_tag[vpn % TLB_ENTRIES] == vpn)) \
- return *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \
+ return from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \
if (unlikely(tlb_load_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \
- type##_t data = *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \
+ type##_t data = from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \
if (!matched_trigger) { \
matched_trigger = trigger_exception(OPERATION_LOAD, addr, data); \
if (matched_trigger) \
@@ -98,7 +99,7 @@ public:
} \
type##_t res; \
load_slow_path(addr, sizeof(type##_t), (uint8_t*)&res); \
- return res; \
+ return from_le(res); \
}
// load value from memory at aligned address; zero extend to register width
@@ -130,17 +131,19 @@ public:
return misaligned_store(addr, val, sizeof(type##_t)); \
reg_t vpn = addr >> PGSHIFT; \
if (likely(tlb_store_tag[vpn % TLB_ENTRIES] == vpn)) \
- *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \
+ *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \
else if (unlikely(tlb_store_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \
if (!matched_trigger) { \
matched_trigger = trigger_exception(OPERATION_STORE, addr, val); \
if (matched_trigger) \
throw *matched_trigger; \
} \
- *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \
+ *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \
+ } \
+ else { \
+ type##_t le_val = to_le(val); \
+ store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&le_val); \
} \
- else \
- store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&val); \
if (proc) { \
size_t size = sizeof(type##_t); \
WRITE_MEM(addr, val, size); \
@@ -228,21 +231,21 @@ public:
inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry)
{
auto tlb_entry = translate_insn_addr(addr);
- insn_bits_t insn = *(uint16_t*)(tlb_entry.host_offset + addr);
+ insn_bits_t insn = from_le(*(uint16_t*)(tlb_entry.host_offset + addr));
int length = insn_length(insn);
if (likely(length == 4)) {
- insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 2) << 16;
+ insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 2)) << 16;
} else if (length == 2) {
insn = (int16_t)insn;
} else if (length == 6) {
- insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 4) << 32;
- insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16;
+ insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 4)) << 32;
+ insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16;
} else {
static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t");
- insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 6) << 48;
- insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 4) << 32;
- insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16;
+ insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 6)) << 48;
+ insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32;
+ insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16;
}
insn_fetch_t fetch = {proc->decode_insn(insn), insn};
@@ -341,9 +344,9 @@ private:
}
if (unlikely(tlb_insn_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) {
uint16_t* ptr = (uint16_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr);
- int match = proc->trigger_match(OPERATION_EXECUTE, addr, *ptr);
+ int match = proc->trigger_match(OPERATION_EXECUTE, addr, from_le(*ptr));
if (match >= 0) {
- throw trigger_matched_t(match, OPERATION_EXECUTE, addr, *ptr);
+ throw trigger_matched_t(match, OPERATION_EXECUTE, addr, from_le(*ptr));
}
}
return result;
diff --git a/riscv/processor.h b/riscv/processor.h
index 2b5003b..68e6249 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -177,6 +177,11 @@ class vectorUnit_t {
reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T));
vReg += n / elts_per_reg;
n = n % elts_per_reg;
+#ifdef WORDS_BIGENDIAN
+ // "V" spec 0.7.1 requires lower indices to map to lower significant
+ // bits when changing SEW, thus we need to index from the end on BE.
+ n ^= elts_per_reg - 1;
+#endif
reg_referenced[vReg] = 1;
T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3));
diff --git a/riscv/sim.cc b/riscv/sim.cc
index cffc037..eca7057 100644
--- a/riscv/sim.cc
+++ b/riscv/sim.cc
@@ -4,6 +4,7 @@
#include "mmu.h"
#include "dts.h"
#include "remote_bitbang.h"
+#include "byteorder.h"
#include <map>
#include <iostream>
#include <sstream>
@@ -194,6 +195,8 @@ void sim_t::make_dtb()
(uint32_t) (start_pc & 0xffffffff),
(uint32_t) (start_pc >> 32)
};
+ for(int i = 0; i < reset_vec_size; i++)
+ reset_vec[i] = to_le(reset_vec[i]);
std::vector<char> rom((char*)reset_vec, (char*)reset_vec + sizeof(reset_vec));
@@ -234,7 +237,7 @@ void sim_t::idle()
void sim_t::read_chunk(addr_t taddr, size_t len, void* dst)
{
assert(len == 8);
- auto data = debug_mmu->load_uint64(taddr);
+ auto data = to_le(debug_mmu->load_uint64(taddr));
memcpy(dst, &data, sizeof data);
}
@@ -243,7 +246,7 @@ void sim_t::write_chunk(addr_t taddr, size_t len, const void* src)
assert(len == 8);
uint64_t data;
memcpy(&data, src, sizeof data);
- debug_mmu->store_uint64(taddr, data);
+ debug_mmu->store_uint64(taddr, from_le(data));
}
void sim_t::proc_reset(unsigned id)