Implement support for big-endian hosts

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
author: Marcus Comstedt <marcus@mc.pp.se> 2019-08-18 16:03:43 +0200
committer: Chih-Min Chao <chihmin.chao@sifive.com> 2019-10-29 03:33:45 -0700
commit: 65648669c15343cb1aa6b102cf2eae0ed91024cd (patch)
tree: 4530bee628e72cf17adeca999e9826ea4a0f88a9
parent: 7c85cc44d13547ab7260438d97671d7f423e5d6c (diff)
download: spike-65648669c15343cb1aa6b102cf2eae0ed91024cd.zip spike-65648669c15343cb1aa6b102cf2eae0ed91024cd.tar.gz spike-65648669c15343cb1aa6b102cf2eae0ed91024cd.tar.bz2
13 files changed, 141 insertions, 76 deletions
diff --git a/configure b/configure
index 63cbe49..ceb28af 100755
--- a/configure
+++ b/configure
@@ -4132,7 +4132,8 @@ fi
 $as_echo "$ac_cv_c_bigendian" >&6; }
  case $ac_cv_c_bigendian in #(
    yes)
-     as_fn_error $? "Spike requires a little-endian host" "$LINENO" 5;; #(
+     $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
+;; #(
    no)
       ;; #(
    universal)
diff --git a/configure.ac b/configure.ac
index b4bbd43..a6f52d8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -57,7 +57,7 @@ AC_PATH_PROG([DTC],[dtc],[no])
 AS_IF([test x"$DTC" == xno],AC_MSG_ERROR([device-tree-compiler not found]))
 AC_DEFINE_UNQUOTED(DTC, ["$DTC"], [Path to the device-tree-compiler])
 
-AC_C_BIGENDIAN(AC_MSG_ERROR([Spike requires a little-endian host]))
+AC_C_BIGENDIAN
 
 #-------------------------------------------------------------------------
 # MCPPBS specific program checks
diff --git a/fesvr/elf.h b/fesvr/elf.h
index b66038d..b4b0add 100644
--- a/fesvr/elf.h
+++ b/fesvr/elf.h
@@ -11,6 +11,8 @@
 
 #define IS_ELF32(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 1)
 #define IS_ELF64(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 2)
+#define IS_ELFLE(hdr) (IS_ELF(hdr) && (hdr).e_ident[5] == 1)
+#define IS_ELFBE(hdr) (IS_ELF(hdr) && (hdr).e_ident[5] == 2)
 
 #define PT_LOAD 1
 
diff --git a/fesvr/elfloader.cc b/fesvr/elfloader.cc
index 3042f54..6e764ef 100644
--- a/fesvr/elfloader.cc
+++ b/fesvr/elfloader.cc
@@ -2,6 +2,7 @@
 
 #include "elf.h"
 #include "memif.h"
+#include "byteorder.h"
 #include <cstring>
 #include <string>
 #include <sys/stat.h>
@@ -30,58 +31,65 @@ std::map<std::string, uint64_t> load_elf(const char* fn, memif_t* memif, reg_t*
   assert(size >= sizeof(Elf64_Ehdr));
   const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf;
   assert(IS_ELF32(*eh64) || IS_ELF64(*eh64));
+  assert(IS_ELFLE(*eh64) || IS_ELFBE(*eh64));
 
   std::vector<uint8_t> zeros;
   std::map<std::string, uint64_t> symbols;
 
-  #define LOAD_ELF(ehdr_t, phdr_t, shdr_t, sym_t) do { \
+  #define LOAD_ELF(ehdr_t, phdr_t, shdr_t, sym_t, bswap) do { \
     ehdr_t* eh = (ehdr_t*)buf; \
-    phdr_t* ph = (phdr_t*)(buf + eh->e_phoff); \
-    *entry = eh->e_entry; \
-    assert(size >= eh->e_phoff + eh->e_phnum*sizeof(*ph)); \
-    for (unsigned i = 0; i < eh->e_phnum; i++) { \
-      if(ph[i].p_type == PT_LOAD && ph[i].p_memsz) { \
-        if (ph[i].p_filesz) { \
-          assert(size >= ph[i].p_offset + ph[i].p_filesz); \
-          memif->write(ph[i].p_paddr, ph[i].p_filesz, (uint8_t*)buf + ph[i].p_offset); \
+    phdr_t* ph = (phdr_t*)(buf + bswap(eh->e_phoff)); \
+    *entry = bswap(eh->e_entry); \
+    assert(size >= bswap(eh->e_phoff) + bswap(eh->e_phnum)*sizeof(*ph)); \
+    for (unsigned i = 0; i < bswap(eh->e_phnum); i++) {			\
+      if(bswap(ph[i].p_type) == PT_LOAD && bswap(ph[i].p_memsz)) {	\
+        if (bswap(ph[i].p_filesz)) {					\
+          assert(size >= bswap(ph[i].p_offset) + bswap(ph[i].p_filesz)); \
+          memif->write(bswap(ph[i].p_paddr), bswap(ph[i].p_filesz), (uint8_t*)buf + bswap(ph[i].p_offset)); \
         } \
-        zeros.resize(ph[i].p_memsz - ph[i].p_filesz); \
-        memif->write(ph[i].p_paddr + ph[i].p_filesz, ph[i].p_memsz - ph[i].p_filesz, &zeros[0]); \
+        zeros.resize(bswap(ph[i].p_memsz) - bswap(ph[i].p_filesz)); \
+        memif->write(bswap(ph[i].p_paddr) + bswap(ph[i].p_filesz), bswap(ph[i].p_memsz) - bswap(ph[i].p_filesz), &zeros[0]); \
       } \
     } \
-    shdr_t* sh = (shdr_t*)(buf + eh->e_shoff); \
-    assert(size >= eh->e_shoff + eh->e_shnum*sizeof(*sh)); \
-    assert(eh->e_shstrndx < eh->e_shnum); \
-    assert(size >= sh[eh->e_shstrndx].sh_offset + sh[eh->e_shstrndx].sh_size); \
-    char *shstrtab = buf + sh[eh->e_shstrndx].sh_offset; \
+    shdr_t* sh = (shdr_t*)(buf + bswap(eh->e_shoff)); \
+    assert(size >= bswap(eh->e_shoff) + bswap(eh->e_shnum)*sizeof(*sh)); \
+    assert(bswap(eh->e_shstrndx) < bswap(eh->e_shnum)); \
+    assert(size >= bswap(sh[bswap(eh->e_shstrndx)].sh_offset) + bswap(sh[bswap(eh->e_shstrndx)].sh_size)); \
+    char *shstrtab = buf + bswap(sh[bswap(eh->e_shstrndx)].sh_offset);	\
     unsigned strtabidx = 0, symtabidx = 0; \
-    for (unsigned i = 0; i < eh->e_shnum; i++) { \
-      unsigned max_len = sh[eh->e_shstrndx].sh_size - sh[i].sh_name; \
-      assert(sh[i].sh_name < sh[eh->e_shstrndx].sh_size); \
-      assert(strnlen(shstrtab + sh[i].sh_name, max_len) < max_len); \
-      if (sh[i].sh_type & SHT_NOBITS) continue; \
-      assert(size >= sh[i].sh_offset + sh[i].sh_size); \
-      if (strcmp(shstrtab + sh[i].sh_name, ".strtab") == 0) \
+    for (unsigned i = 0; i < bswap(eh->e_shnum); i++) {		     \
+      unsigned max_len = bswap(sh[bswap(eh->e_shstrndx)].sh_size) - bswap(sh[i].sh_name); \
+      assert(bswap(sh[i].sh_name) < bswap(sh[bswap(eh->e_shstrndx)].sh_size));	\
+      assert(strnlen(shstrtab + bswap(sh[i].sh_name), max_len) < max_len); \
+      if (bswap(sh[i].sh_type) & SHT_NOBITS) continue; \
+      assert(size >= bswap(sh[i].sh_offset) + bswap(sh[i].sh_size)); \
+      if (strcmp(shstrtab + bswap(sh[i].sh_name), ".strtab") == 0) \
         strtabidx = i; \
-      if (strcmp(shstrtab + sh[i].sh_name, ".symtab") == 0) \
+      if (strcmp(shstrtab + bswap(sh[i].sh_name), ".symtab") == 0) \
         symtabidx = i; \
     } \
     if (strtabidx && symtabidx) { \
-      char* strtab = buf + sh[strtabidx].sh_offset; \
-      sym_t* sym = (sym_t*)(buf + sh[symtabidx].sh_offset); \
-      for (unsigned i = 0; i < sh[symtabidx].sh_size/sizeof(sym_t); i++) { \
-        unsigned max_len = sh[strtabidx].sh_size - sym[i].st_name; \
-        assert(sym[i].st_name < sh[strtabidx].sh_size); \
-        assert(strnlen(strtab + sym[i].st_name, max_len) < max_len); \
-        symbols[strtab + sym[i].st_name] = sym[i].st_value; \
+      char* strtab = buf + bswap(sh[strtabidx].sh_offset); \
+      sym_t* sym = (sym_t*)(buf + bswap(sh[symtabidx].sh_offset)); \
+      for (unsigned i = 0; i < bswap(sh[symtabidx].sh_size)/sizeof(sym_t); i++) { \
+        unsigned max_len = bswap(sh[strtabidx].sh_size) - bswap(sym[i].st_name); \
+        assert(bswap(sym[i].st_name) < bswap(sh[strtabidx].sh_size));	\
+        assert(strnlen(strtab + bswap(sym[i].st_name), max_len) < max_len); \
+        symbols[strtab + bswap(sym[i].st_name)] = bswap(sym[i].st_value); \
       } \
     } \
   } while(0)
 
   if (IS_ELF32(*eh64))
-    LOAD_ELF(Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym);
+    if (IS_ELFLE(*eh64))
+      LOAD_ELF(Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym, from_le);
+    else
+      LOAD_ELF(Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym, from_be);
   else
-    LOAD_ELF(Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym);
+    if (IS_ELFLE(*eh64))
+      LOAD_ELF(Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym, from_le);
+    else
+      LOAD_ELF(Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym, from_be);
 
   munmap(buf, size);
 
diff --git a/fesvr/htif.cc b/fesvr/htif.cc
index d9ff341..d9e884f 100644
--- a/fesvr/htif.cc
+++ b/fesvr/htif.cc
@@ -4,6 +4,7 @@
 #include "rfb.h"
 #include "elfloader.h"
 #include "encoding.h"
+#include "byteorder.h"
 #include <algorithm>
 #include <assert.h>
 #include <vector>
@@ -183,7 +184,7 @@ int htif_t::run()
 
   while (!signal_exit && exitcode == 0)
   {
-    if (auto tohost = mem.read_uint64(tohost_addr)) {
+    if (auto tohost = from_le(mem.read_uint64(tohost_addr))) {
       mem.write_uint64(tohost_addr, 0);
       command_t cmd(mem, tohost, fromhost_callback);
       device_list.handle_command(cmd);
@@ -194,7 +195,7 @@ int htif_t::run()
     device_list.tick();
 
     if (!fromhost_queue.empty() && mem.read_uint64(fromhost_addr) == 0) {
-      mem.write_uint64(fromhost_addr, fromhost_queue.front());
+      mem.write_uint64(fromhost_addr, to_le(fromhost_queue.front()));
       fromhost_queue.pop();
     }
   }
diff --git a/fesvr/syscall.cc b/fesvr/syscall.cc
index 6e8baf6..f0bdd25 100644
--- a/fesvr/syscall.cc
+++ b/fesvr/syscall.cc
@@ -2,6 +2,7 @@
 
 #include "syscall.h"
 #include "htif.h"
+#include "byteorder.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/stat.h>
@@ -299,21 +300,21 @@ reg_t syscall_t::sys_getmainvars(reg_t pbuf, reg_t limit, reg_t a2, reg_t a3, re
 {
   std::vector<std::string> args = htif->target_args();
   std::vector<uint64_t> words(args.size() + 3);
-  words[0] = args.size();
+  words[0] = to_le(args.size());
   words[args.size()+1] = 0; // argv[argc] = NULL
   words[args.size()+2] = 0; // envp[0] = NULL
 
   size_t sz = (args.size() + 3) * sizeof(words[0]);
   for (size_t i = 0; i < args.size(); i++)
   {
-    words[i+1] = sz + pbuf;
+    words[i+1] = to_le(sz + pbuf);
     sz += args[i].length() + 1;
   }
 
   std::vector<char> bytes(sz);
   memcpy(&bytes[0], &words[0], sizeof(words[0]) * words.size());
   for (size_t i = 0; i < args.size(); i++)
-    strcpy(&bytes[words[i+1] - pbuf], args[i].c_str());
+    strcpy(&bytes[from_le(words[i+1]) - pbuf], args[i].c_str());
 
   if (bytes.size() > limit)
     return -ENOMEM;
@@ -342,11 +343,11 @@ void syscall_t::dispatch(reg_t mm)
   reg_t magicmem[8];
   memif->read(mm, sizeof(magicmem), magicmem);
 
-  reg_t n = magicmem[0];
+  reg_t n = from_le(magicmem[0]);
   if (n >= table.size() || !table[n])
     throw std::runtime_error("bad syscall #" + std::to_string(n));
 
-  magicmem[0] = (this->*table[n])(magicmem[1], magicmem[2], magicmem[3], magicmem[4], magicmem[5], magicmem[6], magicmem[7]);
+  magicmem[0] = to_le((this->*table[n])(from_le(magicmem[1]), from_le(magicmem[2]), from_le(magicmem[3]), from_le(magicmem[4]), from_le(magicmem[5]), from_le(magicmem[6]), from_le(magicmem[7])));
 
   memif->write(mm, sizeof(magicmem), magicmem);
 }
diff --git a/riscv/byteorder.h b/riscv/byteorder.h
new file mode 100644
index 0000000..393a70b
--- /dev/null
+++ b/riscv/byteorder.h
@@ -0,0 +1,30 @@
+// See LICENSE for license details.
+
+#ifndef _RISCV_BYTEORDER_H
+#define _RISCV_BYTEORDER_H
+
+#include "config.h"
+#include <stdint.h>
+
+static inline uint8_t swap(uint8_t n) { return n; }
+static inline uint16_t swap(uint16_t n) { return __builtin_bswap16(n); }
+static inline uint32_t swap(uint32_t n) { return __builtin_bswap32(n); }
+static inline uint64_t swap(uint64_t n) { return __builtin_bswap64(n); }
+static inline int8_t swap(int8_t n) { return n; }
+static inline int16_t swap(int16_t n) { return __builtin_bswap16(n); }
+static inline int32_t swap(int32_t n) { return __builtin_bswap32(n); }
+static inline int64_t swap(int64_t n) { return __builtin_bswap64(n); }
+
+#ifdef WORDS_BIGENDIAN
+template<typename T> static inline T from_be(T n) { return n; }
+template<typename T> static inline T to_be(T n) { return n; }
+template<typename T> static inline T from_le(T n) { return swap(n); }
+template<typename T> static inline T to_le(T n) { return swap(n); }
+#else
+template<typename T> static inline T from_le(T n) { return n; }
+template<typename T> static inline T to_le(T n) { return n; }
+template<typename T> static inline T from_be(T n) { return swap(n); }
+template<typename T> static inline T to_be(T n) { return swap(n); }
+#endif
+
+#endif
diff --git a/riscv/decode.h b/riscv/decode.h
index 2fb8192..3e759a3 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -7,10 +7,6 @@
 # error spike requires a two''s-complement c++ implementation
 #endif
 
-#ifdef WORDS_BIGENDIAN
-# error spike requires a little-endian host
-#endif
-
 #include <algorithm>
 #include <cstdint>
 #include <string.h>
@@ -69,6 +65,23 @@ const int NCSR = 4096;
 #define MAX_INSN_LENGTH 8
 #define PC_ALIGN 2
 
+#ifdef WORDS_BIGENDIAN
+  // Elements are stored in opposite order, see comment in processor.h
+  #define TAIL_ZERO(x) \
+    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x) - 1); \
+    memset(tail - (P.VU.vlmax - vl) * (x), 0, (P.VU.vlmax - vl) * (x));
+  #define TAIL_ZERO_REDUCTION(x) \
+    uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
+    memset(tail - ((P.VU.get_vlen() - x) >> 3), 0, (P.VU.get_vlen() - x) >> 3);
+#else
+  #define TAIL_ZERO(x) \
+    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x)); \
+    memset(tail, 0, (P.VU.vlmax - vl) * (x));
+  #define TAIL_ZERO_REDUCTION(x) \
+    uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
+    memset(tail, 0, (P.VU.get_vlen() - x) >> 3);
+#endif
+
 typedef uint64_t insn_bits_t;
 class insn_t
 {
@@ -486,8 +499,7 @@ static inline bool is_overlaped(const int astart, const int asize,
 
 #define VI_TAIL_ZERO(elm) \
   if (vl != 0 && vl < P.VU.vlmax && P.VU.TZ) { \
-    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * elm)); \
-    memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * elm)); \
+    TAIL_ZERO((sew >> 3) * elm); \
   }
 
 #define VI_TAIL_ZERO_MASK(dst) \
@@ -518,8 +530,7 @@ static inline bool is_overlaped(const int astart, const int asize,
 #define VI_LOOP_WIDEN_END \
   } \
   if (vl != 0 && vl < P.VU.vlmax && P.VU.TZ){ \
-    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 2)); \
-    memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 2)); \
+    TAIL_ZERO((sew >> 3) * 2); \
   }\
   P.VU.vstart = 0;
 
@@ -528,8 +539,7 @@ static inline bool is_overlaped(const int astart, const int asize,
   if (vl > 0) { \
     vd_0_des = vd_0_res; \
     if (P.VU.TZ) { \
-        uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
-        memset(tail, 0, (P.VU.get_vlen() - x) >> 3); \
+      TAIL_ZERO_REDUCTION(x); \
     } \
   } \
   P.VU.vstart = 0; 
@@ -1638,16 +1648,14 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
 #define VI_VFP_LOOP_END \
   } \
   if (vl != 0 && vl < P.VU.vlmax && P.VU.TZ){ \
-    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 1)); \
-    memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 1)); \
+    TAIL_ZERO((P.VU.vsew >> 3) * 1); \
   }\
   P.VU.vstart = 0; \
 
 #define VI_VFP_LOOP_WIDE_END \
   } \
   if (vl != 0 && vl < P.VU.vlmax && P.VU.TZ){ \
-    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 2)); \
-    memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 2)); \
+    TAIL_ZERO((P.VU.vsew >> 3) * 2); \
   }\
   P.VU.vstart = 0; \
   set_fp_exceptions;
diff --git a/riscv/mmu.cc b/riscv/mmu.cc
index a0e500b..eca090f 100644
--- a/riscv/mmu.cc
+++ b/riscv/mmu.cc
@@ -288,7 +288,7 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode)
     if (!ppte || !pmp_ok(pte_paddr, vm.ptesize, LOAD, PRV_S))
       throw_access_exception(addr, type);
 
-    reg_t pte = vm.ptesize == 4 ? *(uint32_t*)ppte : *(uint64_t*)ppte;
+    reg_t pte = vm.ptesize == 4 ? from_le(*(uint32_t*)ppte) : from_le(*(uint64_t*)ppte);
     reg_t ppn = pte >> PTE_PPN_SHIFT;
 
     if (PTE_TABLE(pte)) { // next level of page table
@@ -310,7 +310,7 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode)
       if ((pte & ad) != ad) {
         if (!pmp_ok(pte_paddr, vm.ptesize, STORE, PRV_S))
           throw_access_exception(addr, type);
-        *(uint32_t*)ppte |= ad;
+        *(uint32_t*)ppte |= to_le((uint32_t)ad);
       }
 #else
       // take exception if access or possibly dirty bit is not set.
diff --git a/riscv/mmu.h b/riscv/mmu.h
index eb2a0be..77b4892 100644
--- a/riscv/mmu.h
+++ b/riscv/mmu.h
@@ -10,6 +10,7 @@
 #include "simif.h"
 #include "processor.h"
 #include "memtracer.h"
+#include "byteorder.h"
 #include <stdlib.h>
 #include <vector>
 
@@ -95,9 +96,9 @@ public:
         return misaligned_load(addr, sizeof(type##_t), mistrap); \
       reg_t vpn = addr >> PGSHIFT; \
       if (likely(tlb_load_tag[vpn % TLB_ENTRIES] == vpn)) \
-        return *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \
+        return from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \
       if (unlikely(tlb_load_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \
-        type##_t data = *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \
+        type##_t data = from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \
         if (!matched_trigger) { \
           matched_trigger = trigger_exception(OPERATION_LOAD, addr, data); \
           if (matched_trigger) \
@@ -107,7 +108,7 @@ public:
       } \
       type##_t res; \
       load_slow_path(addr, sizeof(type##_t), (uint8_t*)&res); \
-      return res; \
+      return from_le(res); \
     }
 
   // load value from memory at aligned address; zero extend to register width
@@ -139,17 +140,19 @@ public:
         return misaligned_store(addr, val, sizeof(type##_t), mistrap); \
       reg_t vpn = addr >> PGSHIFT; \
       if (likely(tlb_store_tag[vpn % TLB_ENTRIES] == vpn)) \
-        *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \
+        *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \
       else if (unlikely(tlb_store_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \
         if (!matched_trigger) { \
           matched_trigger = trigger_exception(OPERATION_STORE, addr, val); \
           if (matched_trigger) \
             throw *matched_trigger; \
         } \
-        *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \
+        *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \
+      } \
+      else { \
+	type##_t le_val = to_le(val); \
+        store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&le_val); \
       } \
-      else \
-        store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&val); \
       if (proc) { \
         size_t size = sizeof(type##_t); \
         WRITE_MEM(addr, val, size); \
@@ -237,21 +240,21 @@ public:
   inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry)
   {
     auto tlb_entry = translate_insn_addr(addr);
-    insn_bits_t insn = *(uint16_t*)(tlb_entry.host_offset + addr);
+    insn_bits_t insn = from_le(*(uint16_t*)(tlb_entry.host_offset + addr));
     int length = insn_length(insn);
 
     if (likely(length == 4)) {
-      insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 2) << 16;
+      insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 2)) << 16;
     } else if (length == 2) {
       insn = (int16_t)insn;
     } else if (length == 6) {
-      insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 4) << 32;
-      insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16;
+      insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 4)) << 32;
+      insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16;
     } else {
       static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t");
-      insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 6) << 48;
-      insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 4) << 32;
-      insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16;
+      insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 6)) << 48;
+      insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32;
+      insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16;
     }
 
     insn_fetch_t fetch = {proc->decode_insn(insn), insn};
@@ -350,9 +353,9 @@ private:
     }
     if (unlikely(tlb_insn_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) {
       uint16_t* ptr = (uint16_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr);
-      int match = proc->trigger_match(OPERATION_EXECUTE, addr, *ptr);
+      int match = proc->trigger_match(OPERATION_EXECUTE, addr, from_le(*ptr));
       if (match >= 0) {
-        throw trigger_matched_t(match, OPERATION_EXECUTE, addr, *ptr);
+        throw trigger_matched_t(match, OPERATION_EXECUTE, addr, from_le(*ptr));
       }
     }
     return result;
diff --git a/riscv/processor.h b/riscv/processor.h
index ef0319f..8f1a381 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -180,6 +180,11 @@ class vectorUnit_t {
         reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T));
         vReg += n / elts_per_reg;
         n = n % elts_per_reg;
+#ifdef WORDS_BIGENDIAN
+	// "V" spec 0.7.1 requires lower indices to map to less significant
+	// bits when changing SEW, thus we need to index from the end on BE.
+	n ^= elts_per_reg - 1;
+#endif
         reg_referenced[vReg] = 1;
 
         T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3));
diff --git a/riscv/sim.cc b/riscv/sim.cc
index be53a9c..faae14f 100644
--- a/riscv/sim.cc
+++ b/riscv/sim.cc
@@ -4,6 +4,7 @@
 #include "mmu.h"
 #include "dts.h"
 #include "remote_bitbang.h"
+#include "byteorder.h"
 #include <map>
 #include <iostream>
 #include <sstream>
@@ -200,6 +201,8 @@ void sim_t::make_dtb()
     (uint32_t) (start_pc & 0xffffffff),
     (uint32_t) (start_pc >> 32)
   };
+  for(int i = 0; i < reset_vec_size; i++)
+    reset_vec[i] = to_le(reset_vec[i]);
 
   std::vector<char> rom((char*)reset_vec, (char*)reset_vec + sizeof(reset_vec));
 
@@ -240,7 +243,7 @@ void sim_t::idle()
 void sim_t::read_chunk(addr_t taddr, size_t len, void* dst)
 {
   assert(len == 8);
-  auto data = debug_mmu->load_uint64(taddr);
+  auto data = to_le(debug_mmu->load_uint64(taddr));
   memcpy(dst, &data, sizeof data);
 }
 
@@ -249,7 +252,7 @@ void sim_t::write_chunk(addr_t taddr, size_t len, const void* src)
   assert(len == 8);
   uint64_t data;
   memcpy(&data, src, sizeof data);
-  debug_mmu->store_uint64(taddr, data);
+  debug_mmu->store_uint64(taddr, from_le(data));
 }
 
 void sim_t::proc_reset(unsigned id)
diff --git a/softfloat/platform.h b/softfloat/platform.h
index 03dd429..48838cd 100644
--- a/softfloat/platform.h
+++ b/softfloat/platform.h
@@ -36,7 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
+#include "config.h"
+#ifndef WORDS_BIGENDIAN
 #define LITTLEENDIAN 1
+#endif
 
 #define INLINE_LEVEL 5
 #define SOFTFLOAT_FAST_INT64
author	Marcus Comstedt <marcus@mc.pp.se>	2019-08-18 16:03:43 +0200
committer	Chih-Min Chao <chihmin.chao@sifive.com>	2019-10-29 03:33:45 -0700
commit	65648669c15343cb1aa6b102cf2eae0ed91024cd (patch)
tree	4530bee628e72cf17adeca999e9826ea4a0f88a9
parent	7c85cc44d13547ab7260438d97671d7f423e5d6c (diff)
download	spike-65648669c15343cb1aa6b102cf2eae0ed91024cd.zip spike-65648669c15343cb1aa6b102cf2eae0ed91024cd.tar.gz spike-65648669c15343cb1aa6b102cf2eae0ed91024cd.tar.bz2