aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Waterman <waterman@eecs.berkeley.edu>2014-02-06 01:36:26 -0800
committerAndrew Waterman <waterman@eecs.berkeley.edu>2014-02-06 01:36:26 -0800
commit71330800b89cb4c6ae8716a7e78bdcf574fe04ab (patch)
tree5395bdf46cccee8f6c94538b63ab161fa41648db
parent6fdd12c130d0c0c13934364a4dfe12d8dcf28e27 (diff)
downloadriscv-tests-71330800b89cb4c6ae8716a7e78bdcf574fe04ab.zip
riscv-tests-71330800b89cb4c6ae8716a7e78bdcf574fe04ab.tar.gz
riscv-tests-71330800b89cb4c6ae8716a7e78bdcf574fe04ab.tar.bz2
Clean up benchmarks; support uarch-specific counters
-rw-r--r--benchmarks/Makefile4
-rw-r--r--benchmarks/common/crt-mt.S113
-rw-r--r--benchmarks/common/crt.S94
-rw-r--r--benchmarks/common/syscalls.c178
-rw-r--r--benchmarks/common/test-mt.ld45
-rw-r--r--benchmarks/common/util.h128
-rw-r--r--benchmarks/dgemm/bmark.mk3
-rw-r--r--benchmarks/dgemm/dgemm_main.c84
-rw-r--r--benchmarks/dhrystone/bmark.mk1
-rw-r--r--benchmarks/dhrystone/dhrystone.h8
-rw-r--r--benchmarks/dhrystone/dhrystone_main.c12
-rw-r--r--benchmarks/median/bmark.mk3
-rw-r--r--benchmarks/median/median_main.c80
-rw-r--r--benchmarks/mt-matmul/bmark.mk5
-rw-r--r--benchmarks/mt-matmul/mt-matmul.c51
-rw-r--r--benchmarks/mt-vvadd/bmark.mk4
-rw-r--r--benchmarks/mt-vvadd/mt-vvadd.c58
-rw-r--r--benchmarks/multiply/bmark.mk3
-rw-r--r--benchmarks/multiply/multiply_main.c90
-rw-r--r--benchmarks/qsort/bmark.mk3
-rw-r--r--benchmarks/qsort/qsort_main.c75
-rw-r--r--benchmarks/spmv/bmark.mk3
-rw-r--r--benchmarks/spmv/spmv_main.c64
-rw-r--r--benchmarks/towers/bmark.mk3
-rw-r--r--benchmarks/towers/towers_main.c44
-rw-r--r--benchmarks/vec-cmplxmult/vec_cmplxmult_main.c3
-rw-r--r--benchmarks/vec-matmul/vec_matmul_main.c59
-rw-r--r--benchmarks/vec-vvadd/vec_vvadd_main.c59
-rw-r--r--benchmarks/vvadd/bmark.mk3
-rw-r--r--benchmarks/vvadd/vvadd_main.c88
m---------env10
31 files changed, 352 insertions, 1026 deletions
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
index cff904a..8f580a4 100644
--- a/benchmarks/Makefile
+++ b/benchmarks/Makefile
@@ -51,10 +51,10 @@ HOST_OPTS = -std=gnu99 -DPREALLOCATE=0 -DHOST_DEBUG=1
HOST_COMP = gcc $(HOST_OPTS)
RISCV_GCC = riscv-gcc
-RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -nostdlib -nostartfiles -ffast-math
+RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -ffast-math
RISCV_LINK = riscv-gcc -T $(bmarkdir)/common/test.ld $(incs)
RISCV_LINK_MT = riscv-gcc -T $(bmarkdir)/common/test-mt.ld
-RISCV_LINK_OPTS = -lc
+RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc
RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data
RISCV_SIM = spike
diff --git a/benchmarks/common/crt-mt.S b/benchmarks/common/crt-mt.S
deleted file mode 100644
index 6cedec0..0000000
--- a/benchmarks/common/crt-mt.S
+++ /dev/null
@@ -1,113 +0,0 @@
-#include "encoding.h"
-
- .data
- .globl _heapend
- .globl environ
-_heapend:
- .word 0
-environ:
- .word 0
-
- .text
- .globl _start
-
-_start:
- li x1, 0
- li x2, 0
- li x3, 0
- li x4, 0
- li x5, 0
- li x6, 0
- li x7, 0
- li x8, 0
- li x9, 0
- li x10,0
- li x11,0
- li x12,0
- li x13,0
- li x14,0
- li x15,0
- li x16,0
- li x17,0
- li x18,0
- li x19,0
- li x20,0
- li x21,0
- li x22,0
- li x23,0
- li x24,0
- li x25,0
- li x26,0
- li x27,0
- li x28,0
- li x29,0
- li x30,0
- li x31,0
-
- # enable fp and accelerator
- li a0, SR_EF | SR_EA
- csrs status, a0
-
- ## if that didn't stick, we don't have an FPU, so don't initialize it
- csrr t0, status
- and t0, t0, SR_EF
- beqz t0, 1f
-
- fssr x0
- fmv.s.x f0, x0
- fmv.s.x f1, x0
- fmv.s.x f2, x0
- fmv.s.x f3, x0
- fmv.s.x f4, x0
- fmv.s.x f5, x0
- fmv.s.x f6, x0
- fmv.s.x f7, x0
- fmv.s.x f8, x0
- fmv.s.x f9, x0
- fmv.s.x f10,x0
- fmv.s.x f11,x0
- fmv.s.x f12,x0
- fmv.s.x f13,x0
- fmv.s.x f14,x0
- fmv.s.x f15,x0
- fmv.s.x f16,x0
- fmv.s.x f17,x0
- fmv.s.x f18,x0
- fmv.s.x f19,x0
- fmv.s.x f20,x0
- fmv.s.x f21,x0
- fmv.s.x f22,x0
- fmv.s.x f23,x0
- fmv.s.x f24,x0
- fmv.s.x f25,x0
- fmv.s.x f26,x0
- fmv.s.x f27,x0
- fmv.s.x f28,x0
- fmv.s.x f29,x0
- fmv.s.x f30,x0
- fmv.s.x f31,x0
-1:
-
-
- # get core id and number of cores
- csrr a0, hartid
- lw a1, 4(zero)
-
- slli a2, a0, 13
- la sp, stacktop
- sub sp, sp, a2
-
- la tp, tlstop
- sub tp, tp, a2
-
- jal thread_entry
-
- .bss
- .globl stacktop
- .globl tlstop
-
- .align 4
- .skip 32768
-stacktop:
- .skip 65536
-tlstop:
diff --git a/benchmarks/common/crt.S b/benchmarks/common/crt.S
index ae8706b..fb2cc25 100644
--- a/benchmarks/common/crt.S
+++ b/benchmarks/common/crt.S
@@ -93,48 +93,52 @@ _start:
fmv.s.x f31,x0
1:
- lui a0, %hi(trap_entry)
- add a0, a0, %lo(trap_entry)
- csrw evec, a0
-
- lui a0, %hi(main)
- add a0, a0, %lo(main)
- csrw epc, a0
-
- # only allow core 0 to proceed
-1:csrr a0, hartid
- bnez a0, 1b
-
- la sp,stacktop
-
- # jmp to main as a user program
- sret
-1:b 1b
-
-.align 4
-.globl trap_entry
-trap_entry: # only check for SYS_exit, otherwise crash out
- li a3, 1337 # magic "bad things" happened error code
- csrr a1, cause
- li a2, 6 # syscall exception number
- bne a1, a2, exit_error
-handle_syscall:
- li a1, 93 # SYS_exit number
- bne v0, a1, exit_error
- li a1, 1 # successful exit code
- move a3, a0
- bne a3, a1, exit_error
- csrw tohost, a1 # exit successfully (tohost == 1)
-1:b 1b
-exit_error:
- sll a3, a3, 1
- or a3, a3, 1
- csrw tohost, a3
-1:b 1b
-
- .bss
- .globl stacktop
-
- .align 4
- .skip 131072
-stacktop:
+ la t0, trap_entry
+ csrw evec, t0
+
+ la tp, _end + 63
+ and tp, tp, -64
+
+ # get core id and number of cores
+ csrr a0, hartid
+ lw a1, 4(zero)
+
+ # give each core a 1KB TLS and a 127KB stack
+#define STKSHIFT 17
+ sll a2, a0, STKSHIFT
+ add tp, tp, a2
+ add sp, a0, 1
+ sll sp, sp, STKSHIFT
+ add sp, sp, tp
+ add tp, tp, 1024
+
+ jal _init
+ unimp
+
+trap_entry: # trap vector (its address is written to evec above); tolerates only traps caused by uarch CSR reads
+ csrw sup0, t0 # save t0 in sup0 so it can be used as a temporary
+ csrw sup1, t1 # save t1 in sup1 likewise
+ la t0, uarch_insn
+ lw t0, (t0) # t0 = encoding of the template instruction "csrr x0, uarch0"
+ csrr t1, epc # t1 = epc (presumably the PC of the trapping instruction -- confirm)
+ and t1, t1, ~3 # clear the two low address bits before loading
+ lw t1, (t1) # t1 = instruction word at that address
+ and t1, t1, t0 # keep only the bits that are set in the template
+ beq t1, t0, handle_uarch_insn # every template bit is present -> treat it as a uarch CSR access
+
+ # a trap occurred that shouldn't have.
+ li t0, 1337
+ csrw tohost, t0 # report 1337 through tohost
+1:j 1b # spin forever
+
+handle_uarch_insn:
+ # we trapped on an illegal uarch-specific CSR. just skip over it.
+ csrr t1, epc
+ add t1, t1, 4 # step past the 4-byte instruction
+ csrw epc, t1
+ csrr t0, sup0 # restore the temporaries saved on entry
+ csrr t1, sup1
+ sret
+
+uarch_insn: # template instruction; trap_entry loads it as data
+ csrr x0, uarch0
diff --git a/benchmarks/common/syscalls.c b/benchmarks/common/syscalls.c
index 4154ba4..1a53349 100644
--- a/benchmarks/common/syscalls.c
+++ b/benchmarks/common/syscalls.c
@@ -1,50 +1,106 @@
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
+#include <stdio.h>
+#include <limits.h>
#include <machine/syscall.h>
#include "encoding.h"
-void exit(int code)
+#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
+
+void syscall(long which, long arg0, long arg1, long arg2)
{
- volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = SYS_exit;
- magic_mem[1] = code;
+ volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
+ magic_mem[0] = which;
+ magic_mem[1] = arg0;
+ magic_mem[2] = arg1;
+ magic_mem[3] = arg2;
__sync_synchronize();
write_csr(tohost, (long)magic_mem);
- while(1);
+ while (swap_csr(fromhost, 0) == 0);
+}
+
+void exit(int code) // writes (code << 1) | 1 to tohost and never returns
+{
+ write_csr(tohost, ((unsigned long)code << 1) | 1); // shift as unsigned: `code << 1` is undefined for negative codes such as exit(-1)
+ while (1); // nothing to return to; spin forever
+}
void printstr(const char* s)
{
- volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = SYS_write;
- magic_mem[1] = 1;
- magic_mem[2] = (unsigned long)s;
- magic_mem[3] = strlen(s);
- __sync_synchronize();
- write_csr(tohost, (long)magic_mem);
- while (swap_csr(fromhost, 0) == 0);
+ syscall(SYS_write, 1, (long)s, strlen(s));
+}
+
+// In setStats, we might trap reading uarch-specific counters.
+// The trap handler will skip over the instruction, but we want
+// to pretend as though we read the value 0 in this case.
+#define read_csr_safe(reg) ({ long __tmp = 0; \
+ asm volatile ("csrr %0, " #reg : "+r"(__tmp)); \
+ __tmp; })
+
+#define NUM_COUNTERS 18 // cycle, instret and uarch0..uarch15
+static long counters[NUM_COUNTERS]; // raw reading after setStats(nonzero); elapsed delta after setStats(0)
+static char* counter_names[NUM_COUNTERS]; // CSR names, filled in only on the setStats(0) pass
+void setStats(int enable) // nonzero: snapshot every counter; zero: replace each snapshot with (now - snapshot)
+{
+ int i = 0; // index of the next slot; advanced by every READ_CTR
+#define READ_CTR(name) do { \
+ if (i >= NUM_COUNTERS) exit(-1); /* more READ_CTR uses than slots */ \
+ long csr = read_csr_safe(name); /* stays 0 if the read traps and is skipped */ \
+ if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
+ counters[i++] = csr; \
+ } while (0)
+ READ_CTR(cycle); READ_CTR(instret);
+ READ_CTR(uarch0); READ_CTR(uarch1); READ_CTR(uarch2); READ_CTR(uarch3);
+ READ_CTR(uarch4); READ_CTR(uarch5); READ_CTR(uarch6); READ_CTR(uarch7);
+ READ_CTR(uarch8); READ_CTR(uarch9); READ_CTR(uarch10); READ_CTR(uarch11);
+ READ_CTR(uarch12); READ_CTR(uarch13); READ_CTR(uarch14); READ_CTR(uarch15);
+#undef READ_CTR
+}
+
+void __attribute__((weak)) thread_entry(int cid, int nc)
+{
+ // multi-threaded programs override this function.
+ // for the case of single-threaded programs, only let core 0 proceed.
+ while (cid != 0);
+}
+
+int __attribute__((weak)) main(int argc, char** argv)
+{
+ // single-threaded programs override this function.
+ printstr("Implement main(), foo!\n");
+ return -1;
}
+void _init(int cid, int nc) // C entry point called from crt.S: run the program, print nonzero counters, exit
+{
+ thread_entry(cid, nc);
+
+ // only single-threaded programs should ever get here.
+ int ret = main(0, 0);
+
+ char buf[NUM_COUNTERS * 32] __attribute__((aligned(64))); // 32 bytes per "name = value\n" line
+ char* pbuf = buf; // write cursor into buf
+ for (int i = 0; i < NUM_COUNTERS; i++)
+ if (counters[i]) // counters that read as zero are not reported
+ pbuf += sprintf(pbuf, "%s = %ld\n", counter_names[i], counters[i]); // counters[] is long, so %ld (was %d)
+ if (pbuf != buf) // at least one line was formatted
+ printstr(buf);
+
+ exit(ret);
+}
+
+#undef putchar
int putchar(int ch)
{
- static char buf[64];
+ static char buf[64] __attribute__((aligned(64)));
static int buflen = 0;
- if(ch != -1)
- buf[buflen++] = ch;
+ buf[buflen++] = ch;
- if(ch == -1 || buflen == sizeof(buf))
+ if (ch == '\n' || buflen == sizeof(buf))
{
- volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = SYS_write;
- magic_mem[1] = 1;
- magic_mem[2] = (long)buf;
- magic_mem[3] = buflen;
- __sync_synchronize();
- write_csr(tohost, (long)magic_mem);
- while (swap_csr(fromhost, 0) == 0);
-
+ syscall(SYS_write, 1, (long)buf, buflen);
buflen = 0;
}
@@ -65,15 +121,25 @@ void printhex(uint64_t x)
printstr(str);
}
-static void printnum(void (*putch)(int, void**), void **putdat,
- unsigned long long num, unsigned base, int width, int padc)
+static inline void printnum(void (*putch)(int, void**), void **putdat,
+ unsigned long long num, unsigned base, int width, int padc)
{
- if (num >= base)
- printnum(putch, putdat, num / base, base, width - 1, padc);
- else while (--width > 0)
+ unsigned digs[sizeof(num)*CHAR_BIT];
+ int pos = 0;
+
+ while (1)
+ {
+ digs[pos++] = num % base;
+ if (num < base)
+ break;
+ num /= base;
+ }
+
+ while (width-- > pos)
putch(padc, putdat);
- putch("0123456789abcdef"[num % base], putdat);
+ while (pos-- > 0)
+ putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat);
}
static unsigned long long getuint(va_list *ap, int lflag)
@@ -96,7 +162,7 @@ static long long getint(va_list *ap, int lflag)
return va_arg(*ap, int);
}
-void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
+static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
{
register const char* p;
const char* last_fmt;
@@ -188,10 +254,7 @@ void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_li
for (width -= strnlen(p, precision); width > 0; width--)
putch(padc, putdat);
for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
- if (altflag && (ch < ' ' || ch > '~'))
- putch('?', putdat);
- else
- putch(ch, putdat);
+ putch(ch, putdat);
p++;
}
for (; width > 0; width--)
@@ -206,35 +269,33 @@ void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_li
num = -(long long) num;
}
base = 10;
- goto number;
+ goto signed_number;
// unsigned decimal
case 'u':
- num = getuint(&ap, lflag);
base = 10;
- goto number;
+ goto unsigned_number;
// (unsigned) octal
case 'o':
// should do something with padding so it's always 3 octits
- num = getuint(&ap, lflag);
base = 8;
- goto number;
+ goto unsigned_number;
// pointer
case 'p':
+ static_assert(sizeof(long) == sizeof(void*));
+ lflag = 1;
putch('0', putdat);
putch('x', putdat);
- num = (unsigned long long)
- (uintptr_t) va_arg(ap, void *);
- base = 16;
- goto number;
+ /* fall through to 'x' */
// (unsigned) hexadecimal
case 'x':
- num = getuint(&ap, lflag);
base = 16;
- number:
+ unsigned_number:
+ num = getuint(&ap, lflag);
+ signed_number:
printnum(putch, putdat, num, base, width, padc);
break;
@@ -258,8 +319,27 @@ int printf(const char* fmt, ...)
va_start(ap, fmt);
vprintfmt((void*)putchar, 0, fmt, ap);
- putchar(-1);
va_end(ap);
return 0; // incorrect return value, but who cares, anyway?
}
+
+int sprintf(char* str, const char* fmt, ...) // formats into str with no bounds check; returns chars written, excluding the NUL
+{
+ va_list ap;
+ char* str0 = str; // remember the start so the length can be computed at the end
+ va_start(ap, fmt);
+
+ void sprintf_putch(int ch, void** data) // nested function (GNU C extension) used as the putch callback
+ {
+ char** pstr = (char**)data; // data is the &str passed to vprintfmt below
+ **pstr = ch; // store the character at the cursor ...
+ (*pstr)++; // ... and advance the cursor
+ }
+
+ vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
+ *str = 0; // NUL-terminate at the final cursor position
+
+ va_end(ap);
+ return str - str0;
+}
diff --git a/benchmarks/common/test-mt.ld b/benchmarks/common/test-mt.ld
deleted file mode 100644
index 5523032..0000000
--- a/benchmarks/common/test-mt.ld
+++ /dev/null
@@ -1,45 +0,0 @@
-/*======================================================================*/
-/* Proxy kernel linker script */
-/*======================================================================*/
-/* This is the linker script used when building the proxy kernel. */
-
-/*----------------------------------------------------------------------*/
-/* Setup */
-/*----------------------------------------------------------------------*/
-
-/* The OUTPUT_ARCH command specifies the machine architecture where the
- argument is one of the names used in the BFD library. More
- specifically one of the entires in bfd/cpu-mips.c */
-
-OUTPUT_ARCH( "riscv" )
-
-/* The ENTRY command specifies the entry point (ie. first instruction
- to execute). The symbol _start should be defined in each test. */
-
-ENTRY( _start )
-
-/*----------------------------------------------------------------------*/
-/* Sections */
-/*----------------------------------------------------------------------*/
-
-SECTIONS
-{
-
- /* text: test code section */
- . = 0x00002000;
- .text :
- {
- crt-mt.o(.text)
- *(.text)
- }
-
- /* data: Initialized data segment */
- .data :
- {
- *(.data)
- }
-
- /* End of uninitalized data segement */
- _end = .;
-}
-
diff --git a/benchmarks/common/util.h b/benchmarks/common/util.h
index 10f3169..1f0c73c 100644
--- a/benchmarks/common/util.h
+++ b/benchmarks/common/util.h
@@ -1,16 +1,99 @@
-// helpful utility and synch functions
-
-// relies on defining "ncores" before including this file...
-
#ifndef __UTIL_H
#define __UTIL_H
-#include <machine/syscall.h>
+//--------------------------------------------------------------------------
+// Macros
+
+// Set HOST_DEBUG to 1 if you are going to compile this for a host
+// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
+// to 0 if you are compiling with the smips-gcc toolchain.
+
+#ifndef HOST_DEBUG
+#define HOST_DEBUG 0
+#endif
+
+// Set PREALLOCATE to 1 if you want to preallocate the benchmark
+// function before starting stats. If you have instruction/data
+// caches and you don't want to count the overhead of misses, then
+// you will need to use preallocation.
+
+#ifndef PREALLOCATE
+#define PREALLOCATE 0
+#endif
+
+// Set SET_STATS to 1 if you want to carve out the piece that actually
+// does the computation.
+
+#ifndef SET_STATS
+#define SET_STATS 0
+#endif
+
+#if HOST_DEBUG
+#include <stdio.h>
+static void setStats(int enable) {}
+#else
+extern void setStats(int enable);
+#endif
+
+static void printArray(const char name[], int n, const int arr[]) // prints name and the n ints of arr; empty body unless HOST_DEBUG
+{
+#if HOST_DEBUG
+ int i;
+ printf( " %10s :", name );
+ for ( i = 0; i < n; i++ )
+ printf( " %3d ", arr[i] );
+ printf( "\n" );
+#endif
+}
+
+static void printDoubleArray(const char name[], int n, const double arr[]) // prints name and the n doubles of arr; empty body unless HOST_DEBUG
+{
+#if HOST_DEBUG
+ int i;
+ printf( " %10s :", name );
+ for ( i = 0; i < n; i++ )
+ printf( " %g ", arr[i] );
+ printf( "\n" );
+#endif
+}
+
+static int verify(int n, const int test[], const int verify[]) // returns 0 if the first n elements match, else the 1-based index of the first mismatch
+{
+ int i;
+ // Unrolled for faster verification
+ for (i = 0; i < n/2*2; i+=2) // n/2*2 rounds n down to an even count
+ {
+ int t0 = test[i], t1 = test[i+1];
+ int v0 = verify[i], v1 = verify[i+1];
+ if (t0 != v0) return i+1; // element i differs
+ if (t1 != v1) return i+2; // element i+1 differs
+ }
+ if (n % 2 != 0 && test[n-1] != verify[n-1]) // odd n: the last element is not covered by the loop
+ return n;
+ return 0;
+}
-#define rdcycle() ({ unsigned long _c; asm volatile ("rdcycle %0" : "=r"(_c) :: "memory"); _c; })
-#define rdinstret() ({ unsigned long _c; asm volatile ("rdinstret %0" : "=r"(_c) :: "memory"); _c; })
-
-void __attribute__((noinline)) barrier()
+static int verifyDouble(int n, const double test[], const double verify[]) // returns 0 if the first n doubles compare equal with ==, else the 1-based index of the first mismatch
+{
+ int i;
+ // Unrolled for faster verification
+ for (i = 0; i < n/2*2; i+=2) // n/2*2 rounds n down to an even count
+ {
+ double t0 = test[i], t1 = test[i+1];
+ double v0 = verify[i], v1 = verify[i+1];
+ int eq1 = t0 == v0, eq2 = t1 == v1;
+ if (!(eq1 & eq2)) return i+1+eq1; // eq1 == 0: element i differs (i+1); eq1 == 1: element i+1 differs (i+2)
+ }
+ if (n % 2 != 0 && test[n-1] != verify[n-1]) // odd n: the last element is not covered by the loop
+ return n;
+ return 0;
+}
+
+#ifndef ncores
+#define ncores 1
+#endif
+
+static void __attribute__((noinline)) barrier()
{
static volatile int sense;
static volatile int count;
@@ -30,31 +113,8 @@ void __attribute__((noinline)) barrier()
__sync_synchronize();
}
-
-
-
-
-void finishTest(int test_result)
-{
-#if HOST_DEBUG
- if ( test_result == 1 )
- printf( "*** PASSED ***\n" );
- else
- printf( "*** FAILED *** (tohost = %d)\n", test_result);
- exit(0);
-#else
- {
- // perform exit syscall
- asm volatile(
- "move a0,%0 ;"
- "li a1,0 ;"
- "li a2,0 ;"
- "li a3,0 ;"
- "li v0,%1 ;"
- "scall" : : "r"(test_result) , "i"(SYS_exit));
- }
+#ifdef __riscv
+#include "encoding.h"
#endif
-}
#endif //__UTIL_H
-
diff --git a/benchmarks/dgemm/bmark.mk b/benchmarks/dgemm/bmark.mk
index 5a26242..11d8656 100644
--- a/benchmarks/dgemm/bmark.mk
+++ b/benchmarks/dgemm/bmark.mk
@@ -10,6 +10,7 @@
dgemm_c_src = \
dgemm_main.c \
+ syscalls.c \
dgemm_riscv_src = \
crt.S \
@@ -23,7 +24,7 @@ $(dgemm_host_bin) : $(dgemm_c_src)
dgemm_riscv_bin = dgemm.riscv
$(dgemm_riscv_bin) : $(dgemm_c_objs) $(dgemm_riscv_objs)
- $(RISCV_LINK) $(dgemm_c_objs) $(dgemm_riscv_objs) -o $(dgemm_riscv_bin)
+ $(RISCV_LINK) $(dgemm_c_objs) $(dgemm_riscv_objs) -o $(dgemm_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(dgemm_c_objs) $(dgemm_riscv_objs) \
$(dgemm_host_bin) $(dgemm_riscv_bin)
diff --git a/benchmarks/dgemm/dgemm_main.c b/benchmarks/dgemm/dgemm_main.c
index 7fd7dc2..9f28c07 100644
--- a/benchmarks/dgemm/dgemm_main.c
+++ b/benchmarks/dgemm/dgemm_main.c
@@ -2,76 +2,14 @@
// Double-precision general matrix multiplication benchmark
//--------------------------------------------------------------------------
-int ncores = 1;
#include "util.h"
//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
-//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( long n, const double test[], const double correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-#include <stdio.h>
-#include <stdlib.h>
-void printArray( char name[], long n, const double arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %8.1f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
-//--------------------------------------------------------------------------
// square_dgemm function
void square_dgemm( long n0, const double a0[], const double b0[], double c0[] )
@@ -162,33 +100,23 @@ int main( int argc, char* argv[] )
double results_data[DATA_SIZE*DATA_SIZE];
// Output the input array
-
-#if HOST_DEBUG
- printArray( "input1", DATA_SIZE*DATA_SIZE, input1_data );
- printArray( "input2", DATA_SIZE*DATA_SIZE, input2_data );
- printArray( "verify", DATA_SIZE*DATA_SIZE, verify_data );
-#endif
-
- // If needed we preallocate everything in the caches
+ printDoubleArray( "input1", DATA_SIZE*DATA_SIZE, input1_data );
+ printDoubleArray( "input2", DATA_SIZE*DATA_SIZE, input2_data );
+ printDoubleArray( "verify", DATA_SIZE*DATA_SIZE, verify_data );
#if PREALLOCATE
+ // If needed we preallocate everything in the caches
square_dgemm( DATA_SIZE, input1_data, input2_data, results_data );
#endif
// Do the dgemm
-
setStats(1);
square_dgemm( DATA_SIZE, input1_data, input2_data, results_data );
setStats(0);
// Print out the results
-
-#if HOST_DEBUG
- printArray( "results", DATA_SIZE*DATA_SIZE, results_data );
-#endif
+ printDoubleArray( "results", DATA_SIZE*DATA_SIZE, results_data );
// Check the results
-
- finishTest(verify( DATA_SIZE*DATA_SIZE, results_data, verify_data ));
-
+ return verifyDouble( DATA_SIZE*DATA_SIZE, results_data, verify_data );
}
diff --git a/benchmarks/dhrystone/bmark.mk b/benchmarks/dhrystone/bmark.mk
index 6e45706..06b4ab8 100644
--- a/benchmarks/dhrystone/bmark.mk
+++ b/benchmarks/dhrystone/bmark.mk
@@ -11,6 +11,7 @@
dhrystone_c_src = \
dhrystone_main.c \
dhrystone.c \
+ syscalls.c \
dhrystone_riscv_src = \
crt.S \
diff --git a/benchmarks/dhrystone/dhrystone.h b/benchmarks/dhrystone/dhrystone.h
index 8abb874..f274c57 100644
--- a/benchmarks/dhrystone/dhrystone.h
+++ b/benchmarks/dhrystone/dhrystone.h
@@ -381,13 +381,9 @@ extern clock_t clock();
#define HZ 976563
#define Too_Small_Time 50
-#define rdcycle() ({ \
- long __x; \
- asm volatile("rdcycle %0; srl %0, %0, 10" : "=r"(__x)); \
- __x; })
#define CLOCK_TYPE "rdcycle()"
-#define Start_Timer() Begin_Time = rdcycle()
-#define Stop_Timer() End_Time = rdcycle()
+#define Start_Timer() Begin_Time = rdcycle()/1024
+#define Stop_Timer() End_Time = rdcycle()/1024
#else
/* Use times(2) time function unless */
diff --git a/benchmarks/dhrystone/dhrystone_main.c b/benchmarks/dhrystone/dhrystone_main.c
index 93b79da..a755743 100644
--- a/benchmarks/dhrystone/dhrystone_main.c
+++ b/benchmarks/dhrystone/dhrystone_main.c
@@ -8,9 +8,6 @@
#include "dhrystone.h"
-int ncores = 1;
-#include "util.h"
-
//--------------------------------------------------------------------------
// Macros
@@ -47,12 +44,7 @@ int __attribute__((noinline)) do_fprintf(FILE* f, const char* str, ...)
}
#endif
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
+#include "util.h"
#include <alloca.h>
@@ -299,7 +291,7 @@ int main (int argc, char** argv)
do_fprintf (stdout, "\n");
#endif
- finishTest(1);
+ return 0;
}
diff --git a/benchmarks/median/bmark.mk b/benchmarks/median/bmark.mk
index 31c853a..b489a67 100644
--- a/benchmarks/median/bmark.mk
+++ b/benchmarks/median/bmark.mk
@@ -11,6 +11,7 @@
median_c_src = \
median_main.c \
median.c \
+ syscalls.c \
median_riscv_src = \
crt.S \
@@ -24,7 +25,7 @@ $(median_host_bin): $(median_c_src)
median_riscv_bin = median.riscv
$(median_riscv_bin): $(median_c_objs) $(median_riscv_objs)
- $(RISCV_LINK) $(median_c_objs) $(median_riscv_objs) -o $(median_riscv_bin)
+ $(RISCV_LINK) $(median_c_objs) $(median_riscv_objs) -o $(median_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(median_c_objs) $(median_riscv_objs) \
$(median_host_bin) $(median_riscv_bin)
diff --git a/benchmarks/median/median_main.c b/benchmarks/median/median_main.c
index 0691bec..7d50f76 100644
--- a/benchmarks/median/median_main.c
+++ b/benchmarks/median/median_main.c
@@ -8,37 +8,9 @@
// dataset1.h You should not change anything except the
// HOST_DEBUG and PREALLOCATE macros for your timing run.
-#include "median.h"
-
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
+#include "median.h"
//--------------------------------------------------------------------------
// Input/Reference Data
@@ -46,38 +18,6 @@ int ncores = 1;
#include "dataset1.h"
//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
-//--------------------------------------------------------------------------
// Main
int main( int argc, char* argv[] )
@@ -85,36 +25,22 @@ int main( int argc, char* argv[] )
int results_data[DATA_SIZE];
// Output the input array
-
-#if HOST_DEBUG
printArray( "input", DATA_SIZE, input_data );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
+#if PREALLOCATE
// If needed we preallocate everything in the caches
-
-#if ( !HOST_DEBUG && PREALLOCATE )
median( DATA_SIZE, input_data, results_data );
#endif
// Do the filter
-
-#if HOST_DEBUG
- median( DATA_SIZE, input_data, results_data );
-#else
setStats(1);
median( DATA_SIZE, input_data, results_data );
setStats(0);
-#endif
// Print out the results
-
-#if HOST_DEBUG
printArray( "results", DATA_SIZE, results_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, results_data, verify_data ));
-
+ return verify( DATA_SIZE, results_data, verify_data );
}
diff --git a/benchmarks/mt-matmul/bmark.mk b/benchmarks/mt-matmul/bmark.mk
index 7749951..4b7fcb7 100644
--- a/benchmarks/mt-matmul/bmark.mk
+++ b/benchmarks/mt-matmul/bmark.mk
@@ -10,9 +10,10 @@
mt_matmul_c_src = \
mt-matmul.c \
+ syscalls.c \
mt_matmul_riscv_src = \
- crt-mt.S \
+ crt.S \
mt_matmul_c_objs = $(patsubst %.c, %.o, $(mt_matmul_c_src))
mt_matmul_riscv_objs = $(patsubst %.S, %.o, $(mt_matmul_riscv_src))
@@ -23,7 +24,7 @@ $(mt_matmul_host_bin) : $(mt_matmul_c_src)
mt_matmul_riscv_bin = mt-matmul.riscv
$(mt_matmul_riscv_bin) : $(mt_matmul_c_objs) $(mt_matmul_riscv_objs)
- $(RISCV_LINK_MT) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_matmul_riscv_bin)
+ $(RISCV_LINK) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_matmul_riscv_bin)
junk += $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) \
$(mt_matmul_host_bin) $(mt_matmul_riscv_bin)
diff --git a/benchmarks/mt-matmul/mt-matmul.c b/benchmarks/mt-matmul/mt-matmul.c
index 93f8ea9..e795b50 100644
--- a/benchmarks/mt-matmul/mt-matmul.c
+++ b/benchmarks/mt-matmul/mt-matmul.c
@@ -25,7 +25,7 @@
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef float data_t;
+typedef double data_t;
#include "dataset.h"
@@ -34,6 +34,7 @@ typedef float data_t;
__thread unsigned long coreid;
unsigned long ncores;
+#define ncores ncores
#include "util.h"
@@ -48,41 +49,6 @@ unsigned long ncores;
stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
} while(0)
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
//--------------------------------------------------------------------------
// matmul function
@@ -139,8 +105,11 @@ void thread_entry(int cid, int nc)
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
+ int res = verifyDouble(ARRAY_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
+
+#if 0
// clear results from the first trial
size_t i;
if (coreid == 0)
@@ -159,9 +128,11 @@ void thread_entry(int cid, int nc)
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ res = verify(ARRAY_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
barrier();
+#endif
exit(0);
}
-
diff --git a/benchmarks/mt-vvadd/bmark.mk b/benchmarks/mt-vvadd/bmark.mk
index 1f8b3ed..72b2d34 100644
--- a/benchmarks/mt-vvadd/bmark.mk
+++ b/benchmarks/mt-vvadd/bmark.mk
@@ -13,7 +13,7 @@ mt_vvadd_c_src = \
syscalls.c \
mt_vvadd_riscv_src = \
- crt-mt.S \
+ crt.S \
mt_vvadd_c_objs = $(patsubst %.c, %.o, $(mt_vvadd_c_src))
mt_vvadd_riscv_objs = $(patsubst %.S, %.o, $(mt_vvadd_riscv_src))
@@ -24,7 +24,7 @@ $(mt_vvadd_host_bin) : $(mt_vvadd_c_src)
mt_vvadd_riscv_bin = mt-vvadd.riscv
$(mt_vvadd_riscv_bin) : $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs)
- $(RISCV_LINK_MT) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_vvadd_riscv_bin)
+ $(RISCV_LINK) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_vvadd_riscv_bin)
junk += $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) \
$(mt_vvadd_host_bin) $(mt_vvadd_riscv_bin)
diff --git a/benchmarks/mt-vvadd/mt-vvadd.c b/benchmarks/mt-vvadd/mt-vvadd.c
index 497b9bb..60aa2e7 100644
--- a/benchmarks/mt-vvadd/mt-vvadd.c
+++ b/benchmarks/mt-vvadd/mt-vvadd.c
@@ -24,7 +24,7 @@
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef float data_t;
+typedef double data_t;
#include "dataset.h"
@@ -33,6 +33,7 @@ typedef float data_t;
__thread unsigned long coreid;
unsigned long ncores;
+#define ncores ncores
#include "util.h"
@@ -47,41 +48,6 @@ unsigned long ncores;
stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
} while(0)
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
//--------------------------------------------------------------------------
// vvadd function
@@ -136,8 +102,11 @@ void thread_entry(int cid, int nc)
// verify
- verify(DATA_SIZE, results_data, verify_data);
-
+ int res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
+
+#if 0
// reset results from the first trial
if (coreid == 0)
{
@@ -145,21 +114,22 @@ void thread_entry(int cid, int nc)
results_data[i] = input1_data[i];
}
barrier();
-
-
+
// Execute your faster vvadd
barrier();
stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printDoubleArray("results: ", DATA_SIZE, results_data);
+ printDoubleArray("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
barrier();
+#endif
exit(0);
}
-
diff --git a/benchmarks/multiply/bmark.mk b/benchmarks/multiply/bmark.mk
index d6114a1..93ba67f 100644
--- a/benchmarks/multiply/bmark.mk
+++ b/benchmarks/multiply/bmark.mk
@@ -11,6 +11,7 @@
multiply_c_src = \
multiply_main.c \
multiply.c \
+ syscalls.c \
multiply_riscv_src = \
crt.S \
@@ -24,7 +25,7 @@ $(multiply_host_bin): $(multiply_c_src)
multiply_riscv_bin = multiply.riscv
$(multiply_riscv_bin): $(multiply_c_objs) $(multiply_riscv_objs)
- $(RISCV_LINK) $(multiply_c_objs) $(multiply_riscv_objs) -o $(multiply_riscv_bin)
+ $(RISCV_LINK) $(multiply_c_objs) $(multiply_riscv_objs) -o $(multiply_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(multiply_c_objs) $(multiply_riscv_objs) \
$(multiply_host_bin) $(multiply_riscv_bin)
diff --git a/benchmarks/multiply/multiply_main.c b/benchmarks/multiply/multiply_main.c
index ca359eb..037de2f 100644
--- a/benchmarks/multiply/multiply_main.c
+++ b/benchmarks/multiply/multiply_main.c
@@ -8,46 +8,9 @@
// dataset1.h You should not change anything except the
// HOST_DEBUG and VERIFY macros for your timing run.
-#include "multiply.h"
-
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set VERIFY to 1 if you want the program to check that the sort
-// function returns the right answer. When you are doing your
-// benchmarking you should set this to 0 so that the verification
-// is not included in your timing.
-
-#ifndef VERIFY
-#define VERIFY 1
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
+#include "multiply.h"
//--------------------------------------------------------------------------
// Input/Reference Data
@@ -55,38 +18,6 @@ int ncores = 1;
#include "dataset1.h"
//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
-//--------------------------------------------------------------------------
// Main
int main( int argc, char* argv[] )
@@ -95,42 +26,27 @@ int main( int argc, char* argv[] )
int results_data[DATA_SIZE];
// Output the input arrays
-
-#if HOST_DEBUG
printArray( "input1", DATA_SIZE, input_data1 );
printArray( "input2", DATA_SIZE, input_data2 );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
-#if ( !HOST_DEBUG && PREALLOCATE )
+#if PREALLOCATE
for (i = 0; i < DATA_SIZE; i++)
{
results_data[i] = multiply( input_data1[i], input_data2[i] );
}
#endif
-#if HOST_DEBUG
- for (i = 0; i < DATA_SIZE; i++)
- {
- results_data[i] = multiply( input_data1[i], input_data2[i] );
- }
-#else
setStats(1);
for (i = 0; i < DATA_SIZE; i++)
{
results_data[i] = multiply( input_data1[i], input_data2[i] );
}
setStats(0);
-#endif
// Print out the results
-
-#if HOST_DEBUG
printArray( "results", DATA_SIZE, results_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, results_data, verify_data ));
-
+ return verify( DATA_SIZE, results_data, verify_data );
}
diff --git a/benchmarks/qsort/bmark.mk b/benchmarks/qsort/bmark.mk
index cdc0545..4b39d96 100644
--- a/benchmarks/qsort/bmark.mk
+++ b/benchmarks/qsort/bmark.mk
@@ -10,6 +10,7 @@
qsort_c_src = \
qsort_main.c \
+ syscalls.c \
qsort_riscv_src = \
crt.S \
@@ -23,7 +24,7 @@ $(qsort_host_bin) : $(qsort_c_src)
qsort_riscv_bin = qsort.riscv
$(qsort_riscv_bin) : $(qsort_c_objs) $(qsort_riscv_objs)
- $(RISCV_LINK) $(qsort_c_objs) $(qsort_riscv_objs) -o $(qsort_riscv_bin)
+ $(RISCV_LINK) $(qsort_c_objs) $(qsort_riscv_objs) -o $(qsort_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(qsort_c_objs) $(qsort_riscv_objs) \
$(qsort_host_bin) $(qsort_riscv_bin)
diff --git a/benchmarks/qsort/qsort_main.c b/benchmarks/qsort/qsort_main.c
index e61eef2..9633356 100644
--- a/benchmarks/qsort/qsort_main.c
+++ b/benchmarks/qsort/qsort_main.c
@@ -11,36 +11,8 @@
// processor simulator itself. You should not change anything except
// the HOST_DEBUG and PREALLOCATE macros for your timing run.
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
// The INSERTION_THRESHOLD is the size of the subarray when the
// algorithm switches to using an insertion sort instead of
// quick sort.
@@ -62,38 +34,6 @@ int ncores = 1;
#include "dataset1.h"
//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
-//--------------------------------------------------------------------------
// Quicksort function
void sort( int n, int arr[] )
@@ -195,34 +135,23 @@ void sort( int n, int arr[] )
int main( int argc, char* argv[] )
{
-
// Output the input array
-
-#if HOST_DEBUG
printArray( "input", DATA_SIZE, input_data );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
-
- // If needed we preallocate everything in the caches
#if PREALLOCATE
+ // If needed we preallocate everything in the caches
sort( DATA_SIZE, input_data );
#endif
// Do the sort
-
setStats(1);
sort( DATA_SIZE, input_data );
setStats(0);
// Print out the results
-
-#if HOST_DEBUG
printArray( "test", DATA_SIZE, input_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, input_data, verify_data ));
-
+ return verify( DATA_SIZE, input_data, verify_data );
}
diff --git a/benchmarks/spmv/bmark.mk b/benchmarks/spmv/bmark.mk
index 5ab9cc0..dcfdb19 100644
--- a/benchmarks/spmv/bmark.mk
+++ b/benchmarks/spmv/bmark.mk
@@ -10,6 +10,7 @@
spmv_c_src = \
spmv_main.c \
+ syscalls.c \
spmv_riscv_src = \
crt.S \
@@ -23,7 +24,7 @@ $(spmv_host_bin) : $(spmv_c_src)
spmv_riscv_bin = spmv.riscv
$(spmv_riscv_bin) : $(spmv_c_objs) $(spmv_riscv_objs)
- $(RISCV_LINK) $(spmv_c_objs) $(spmv_riscv_objs) -o $(spmv_riscv_bin)
+ $(RISCV_LINK) $(spmv_c_objs) $(spmv_riscv_objs) -o $(spmv_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(spmv_c_objs) $(spmv_riscv_objs) \
$(spmv_host_bin) $(spmv_riscv_bin)
diff --git a/benchmarks/spmv/spmv_main.c b/benchmarks/spmv/spmv_main.c
index d765ca2..44cdc99 100644
--- a/benchmarks/spmv/spmv_main.c
+++ b/benchmarks/spmv/spmv_main.c
@@ -2,75 +2,13 @@
// Double-precision general matrix multiplication benchmark
//--------------------------------------------------------------------------
-int ncores = 1;
#include "util.h"
//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
-//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( long n, const double test[], const double correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-#include <stdio.h>
-#include <stdlib.h>
-void printArray( char name[], long n, const double arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %8.1f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
void spmv(int r, const double* val, const int* idx, const double* x,
const int* ptr, double* y)
{
@@ -108,5 +46,5 @@ int main( int argc, char* argv[] )
spmv(R, val, idx, x, ptr, y);
setStats(0);
- finishTest(verify(R, y, verify_data));
+ return verifyDouble(R, y, verify_data);
}
diff --git a/benchmarks/towers/bmark.mk b/benchmarks/towers/bmark.mk
index 0c16a81..b16bf04 100644
--- a/benchmarks/towers/bmark.mk
+++ b/benchmarks/towers/bmark.mk
@@ -10,6 +10,7 @@
towers_c_src = \
towers_main.c \
+ syscalls.c \
towers_riscv_src = \
crt.S \
@@ -23,7 +24,7 @@ $(towers_host_bin) : $(towers_c_src)
towers_riscv_bin = towers.riscv
$(towers_riscv_bin) : $(towers_c_objs) $(towers_riscv_objs)
- $(RISCV_LINK) $(towers_c_objs) $(towers_riscv_objs) -o $(towers_riscv_bin)
+ $(RISCV_LINK) $(towers_c_objs) $(towers_riscv_objs) -o $(towers_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(towers_c_objs) $(towers_riscv_objs) \
$(towers_host_bin) $(towers_riscv_bin)
diff --git a/benchmarks/towers/towers_main.c b/benchmarks/towers/towers_main.c
index 724b73b..9f60017 100644
--- a/benchmarks/towers/towers_main.c
+++ b/benchmarks/towers/towers_main.c
@@ -16,51 +16,13 @@
// smips processor simulator itself. You should not change anything except
// the HOST_DEBUG and PREALLOCATE macros for your timing run.
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
// This is the number of discs in the puzzle.
#define NUM_DISCS 7
//--------------------------------------------------------------------------
-// Helper functions
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
-//--------------------------------------------------------------------------
// List data structure and functions
struct Node
@@ -278,7 +240,7 @@ int towers_verify( struct Towers* this )
return 6;
}
- return 1;
+ return 0;
}
//--------------------------------------------------------------------------
@@ -323,8 +285,6 @@ int main( int argc, char* argv[] )
#endif
// Check the results
-
- finishTest( towers_verify( &towers ) );
-
+ return towers_verify( &towers );
}
diff --git a/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c b/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c
index 3251ef9..6715c45 100644
--- a/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c
+++ b/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c
@@ -53,9 +53,10 @@
#include <stdlib.h>
#else
void printstr(const char*);
-void exit();
#endif
+#include "util.h"
+
//--------------------------------------------------------------------------
// Complex Value Structs
diff --git a/benchmarks/vec-matmul/vec_matmul_main.c b/benchmarks/vec-matmul/vec_matmul_main.c
index 5de377e..6613902 100644
--- a/benchmarks/vec-matmul/vec_matmul_main.c
+++ b/benchmarks/vec-matmul/vec_matmul_main.c
@@ -5,13 +5,9 @@
// This benchmark multiplies two 2-D arrays together and writes the results to
// a third vector. The input data (and reference data) should be generated
// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h. The riscv-gcc toolchain does not support system calls so printf's
-// can only be used on a host system, not on the riscv-v processor simulator
-// itself.
-//
-// HOWEVER: printstr() and printhex() are provided, for a primitive form of
-// printing strings and hexadecimal values to stdout.
+// dataset.h.
+#include "util.h"
// Choose which implementation you wish to test... but leave only one on!
// (only the first one will be executed).
@@ -20,45 +16,6 @@
#define VT_ASM
//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
-//--------------------------------------------------------------------------
-// Host Platform Includes
-
-#if HOST_DEBUG
- #include <stdio.h>
- #include <stdlib.h>
-#else
-void printstr(const char*);
-void exit();
-#endif
-
-
-//--------------------------------------------------------------------------
// Input/Reference Data
//#include "dataset_test.h"
@@ -86,18 +43,6 @@ int verify( int n, float test[], float correct[] )
return 1;
}
-#if HOST_DEBUG
-void printArray( char name[], int n, float arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %03.2f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-
void finishTest( int correct, long long num_cycles, long long num_retired )
{
int toHostValue = correct;
diff --git a/benchmarks/vec-vvadd/vec_vvadd_main.c b/benchmarks/vec-vvadd/vec_vvadd_main.c
index c08e41a..850bec6 100644
--- a/benchmarks/vec-vvadd/vec_vvadd_main.c
+++ b/benchmarks/vec-vvadd/vec_vvadd_main.c
@@ -4,13 +4,7 @@
//
// This benchmark uses adds to vectors and writes the results to a third
// vector. The input data (and reference data) should be generated using the
-// vvadd_gendata.pl perl script and dumped to a file named dataset.h. The
-// riscv-gcc toolchain does not support system calls so printf's can only be
-// used on a host system, not on the riscv-v processor simulator itself.
-//
-// HOWEVER: printstr() and printhex() are provided, for a primitive form of
-// printing strings and hexadecimal values to stdout.
-
+// vvadd_gendata.pl perl script and dumped to a file named dataset.h.
// Choose which implementation you wish to test... but leave only one on!
// (only the first one will be executed).
@@ -19,45 +13,6 @@
#define VT_ASM
//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
-//--------------------------------------------------------------------------
-// Platform Specific Includes
-
-#if HOST_DEBUG
- #include <stdio.h>
- #include <stdlib.h>
-#else
-void printstr(const char*);
-void exit();
-#endif
-
-
-//--------------------------------------------------------------------------
// Input/Reference Data
//#include "dataset_test.h"
@@ -86,18 +41,6 @@ int verify( int n, float test[], float correct[] )
}
return 1;
}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, float arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %03.2f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
void finishTest( int correct, long long num_cycles, long long num_retired )
{
diff --git a/benchmarks/vvadd/bmark.mk b/benchmarks/vvadd/bmark.mk
index d03cb96..5ab99de 100644
--- a/benchmarks/vvadd/bmark.mk
+++ b/benchmarks/vvadd/bmark.mk
@@ -10,6 +10,7 @@
vvadd_c_src = \
vvadd_main.c \
+ syscalls.c \
vvadd_riscv_src = \
crt.S \
@@ -23,7 +24,7 @@ $(vvadd_host_bin) : $(vvadd_c_src)
vvadd_riscv_bin = vvadd.riscv
$(vvadd_riscv_bin) : $(vvadd_c_objs) $(vvadd_riscv_objs)
- $(RISCV_LINK) $(vvadd_c_objs) $(vvadd_riscv_objs) -o $(vvadd_riscv_bin)
+ $(RISCV_LINK) $(vvadd_c_objs) $(vvadd_riscv_objs) -o $(vvadd_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(vvadd_c_objs) $(vvadd_riscv_objs) \
$(vvadd_host_bin) $(vvadd_riscv_bin)
diff --git a/benchmarks/vvadd/vvadd_main.c b/benchmarks/vvadd/vvadd_main.c
index 0be3051..9c47617 100644
--- a/benchmarks/vvadd/vvadd_main.c
+++ b/benchmarks/vvadd/vvadd_main.c
@@ -11,35 +11,7 @@
// anything except the HOST_DEBUG and PREALLOCATE macros for your timing
// runs.
-int ncores = 1;
#include "util.h"
-
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
//--------------------------------------------------------------------------
// Input/Reference Data
@@ -47,52 +19,6 @@ int ncores = 1;
#include "dataset1.h"
//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-//void finishTest( int toHostValue )
-//{
-//#if HOST_DEBUG
-// if ( toHostValue == 1 )
-// printf( "*** PASSED ***\n" );
-// else
-// printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
-// exit(0);
-//#else
-// asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
-// while ( 1 ) { }
-//#endif
-//}
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
-//--------------------------------------------------------------------------
// vvadd function
void vvadd( int n, int a[], int b[], int c[] )
@@ -110,33 +36,23 @@ int main( int argc, char* argv[] )
int results_data[DATA_SIZE];
// Output the input array
-
-#if HOST_DEBUG
printArray( "input1", DATA_SIZE, input1_data );
printArray( "input2", DATA_SIZE, input2_data );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
-
- // If needed we preallocate everything in the caches
#if PREALLOCATE
+ // If needed we preallocate everything in the caches
vvadd( DATA_SIZE, input1_data, input2_data, results_data );
#endif
// Do the vvadd
-
setStats(1);
vvadd( DATA_SIZE, input1_data, input2_data, results_data );
setStats(0);
// Print out the results
-
-#if HOST_DEBUG
printArray( "results", DATA_SIZE, results_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, results_data, verify_data ));
-
+ return verify( DATA_SIZE, results_data, verify_data );
}
diff --git a/env b/env
-Subproject d4b98bac3c304e3bed612c79680fc2226ddb7e9
+Subproject 35d0a989a24fc0b9913760dcbead775eb8c0f29