aboutsummaryrefslogtreecommitdiff
path: root/benchmarks
diff options
context:
space:
mode:
author Andrew Waterman <waterman@cs.berkeley.edu> 2013-08-23 20:02:02 -0700
committer Andrew Waterman <waterman@cs.berkeley.edu> 2013-08-23 20:04:30 -0700
commit 5fe2ce69dcd1d0ddb42c4edffac7ab11d939ca45 (patch)
tree 1c76b45e4b7cf966f5d0d3b943d66b04c4f95c21 /benchmarks
parent 5b13eb6cd5aa3e73fb477414f1866e7b9cbeaf3f (diff)
download riscv-tests-5fe2ce69dcd1d0ddb42c4edffac7ab11d939ca45.zip
riscv-tests-5fe2ce69dcd1d0ddb42c4edffac7ab11d939ca45.tar.gz
riscv-tests-5fe2ce69dcd1d0ddb42c4edffac7ab11d939ca45.tar.bz2
Reflect changes to ISA
Conflicts: isa/Makefile
Diffstat (limited to 'benchmarks')
-rw-r--r-- benchmarks/Makefile 2
-rw-r--r-- benchmarks/common/crt-mt.S 84
-rw-r--r-- benchmarks/common/crt.S 83
-rw-r--r-- benchmarks/common/pcr.h 86
-rw-r--r-- benchmarks/common/syscalls.S 678
-rw-r--r-- benchmarks/common/syscalls.c 12
-rw-r--r-- benchmarks/dgemm/dgemm_main.c 2
-rw-r--r-- benchmarks/dhrystone/dhrystone_main.c 2
-rw-r--r-- benchmarks/median/median_main.c 2
-rw-r--r-- benchmarks/multiply/multiply_main.c 2
-rw-r--r-- benchmarks/qsort/qsort_main.c 2
-rw-r--r-- benchmarks/spmv/spmv_main.c 2
-rw-r--r-- benchmarks/towers/towers_main.c 2
-rw-r--r-- benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S 6
-rw-r--r-- benchmarks/vec-matmul/vec_matmul_asm.S 22
-rw-r--r-- benchmarks/vvadd/vvadd_main.c 2
16 files changed, 169 insertions, 820 deletions
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
index 220a033..5d33f86 100644
--- a/benchmarks/Makefile
+++ b/benchmarks/Makefile
@@ -50,7 +50,7 @@ HOST_OPTS = -std=gnu99 -DPREALLOCATE=0 -DHOST_DEBUG=1
HOST_COMP = gcc $(HOST_OPTS)
RISCV_GCC = riscv-gcc
-RISCV_GCC_OPTS = -std=gnu99 -DSET_STATS -O2 -nostdlib -nostartfiles -ffast-math
+RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -nostdlib -nostartfiles -ffast-math
RISCV_LINK = riscv-gcc -T $(bmarkdir)/common/test.ld
RISCV_LINK_MT = riscv-gcc -T $(bmarkdir)/common/test-mt.ld
RISCV_LINK_OPTS = -lc
diff --git a/benchmarks/common/crt-mt.S b/benchmarks/common/crt-mt.S
index 283b3bf..90cd755 100644
--- a/benchmarks/common/crt-mt.S
+++ b/benchmarks/common/crt-mt.S
@@ -1,3 +1,5 @@
+#include "pcr.h"
+
.data
.globl _heapend
.globl environ
@@ -43,57 +45,53 @@ _start:
li x31,0
# enable fp
- mfpcr x1,cr0
- ori x1,x1,0x2
- mtpcr x1,cr0
+ setpcr status, SR_EF
# enable vec
- mfpcr x1,cr0
- ori x1,x1,0x4
- mtpcr x1,cr0
+ setpcr t0, status, SR_EV
## if that didn't stick, we don't have an FPU, so don't initialize it
- mfpcr x1,cr0
- andi x1,x1,0x2
- beqz x1,1f
+ and t0, t0, SR_EF
+ beqz t0, 1f
- mtfsr x0
- mxtf.s f0, x0
- mxtf.s f1, x0
- mxtf.s f2, x0
- mxtf.s f3, x0
- mxtf.s f4, x0
- mxtf.s f5, x0
- mxtf.s f6, x0
- mxtf.s f7, x0
- mxtf.s f8, x0
- mxtf.s f9, x0
- mxtf.s f10,x0
- mxtf.s f11,x0
- mxtf.s f12,x0
- mxtf.s f13,x0
- mxtf.s f14,x0
- mxtf.s f15,x0
- mxtf.s f16,x0
- mxtf.s f17,x0
- mxtf.s f18,x0
- mxtf.s f19,x0
- mxtf.s f20,x0
- mxtf.s f21,x0
- mxtf.s f22,x0
- mxtf.s f23,x0
- mxtf.s f24,x0
- mxtf.s f25,x0
- mxtf.s f26,x0
- mxtf.s f27,x0
- mxtf.s f28,x0
- mxtf.s f29,x0
- mxtf.s f30,x0
- mxtf.s f31,x0
+ fssr x0
+ fmv.s.x f0, x0
+ fmv.s.x f1, x0
+ fmv.s.x f2, x0
+ fmv.s.x f3, x0
+ fmv.s.x f4, x0
+ fmv.s.x f5, x0
+ fmv.s.x f6, x0
+ fmv.s.x f7, x0
+ fmv.s.x f8, x0
+ fmv.s.x f9, x0
+ fmv.s.x f10,x0
+ fmv.s.x f11,x0
+ fmv.s.x f12,x0
+ fmv.s.x f13,x0
+ fmv.s.x f14,x0
+ fmv.s.x f15,x0
+ fmv.s.x f16,x0
+ fmv.s.x f17,x0
+ fmv.s.x f18,x0
+ fmv.s.x f19,x0
+ fmv.s.x f20,x0
+ fmv.s.x f21,x0
+ fmv.s.x f22,x0
+ fmv.s.x f23,x0
+ fmv.s.x f24,x0
+ fmv.s.x f25,x0
+ fmv.s.x f26,x0
+ fmv.s.x f27,x0
+ fmv.s.x f28,x0
+ fmv.s.x f29,x0
+ fmv.s.x f30,x0
+ fmv.s.x f31,x0
1:
+
# get core id and number of cores
- mfpcr a0,cr10
+ mfpcr a0,hartid
lw a1, 4(zero)
slli a2, a0, 13
diff --git a/benchmarks/common/crt.S b/benchmarks/common/crt.S
index d153210..ac5d9ba 100644
--- a/benchmarks/common/crt.S
+++ b/benchmarks/common/crt.S
@@ -1,3 +1,5 @@
+#include "pcr.h"
+
.data
.globl _heapend
.globl environ
@@ -43,57 +45,52 @@ _start:
li x31,0
# enable fp
- mfpcr x1,cr0
- ori x1,x1,0x2
- mtpcr x1,cr0
+ setpcr status, SR_EF
# enable vec
- mfpcr x1,cr0
- ori x1,x1,0x4
- mtpcr x1,cr0
+ setpcr t0, status, SR_EV
## if that didn't stick, we don't have an FPU, so don't initialize it
- mfpcr x1,cr0
- andi x1,x1,0x2
- beqz x1,1f
+ and t0, t0, SR_EF
+ beqz t0, 1f
- mtfsr x0
- mxtf.s f0, x0
- mxtf.s f1, x0
- mxtf.s f2, x0
- mxtf.s f3, x0
- mxtf.s f4, x0
- mxtf.s f5, x0
- mxtf.s f6, x0
- mxtf.s f7, x0
- mxtf.s f8, x0
- mxtf.s f9, x0
- mxtf.s f10,x0
- mxtf.s f11,x0
- mxtf.s f12,x0
- mxtf.s f13,x0
- mxtf.s f14,x0
- mxtf.s f15,x0
- mxtf.s f16,x0
- mxtf.s f17,x0
- mxtf.s f18,x0
- mxtf.s f19,x0
- mxtf.s f20,x0
- mxtf.s f21,x0
- mxtf.s f22,x0
- mxtf.s f23,x0
- mxtf.s f24,x0
- mxtf.s f25,x0
- mxtf.s f26,x0
- mxtf.s f27,x0
- mxtf.s f28,x0
- mxtf.s f29,x0
- mxtf.s f30,x0
- mxtf.s f31,x0
+ fssr x0
+ fmv.s.x f0, x0
+ fmv.s.x f1, x0
+ fmv.s.x f2, x0
+ fmv.s.x f3, x0
+ fmv.s.x f4, x0
+ fmv.s.x f5, x0
+ fmv.s.x f6, x0
+ fmv.s.x f7, x0
+ fmv.s.x f8, x0
+ fmv.s.x f9, x0
+ fmv.s.x f10,x0
+ fmv.s.x f11,x0
+ fmv.s.x f12,x0
+ fmv.s.x f13,x0
+ fmv.s.x f14,x0
+ fmv.s.x f15,x0
+ fmv.s.x f16,x0
+ fmv.s.x f17,x0
+ fmv.s.x f18,x0
+ fmv.s.x f19,x0
+ fmv.s.x f20,x0
+ fmv.s.x f21,x0
+ fmv.s.x f22,x0
+ fmv.s.x f23,x0
+ fmv.s.x f24,x0
+ fmv.s.x f25,x0
+ fmv.s.x f26,x0
+ fmv.s.x f27,x0
+ fmv.s.x f28,x0
+ fmv.s.x f29,x0
+ fmv.s.x f30,x0
+ fmv.s.x f31,x0
1:
# only allow core 0 to proceed
-1:mfpcr a0, cr10
+1:mfpcr a0, hartid
bnez a0, 1b
la sp,stacktop
diff --git a/benchmarks/common/pcr.h b/benchmarks/common/pcr.h
index 7659a97..8780cdd 100644
--- a/benchmarks/common/pcr.h
+++ b/benchmarks/common/pcr.h
@@ -1,42 +1,53 @@
+// See LICENSE for license details.
+
#ifndef _RISCV_PCR_H
#define _RISCV_PCR_H
-#define SR_ET 0x00000001
-#define SR_EF 0x00000002
-#define SR_EV 0x00000004
-#define SR_EC 0x00000008
-#define SR_PS 0x00000010
-#define SR_S 0x00000020
-#define SR_U64 0x00000040
-#define SR_S64 0x00000080
-#define SR_VM 0x00000100
+#define SR_S 0x00000001
+#define SR_PS 0x00000002
+#define SR_EI 0x00000004
+#define SR_PEI 0x00000008
+#define SR_EF 0x00000010
+#define SR_U64 0x00000020
+#define SR_S64 0x00000040
+#define SR_VM 0x00000080
+#define SR_EV 0x00000100
#define SR_IM 0x00FF0000
-#define SR_ZERO ~(SR_ET|SR_EF|SR_EV|SR_EC|SR_PS|SR_S|SR_U64|SR_S64|SR_VM|SR_IM)
+#define SR_IP 0xFF000000
+#define SR_ZERO ~(SR_S|SR_PS|SR_EI|SR_PEI|SR_EF|SR_U64|SR_S64|SR_VM|SR_EV|SR_IM|SR_IP)
#define SR_IM_SHIFT 16
+#define SR_IP_SHIFT 24
-#define PCR_SR 0
-#define PCR_EPC 1
-#define PCR_BADVADDR 2
-#define PCR_EVEC 3
-#define PCR_COUNT 4
-#define PCR_COMPARE 5
-#define PCR_CAUSE 6
-#define PCR_PTBR 7
-#define PCR_SEND_IPI 8
-#define PCR_CLR_IPI 9
-#define PCR_COREID 10
-#define PCR_IMPL 11
-#define PCR_K0 12
-#define PCR_K1 13
+#define PCR_SUP0 0
+#define PCR_SUP1 1
+#define PCR_EPC 2
+#define PCR_BADVADDR 3
+#define PCR_PTBR 4
+#define PCR_ASID 5
+#define PCR_COUNT 6
+#define PCR_COMPARE 7
+#define PCR_EVEC 8
+#define PCR_CAUSE 9
+#define PCR_SR 10
+#define PCR_HARTID 11
+#define PCR_IMPL 12
+#define PCR_FATC 13
+#define PCR_SEND_IPI 14
+#define PCR_CLR_IPI 15
#define PCR_VECBANK 18
#define PCR_VECCFG 19
#define PCR_RESET 29
#define PCR_TOHOST 30
#define PCR_FROMHOST 31
+#define IRQ_COP 2
#define IRQ_IPI 5
+#define IRQ_HOST 6
#define IRQ_TIMER 7
+#define IMPL_SPIKE 1
+#define IMPL_ROCKET 2
+
#define CAUSE_MISALIGNED_FETCH 0
#define CAUSE_FAULT_FETCH 1
#define CAUSE_ILLEGAL_INSTRUCTION 2
@@ -60,10 +71,29 @@
#define CAUSE_VECTOR_FAULT_LOAD 30
#define CAUSE_VECTOR_FAULT_STORE 31
+// page table entry (PTE) fields
+#define PTE_V 0x001 // Entry is a page Table descriptor
+#define PTE_T 0x002 // Entry is a page Table, not a terminal node
+#define PTE_G 0x004 // Global
+#define PTE_UR 0x008 // User Read permission
+#define PTE_UW 0x010 // User Write permission
+#define PTE_UX 0x020 // User eXecute permission
+#define PTE_SR 0x040 // Supervisor Read permission
+#define PTE_SW 0x080 // Supervisor Write permission
+#define PTE_SX 0x100 // Supervisor eXecute permission
+#define PTE_PERM (PTE_SR | PTE_SW | PTE_SX | PTE_UR | PTE_UW | PTE_UX)
+
#ifdef __riscv
-#define ASM_CR(r) _ASM_CR(r)
-#define _ASM_CR(r) cr##r
+#ifdef __riscv64
+# define RISCV_PGLEVELS 3
+# define RISCV_PGSHIFT 13
+#else
+# define RISCV_PGLEVELS 2
+# define RISCV_PGSHIFT 12
+#endif
+#define RISCV_PGLEVEL_BITS 10
+#define RISCV_PGSIZE (1 << RISCV_PGSHIFT)
#ifndef __ASSEMBLER__
@@ -83,6 +113,10 @@
asm volatile ("clearpcr %0,cr%2,%1" : "=r"(__tmp) : "i"(val), "i"(reg)); \
__tmp; })
+#define rdcycle() ({ unsigned long __tmp; \
+ asm volatile ("rdcycle %0" : "=r"(__tmp)); \
+ __tmp; })
+
#endif
#endif
diff --git a/benchmarks/common/syscalls.S b/benchmarks/common/syscalls.S
deleted file mode 100644
index a0cdf6e..0000000
--- a/benchmarks/common/syscalls.S
+++ /dev/null
@@ -1,678 +0,0 @@
- .file 1 "syscalls.c"
- .section .mdebug.abi64
- .previous
- .section .rodata.str1.8,"aMS",@progbits,1
- .align 3
-$LC0:
- .ascii "0123456789abcdef\000"
- .text
- .align 2
- .ent printnum
- .type printnum, @function
-printnum:
- .frame x30,64,x1 # vars= 0, regs= 7/0, args= 0
- .mask 0x03f00002,-8
- .fmask 0x00000000,0
- add x30,x30,-64
- sd x22,32(x30)
- sll x22,x7,32
- srl x22,x22,32
- sd x24,48(x30)
- sd x23,40(x30)
- sd x21,24(x30)
- sd x20,16(x30)
- sd x25,56(x30)
- sd x1,8(x30)
- move x23,x6
- move x20,x4
- move x21,x5
- move x24,x9
- bleu x22,x6,$L2
- addw x8,x8,-1
- move x25,x8
- ble x8,x0,$L4
-$L6:
- addw x25,x25,-1
- move x4,x24
- move x5,x21
- jalr x20
- bne x25,x0,$L6
-$L4:
- lui x2,%hi($LC0)
- add x2,x2,%lo($LC0)
- remu x22,x23,x22
- add x22,x22,x2
- lb x4,0(x22)
- move x5,x21
- move x19,x20
- ld x25,56(x30)
- ld x24,48(x30)
- ld x23,40(x30)
- ld x22,32(x30)
- ld x21,24(x30)
- ld x20,16(x30)
- ld x1,8(x30)
- add x30,x30,64
- jr x19
-$L2:
- addw x8,x8,-1
- divu x6,x6,x22
- jal printnum
- j $L4
- .end printnum
- .size printnum, .-printnum
- .align 2
- .ent getuint
- .type getuint, @function
-getuint:
- .frame x30,0,x1 # vars= 0, regs= 0/0, args= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- slt x2,x5,2
- bne x2,x0,$L10
-$L13:
- ld x2,0(x4)
- add x3,x2,8
- sd x3,0(x4)
- ld x2,0(x2)
- ret
-$L10:
- bne x5,x0,$L13
- ld x3,0(x4)
- lwu x2,0(x3)
- add x3,x3,8
- sd x3,0(x4)
- ret
- .end getuint
- .size getuint, .-getuint
- .align 2
- .globl putchar
- .ent putchar
- .type putchar, @function
-putchar:
- .frame x30,64,x1 # vars= 64, regs= 0/0, args= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- li x2,-1 # 0xffffffffffffffff
- add x30,x30,-64
- lui x3,%hi(buflen.1596)
- beq x4,x2,$L21
- lw x5,%lo(buflen.1596)(x3)
- lui x2,%hi(buf.1595)
- add x2,x2,%lo(buf.1595)
- add x6,x2,x5
- sb x4,0(x6)
- addw x5,x5,1
- li x4,64 # 0x40
- sw x5,%lo(buflen.1596)(x3)
- beq x5,x4,$L15
- move x2,x0
- add x30,x30,64
- j x1
-$L21:
- lui x2,%hi(buf.1595)
- add x2,x2,%lo(buf.1595)
-$L15:
- lw x4,%lo(buflen.1596)(x3)
- li x5,4 # 0x4
- sd x0,0(x30)
- sd x0,8(x30)
- sd x0,16(x30)
- sd x0,24(x30)
- sd x0,32(x30)
- sd x0,40(x30)
- sd x0,48(x30)
- sd x0,56(x30)
- sd x5,0(x30)
- li x5,1 # 0x1
- sd x5,8(x30)
- sd x2,16(x30)
- sd x4,24(x30)
- fence
- #APP
- # 45 "syscalls.c" 1
- mtpcr x2,x30,cr30
- # 0 "" 2
- #NO_APP
-$L17:
- #APP
- # 46 "syscalls.c" 1
- mfpcr x2,cr31
- # 0 "" 2
- #NO_APP
- beq x2,x0,$L17
- move x2,x0
- sw x0,%lo(buflen.1596)(x3)
- add x30,x30,64
- j x1
- .end putchar
- .size putchar, .-putchar
- .align 2
- .globl exit
- .ent exit
- .type exit, @function
-exit:
- .frame x30,64,x1 # vars= 64, regs= 0/0, args= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- add x30,x30,-64
- li x2,1 # 0x1
- sd x0,0(x30)
- sd x0,8(x30)
- sd x0,16(x30)
- sd x0,24(x30)
- sd x0,32(x30)
- sd x0,40(x30)
- sd x0,48(x30)
- sd x0,56(x30)
- sd x2,0(x30)
- sd x4,8(x30)
- fence
- #APP
- # 12 "syscalls.c" 1
- mtpcr x2,x30,cr30
- # 0 "" 2
- #NO_APP
-$L23:
- j $L23
- .end exit
- .size exit, .-exit
- .align 2
- .globl printstr
- .ent printstr
- .type printstr, @function
-printstr:
- .frame x30,80,x1 # vars= 64, regs= 1/0, args= 0
- .mask 0x00000002,-8
- .fmask 0x00000000,0
- add x30,x30,-80
- li x3,4 # 0x4
- sd x0,0(x30)
- sd x0,8(x30)
- sd x0,16(x30)
- sd x0,24(x30)
- sd x0,32(x30)
- sd x0,40(x30)
- sd x0,48(x30)
- sd x0,56(x30)
- sd x3,0(x30)
- li x3,1 # 0x1
- sd x3,8(x30)
- sd x1,72(x30)
- sd x4,16(x30)
- jal strlen
- sd x2,24(x30)
- fence
- #APP
- # 24 "syscalls.c" 1
- mtpcr x2,x30,cr30
- # 0 "" 2
- #NO_APP
-$L25:
- #APP
- # 25 "syscalls.c" 1
- mfpcr x2,cr31
- # 0 "" 2
- #NO_APP
- beq x2,x0,$L25
- ld x1,72(x30)
- add x30,x30,80
- j x1
- .end printstr
- .size printstr, .-printstr
- .align 2
- .globl printhex
- .ent printhex
- .type printhex, @function
-printhex:
- .frame x30,48,x1 # vars= 32, regs= 1/0, args= 0
- .mask 0x00000002,-8
- .fmask 0x00000000,0
- add x30,x30,-48
- sd x1,40(x30)
- add x2,x30,15
- add x7,x30,-1
-$L29:
- and x3,x4,15
- sltu x6,x3,10
- li x5,87 # 0x57
- beq x6,x0,$L28
- li x5,48 # 0x30
-$L28:
- add x3,x5,x3
- sb x3,0(x2)
- add x2,x2,-1
- srl x4,x4,4
- bne x2,x7,$L29
- move x4,x30
- sb x0,16(x30)
- jal printstr
- ld x1,40(x30)
- add x30,x30,48
- j x1
- .end printhex
- .size printhex, .-printhex
- .section .rodata.str1.8
- .align 3
-$LC1:
- .ascii "(null)\000"
- .text
- .align 2
- .globl vprintfmt
- .ent vprintfmt
- .type vprintfmt, @function
-vprintfmt:
- .frame x30,112,x1 # vars= 32, regs= 10/0, args= 0
- .mask 0x2ff00002,-8
- .fmask 0x00000000,0
- add x30,x30,-112
- sd x25,80(x30)
- lui x25,%hi($L53)
- sd x26,88(x30)
- sd x24,72(x30)
- sd x23,64(x30)
- sd x22,56(x30)
- sd x21,48(x30)
- sd x20,40(x30)
- sd x29,104(x30)
- sd x27,96(x30)
- sd x1,32(x30)
- move x21,x4
- move x20,x5
- move x23,x6
- sd x7,0(x30)
- li x22,37 # 0x25
- add x25,x25,%lo($L53)
- li x24,-1 # 0xffffffffffffffff
- lui x26,%hi($LC1)
- j $L84
-$L35:
- beq x2,x0,$L32
- move x5,x20
- add x23,x23,1
- jalr x21
-$L84:
- lbu x4,0(x23)
- move x2,x4
- bne x4,x22,$L35
- ld x2,0(x30)
- add x6,x23,1
- move x7,x6
- li x27,32 # 0x20
- sd x0,8(x30)
- li x3,-1 # 0xffffffffffffffff
- li x29,-1 # 0xffffffffffffffff
- move x5,x0
-$L85:
- lbu x4,0(x7)
- add x23,x7,1
- addw x8,x4,-35
- and x9,x8,0xff
- sltu x9,x9,86
- bne x9,x0,$L90
-$L38:
- li x4,37 # 0x25
- move x5,x20
- sd x2,0(x30)
- move x23,x6
- jalr x21
- j $L84
-$L32:
- ld x1,32(x30)
- ld x29,104(x30)
- ld x27,96(x30)
- ld x26,88(x30)
- ld x25,80(x30)
- ld x24,72(x30)
- ld x23,64(x30)
- ld x22,56(x30)
- ld x21,48(x30)
- ld x20,40(x30)
- add x30,x30,112
- j x1
-$L90:
- and x8,x8,0xff
- sll x8,x8,3
- add x8,x25,x8
- ld x8,0(x8)
- j x8
- .section .rodata
- .align 3
- .align 2
-$L53:
- .dword $L39
- .dword $L38
- .dword $L40
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L41
- .dword $L38
- .dword $L38
- .dword $L42
- .dword $L43
- .dword $L38
- .dword $L74
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L44
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L45
- .dword $L46
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L38
- .dword $L47
- .dword $L38
- .dword $L38
- .dword $L48
- .dword $L49
- .dword $L38
- .dword $L38
- .dword $L50
- .dword $L38
- .dword $L51
- .dword $L38
- .dword $L38
- .dword $L52
- .text
-$L52:
- move x4,x30
- sd x2,0(x30)
- jal getuint
- move x6,x2
- li x7,16 # 0x10
-$L73:
- move x4,x21
- move x5,x20
- move x8,x29
- move x9,x27
- jal printnum
- j $L84
-$L39:
- li x4,1 # 0x1
- move x7,x23
- sd x4,8(x30)
- j $L85
-$L40:
- move x5,x20
- sd x2,0(x30)
- jalr x21
- j $L84
-$L41:
- lw x3,0(x2)
- move x7,x23
- add x2,x2,8
-$L54:
- bge x29,x0,$L85
- move x29,x3
- li x3,-1 # 0xffffffffffffffff
- j $L85
-$L42:
- move x7,x23
- li x27,45 # 0x2d
- j $L85
-$L43:
- blt x29,x0,$L91
- move x7,x23
- j $L85
-$L74:
- move x7,x23
- li x27,48 # 0x30
- j $L85
-$L44:
- lb x8,1(x7)
- addw x3,x4,-48
- move x7,x23
- addw x4,x8,-48
- sltu x4,x4,10
- beq x4,x0,$L54
-$L55:
- add x7,x7,1
- lb x4,0(x7)
- sllw x9,x3,1
- sllw x3,x3,3
- addw x3,x9,x3
- addw x9,x4,-48
- addw x3,x3,x8
- sltu x9,x9,10
- addw x3,x3,-48
- move x8,x4
- bne x9,x0,$L55
- j $L54
-$L45:
- lw x4,0(x2)
- add x2,x2,8
- move x5,x20
- sd x2,0(x30)
- jalr x21
- j $L84
-$L46:
- slt x3,x5,2
- bne x3,x0,$L69
-$L88:
- add x3,x2,8
- sd x3,0(x30)
- ld x6,0(x2)
- blt x6,x0,$L72
-$L89:
- li x7,10 # 0xa
- j $L73
-$L47:
- addw x5,x5,1
- move x7,x23
- j $L85
-$L48:
- move x4,x30
- sd x2,0(x30)
- jal getuint
- move x6,x2
- li x7,8 # 0x8
- j $L73
-$L49:
- sd x2,0(x30)
- li x4,48 # 0x30
- move x5,x20
- jalr x21
- li x4,120 # 0x78
- move x5,x20
- jalr x21
- ld x2,0(x30)
- li x7,16 # 0x10
- add x3,x2,8
- sd x3,0(x30)
- ld x6,0(x2)
- j $L73
-$L50:
- add x4,x2,8
- sd x4,0(x30)
- ld x2,0(x2)
- sd x2,16(x30)
- beq x2,x0,$L92
-$L58:
- ble x29,x0,$L59
- li x2,45 # 0x2d
- beq x27,x2,$L59
- ld x4,16(x30)
- move x5,x3
- sd x3,24(x30)
- jal strnlen
- sllw x2,x2,0
- subw x2,x29,x2
- move x29,x2
- ld x3,24(x30)
- ble x2,x0,$L59
- move x29,x2
-$L60:
- sd x3,24(x30)
- move x4,x27
- move x5,x20
- addw x29,x29,-1
- jalr x21
- ld x3,24(x30)
- bne x29,x0,$L60
-$L59:
- ld x2,16(x30)
- lb x4,0(x2)
- add x27,x2,1
- beq x4,x0,$L62
-$L80:
- blt x3,x0,$L67
- addw x2,x3,-1
- move x3,x2
- beq x2,x24,$L62
-$L67:
- ld x2,8(x30)
- beq x2,x0,$L63
- addw x2,x4,-32
- sltu x2,x2,95
- beq x2,x0,$L93
-$L63:
- sd x3,24(x30)
- move x5,x20
- jalr x21
- ld x3,24(x30)
-$L64:
- lb x4,0(x27)
- addw x29,x29,-1
- add x27,x27,1
- bne x4,x0,$L80
-$L62:
- ble x29,x0,$L84
-$L79:
- addw x29,x29,-1
- li x4,32 # 0x20
- move x5,x20
- jalr x21
- bne x29,x0,$L79
- j $L84
-$L51:
- move x4,x30
- sd x2,0(x30)
- jal getuint
- move x6,x2
- li x7,10 # 0xa
- j $L73
-$L93:
- sd x3,24(x30)
- li x4,63 # 0x3f
- move x5,x20
- jalr x21
- ld x3,24(x30)
- j $L64
-$L91:
- move x7,x23
- move x29,x0
- j $L85
-$L92:
- add x2,x26,%lo($LC1)
- sd x2,16(x30)
- j $L58
-$L72:
- sd x6,24(x30)
- li x4,45 # 0x2d
- move x5,x20
- jalr x21
- ld x6,24(x30)
- li x7,10 # 0xa
- sub x6,zero,x6
- j $L73
-$L69:
- bne x5,x0,$L88
- lw x6,0(x2)
- add x2,x2,8
- sd x2,0(x30)
- bge x6,x0,$L89
- j $L72
- .end vprintfmt
- .size vprintfmt, .-vprintfmt
- .align 2
- .globl printf
- .ent printf
- .type printf, @function
-printf:
- .frame x30,96,x1 # vars= 16, regs= 1/0, args= 0
- .mask 0x00000002,-72
- .fmask 0x00000000,0
- add x30,x30,-96
- add x2,x30,40
- move x3,x4
- lui x4,%hi(putchar)
- sd x5,40(x30)
- sd x6,48(x30)
- sd x7,56(x30)
- add x4,x4,%lo(putchar)
- move x5,x0
- move x6,x3
- move x7,x2
- sd x1,24(x30)
- sd x8,64(x30)
- sd x9,72(x30)
- sd x10,80(x30)
- sd x11,88(x30)
- sd x2,0(x30)
- jal vprintfmt
- li x4,-1 # 0xffffffffffffffff
- jal putchar
- ld x1,24(x30)
- move x2,x0
- add x30,x30,96
- j x1
- .end printf
- .size printf, .-printf
- .local buflen.1596
- .comm buflen.1596,4,4
- .local buf.1595
- .comm buf.1595,64,8
- .ident "GCC: (GNU) 4.6.1"
diff --git a/benchmarks/common/syscalls.c b/benchmarks/common/syscalls.c
index f95dde4..0c1bc7f 100644
--- a/benchmarks/common/syscalls.c
+++ b/benchmarks/common/syscalls.c
@@ -1,12 +1,13 @@
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
+#include <machine/syscall.h>
#include "pcr.h"
void exit(int code)
{
volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = 1;
+ magic_mem[0] = SYS_exit;
magic_mem[1] = code;
__sync_synchronize();
mtpcr(PCR_TOHOST, (long)magic_mem);
@@ -16,7 +17,7 @@ void exit(int code)
void printstr(const char* s)
{
volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = 4;
+ magic_mem[0] = SYS_write;
magic_mem[1] = 1;
magic_mem[2] = (unsigned long)s;
magic_mem[3] = strlen(s);
@@ -27,17 +28,16 @@ void printstr(const char* s)
int putchar(int ch)
{
- #define buffered_putch_bufsize 64
- static char buf[buffered_putch_bufsize];
+ static char buf[64];
static int buflen = 0;
if(ch != -1)
buf[buflen++] = ch;
- if(ch == -1 || buflen == buffered_putch_bufsize)
+ if(ch == -1 || buflen == sizeof(buf))
{
volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = 4;
+ magic_mem[0] = SYS_write;
magic_mem[1] = 1;
magic_mem[2] = (long)buf;
magic_mem[3] = buflen;
diff --git a/benchmarks/dgemm/dgemm_main.c b/benchmarks/dgemm/dgemm_main.c
index 7c8ce7c..72c90ef 100644
--- a/benchmarks/dgemm/dgemm_main.c
+++ b/benchmarks/dgemm/dgemm_main.c
@@ -70,7 +70,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/dhrystone/dhrystone_main.c b/benchmarks/dhrystone/dhrystone_main.c
index 7e8c6af..c2a8038 100644
--- a/benchmarks/dhrystone/dhrystone_main.c
+++ b/benchmarks/dhrystone/dhrystone_main.c
@@ -53,7 +53,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/median/median_main.c b/benchmarks/median/median_main.c
index 6decd3d..5c435dd 100644
--- a/benchmarks/median/median_main.c
+++ b/benchmarks/median/median_main.c
@@ -76,7 +76,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/multiply/multiply_main.c b/benchmarks/multiply/multiply_main.c
index fcb6b27..f2ce12b 100644
--- a/benchmarks/multiply/multiply_main.c
+++ b/benchmarks/multiply/multiply_main.c
@@ -85,7 +85,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/qsort/qsort_main.c b/benchmarks/qsort/qsort_main.c
index 6de1613..486b8fc 100644
--- a/benchmarks/qsort/qsort_main.c
+++ b/benchmarks/qsort/qsort_main.c
@@ -92,7 +92,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/spmv/spmv_main.c b/benchmarks/spmv/spmv_main.c
index 11d9540..afb1cf1 100644
--- a/benchmarks/spmv/spmv_main.c
+++ b/benchmarks/spmv/spmv_main.c
@@ -70,7 +70,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/towers/towers_main.c b/benchmarks/towers/towers_main.c
index 36526a2..aa61665 100644
--- a/benchmarks/towers/towers_main.c
+++ b/benchmarks/towers/towers_main.c
@@ -59,7 +59,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}
diff --git a/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S b/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S
index 0771965..9ccd6c2 100644
--- a/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S
+++ b/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S
@@ -21,9 +21,9 @@
#define rVlen a6
#define rStride a7
-#define rAI a8
-#define rBI a9
-#define rCI a10
+#define rAI t0
+#define rBI t1
+#define rCI t2
# WARNING: do not write to the s0,...,s9 registers without first saving them to
# the stack.
diff --git a/benchmarks/vec-matmul/vec_matmul_asm.S b/benchmarks/vec-matmul/vec_matmul_asm.S
index f14d186..5135772 100644
--- a/benchmarks/vec-matmul/vec_matmul_asm.S
+++ b/benchmarks/vec-matmul/vec_matmul_asm.S
@@ -7,6 +7,8 @@
# Headers and Defines
#--------------------------------------------------------------------------
+#include "pcr.h"
+
# Here are some defines that make writing assembly code easier.
# I'm using the knowledge that rLda will be placed in register a0, rA will be
@@ -26,15 +28,15 @@
# address of VT function
#define rVTAddr v1
-#define rTemp0 a8
+#define rTemp0 t0
# desired app vector length (number of elements to vectorize)
-#define rNum a9
+#define rNum t1
-#define rATemp a10
-#define rBTemp a11
-#define rCTemp a12
-#define rI a13
+#define rATemp t2
+#define rBTemp t3
+#define rCTemp t4
+#define rI t5
#define rJ s1
#define rK s2
#define rLda4 a4
@@ -118,9 +120,7 @@ vt_matmul_asm:
# turn on vector unit
- mfpcr a13,cr0
- ori a13,a13,4
- mtpcr x0,a13,cr0
+ setpcr status, SR_EV
blez rLda, cpdone # exit early if lda <= 0
@@ -289,9 +289,7 @@ vtcode:
transpose:
# turn on vector unit
- mfpcr a13,cr0
- ori a13,a13,4
- mtpcr x0,a13,cr0
+ setpcr status, SR_EV
blez rLda, cpdone # exit early if lda <= 0
diff --git a/benchmarks/vvadd/vvadd_main.c b/benchmarks/vvadd/vvadd_main.c
index 9738118..70b3f75 100644
--- a/benchmarks/vvadd/vvadd_main.c
+++ b/benchmarks/vvadd/vvadd_main.c
@@ -77,7 +77,7 @@ void finishTest( int toHostValue )
printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
exit(0);
#else
- asm( "mtpcr %0, cr30" : : "r" (toHostValue) );
+ asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
while ( 1 ) { }
#endif
}