From 5fe2ce69dcd1d0ddb42c4edffac7ab11d939ca45 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 23 Aug 2013 20:02:02 -0700 Subject: Reflect changes to ISA Conflicts: isa/Makefile --- benchmarks/Makefile | 2 +- benchmarks/common/crt-mt.S | 84 ++-- benchmarks/common/crt.S | 83 ++-- benchmarks/common/pcr.h | 86 +++- benchmarks/common/syscalls.S | 678 --------------------------- benchmarks/common/syscalls.c | 12 +- benchmarks/dgemm/dgemm_main.c | 2 +- benchmarks/dhrystone/dhrystone_main.c | 2 +- benchmarks/median/median_main.c | 2 +- benchmarks/multiply/multiply_main.c | 2 +- benchmarks/qsort/qsort_main.c | 2 +- benchmarks/spmv/spmv_main.c | 2 +- benchmarks/towers/towers_main.c | 2 +- benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S | 6 +- benchmarks/vec-matmul/vec_matmul_asm.S | 22 +- benchmarks/vvadd/vvadd_main.c | 2 +- 16 files changed, 169 insertions(+), 820 deletions(-) delete mode 100644 benchmarks/common/syscalls.S (limited to 'benchmarks') diff --git a/benchmarks/Makefile b/benchmarks/Makefile index 220a033..5d33f86 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -50,7 +50,7 @@ HOST_OPTS = -std=gnu99 -DPREALLOCATE=0 -DHOST_DEBUG=1 HOST_COMP = gcc $(HOST_OPTS) RISCV_GCC = riscv-gcc -RISCV_GCC_OPTS = -std=gnu99 -DSET_STATS -O2 -nostdlib -nostartfiles -ffast-math +RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -nostdlib -nostartfiles -ffast-math RISCV_LINK = riscv-gcc -T $(bmarkdir)/common/test.ld RISCV_LINK_MT = riscv-gcc -T $(bmarkdir)/common/test-mt.ld RISCV_LINK_OPTS = -lc diff --git a/benchmarks/common/crt-mt.S b/benchmarks/common/crt-mt.S index 283b3bf..90cd755 100644 --- a/benchmarks/common/crt-mt.S +++ b/benchmarks/common/crt-mt.S @@ -1,3 +1,5 @@ +#include "pcr.h" + .data .globl _heapend .globl environ @@ -43,57 +45,53 @@ _start: li x31,0 # enable fp - mfpcr x1,cr0 - ori x1,x1,0x2 - mtpcr x1,cr0 + setpcr status, SR_EF # enable vec - mfpcr x1,cr0 - ori x1,x1,0x4 - mtpcr x1,cr0 + setpcr t0, status, SR_EV ## if that didn't stick, 
we don't have an FPU, so don't initialize it - mfpcr x1,cr0 - andi x1,x1,0x2 - beqz x1,1f + and t0, t0, SR_EF + beqz t0, 1f - mtfsr x0 - mxtf.s f0, x0 - mxtf.s f1, x0 - mxtf.s f2, x0 - mxtf.s f3, x0 - mxtf.s f4, x0 - mxtf.s f5, x0 - mxtf.s f6, x0 - mxtf.s f7, x0 - mxtf.s f8, x0 - mxtf.s f9, x0 - mxtf.s f10,x0 - mxtf.s f11,x0 - mxtf.s f12,x0 - mxtf.s f13,x0 - mxtf.s f14,x0 - mxtf.s f15,x0 - mxtf.s f16,x0 - mxtf.s f17,x0 - mxtf.s f18,x0 - mxtf.s f19,x0 - mxtf.s f20,x0 - mxtf.s f21,x0 - mxtf.s f22,x0 - mxtf.s f23,x0 - mxtf.s f24,x0 - mxtf.s f25,x0 - mxtf.s f26,x0 - mxtf.s f27,x0 - mxtf.s f28,x0 - mxtf.s f29,x0 - mxtf.s f30,x0 - mxtf.s f31,x0 + fssr x0 + fmv.s.x f0, x0 + fmv.s.x f1, x0 + fmv.s.x f2, x0 + fmv.s.x f3, x0 + fmv.s.x f4, x0 + fmv.s.x f5, x0 + fmv.s.x f6, x0 + fmv.s.x f7, x0 + fmv.s.x f8, x0 + fmv.s.x f9, x0 + fmv.s.x f10,x0 + fmv.s.x f11,x0 + fmv.s.x f12,x0 + fmv.s.x f13,x0 + fmv.s.x f14,x0 + fmv.s.x f15,x0 + fmv.s.x f16,x0 + fmv.s.x f17,x0 + fmv.s.x f18,x0 + fmv.s.x f19,x0 + fmv.s.x f20,x0 + fmv.s.x f21,x0 + fmv.s.x f22,x0 + fmv.s.x f23,x0 + fmv.s.x f24,x0 + fmv.s.x f25,x0 + fmv.s.x f26,x0 + fmv.s.x f27,x0 + fmv.s.x f28,x0 + fmv.s.x f29,x0 + fmv.s.x f30,x0 + fmv.s.x f31,x0 1: + # get core id and number of cores - mfpcr a0,cr10 + mfpcr a0,hartid lw a1, 4(zero) slli a2, a0, 13 diff --git a/benchmarks/common/crt.S b/benchmarks/common/crt.S index d153210..ac5d9ba 100644 --- a/benchmarks/common/crt.S +++ b/benchmarks/common/crt.S @@ -1,3 +1,5 @@ +#include "pcr.h" + .data .globl _heapend .globl environ @@ -43,57 +45,52 @@ _start: li x31,0 # enable fp - mfpcr x1,cr0 - ori x1,x1,0x2 - mtpcr x1,cr0 + setpcr status, SR_EF # enable vec - mfpcr x1,cr0 - ori x1,x1,0x4 - mtpcr x1,cr0 + setpcr t0, status, SR_EV ## if that didn't stick, we don't have an FPU, so don't initialize it - mfpcr x1,cr0 - andi x1,x1,0x2 - beqz x1,1f + and t0, t0, SR_EF + beqz t0, 1f - mtfsr x0 - mxtf.s f0, x0 - mxtf.s f1, x0 - mxtf.s f2, x0 - mxtf.s f3, x0 - mxtf.s f4, x0 - mxtf.s f5, x0 - mxtf.s 
f6, x0 - mxtf.s f7, x0 - mxtf.s f8, x0 - mxtf.s f9, x0 - mxtf.s f10,x0 - mxtf.s f11,x0 - mxtf.s f12,x0 - mxtf.s f13,x0 - mxtf.s f14,x0 - mxtf.s f15,x0 - mxtf.s f16,x0 - mxtf.s f17,x0 - mxtf.s f18,x0 - mxtf.s f19,x0 - mxtf.s f20,x0 - mxtf.s f21,x0 - mxtf.s f22,x0 - mxtf.s f23,x0 - mxtf.s f24,x0 - mxtf.s f25,x0 - mxtf.s f26,x0 - mxtf.s f27,x0 - mxtf.s f28,x0 - mxtf.s f29,x0 - mxtf.s f30,x0 - mxtf.s f31,x0 + fssr x0 + fmv.s.x f0, x0 + fmv.s.x f1, x0 + fmv.s.x f2, x0 + fmv.s.x f3, x0 + fmv.s.x f4, x0 + fmv.s.x f5, x0 + fmv.s.x f6, x0 + fmv.s.x f7, x0 + fmv.s.x f8, x0 + fmv.s.x f9, x0 + fmv.s.x f10,x0 + fmv.s.x f11,x0 + fmv.s.x f12,x0 + fmv.s.x f13,x0 + fmv.s.x f14,x0 + fmv.s.x f15,x0 + fmv.s.x f16,x0 + fmv.s.x f17,x0 + fmv.s.x f18,x0 + fmv.s.x f19,x0 + fmv.s.x f20,x0 + fmv.s.x f21,x0 + fmv.s.x f22,x0 + fmv.s.x f23,x0 + fmv.s.x f24,x0 + fmv.s.x f25,x0 + fmv.s.x f26,x0 + fmv.s.x f27,x0 + fmv.s.x f28,x0 + fmv.s.x f29,x0 + fmv.s.x f30,x0 + fmv.s.x f31,x0 1: # only allow core 0 to proceed -1:mfpcr a0, cr10 +1:mfpcr a0, hartid bnez a0, 1b la sp,stacktop diff --git a/benchmarks/common/pcr.h b/benchmarks/common/pcr.h index 7659a97..8780cdd 100644 --- a/benchmarks/common/pcr.h +++ b/benchmarks/common/pcr.h @@ -1,42 +1,53 @@ +// See LICENSE for license details. 
+ #ifndef _RISCV_PCR_H #define _RISCV_PCR_H -#define SR_ET 0x00000001 -#define SR_EF 0x00000002 -#define SR_EV 0x00000004 -#define SR_EC 0x00000008 -#define SR_PS 0x00000010 -#define SR_S 0x00000020 -#define SR_U64 0x00000040 -#define SR_S64 0x00000080 -#define SR_VM 0x00000100 +#define SR_S 0x00000001 +#define SR_PS 0x00000002 +#define SR_EI 0x00000004 +#define SR_PEI 0x00000008 +#define SR_EF 0x00000010 +#define SR_U64 0x00000020 +#define SR_S64 0x00000040 +#define SR_VM 0x00000080 +#define SR_EV 0x00000100 #define SR_IM 0x00FF0000 -#define SR_ZERO ~(SR_ET|SR_EF|SR_EV|SR_EC|SR_PS|SR_S|SR_U64|SR_S64|SR_VM|SR_IM) +#define SR_IP 0xFF000000 +#define SR_ZERO ~(SR_S|SR_PS|SR_EI|SR_PEI|SR_EF|SR_U64|SR_S64|SR_VM|SR_EV|SR_IM|SR_IP) #define SR_IM_SHIFT 16 +#define SR_IP_SHIFT 24 -#define PCR_SR 0 -#define PCR_EPC 1 -#define PCR_BADVADDR 2 -#define PCR_EVEC 3 -#define PCR_COUNT 4 -#define PCR_COMPARE 5 -#define PCR_CAUSE 6 -#define PCR_PTBR 7 -#define PCR_SEND_IPI 8 -#define PCR_CLR_IPI 9 -#define PCR_COREID 10 -#define PCR_IMPL 11 -#define PCR_K0 12 -#define PCR_K1 13 +#define PCR_SUP0 0 +#define PCR_SUP1 1 +#define PCR_EPC 2 +#define PCR_BADVADDR 3 +#define PCR_PTBR 4 +#define PCR_ASID 5 +#define PCR_COUNT 6 +#define PCR_COMPARE 7 +#define PCR_EVEC 8 +#define PCR_CAUSE 9 +#define PCR_SR 10 +#define PCR_HARTID 11 +#define PCR_IMPL 12 +#define PCR_FATC 13 +#define PCR_SEND_IPI 14 +#define PCR_CLR_IPI 15 #define PCR_VECBANK 18 #define PCR_VECCFG 19 #define PCR_RESET 29 #define PCR_TOHOST 30 #define PCR_FROMHOST 31 +#define IRQ_COP 2 #define IRQ_IPI 5 +#define IRQ_HOST 6 #define IRQ_TIMER 7 +#define IMPL_SPIKE 1 +#define IMPL_ROCKET 2 + #define CAUSE_MISALIGNED_FETCH 0 #define CAUSE_FAULT_FETCH 1 #define CAUSE_ILLEGAL_INSTRUCTION 2 @@ -60,10 +71,29 @@ #define CAUSE_VECTOR_FAULT_LOAD 30 #define CAUSE_VECTOR_FAULT_STORE 31 +// page table entry (PTE) fields +#define PTE_V 0x001 // Entry is a page Table descriptor +#define PTE_T 0x002 // Entry is a page Table, not a terminal node 
+#define PTE_G 0x004 // Global +#define PTE_UR 0x008 // User Read permission +#define PTE_UW 0x010 // User Write permission +#define PTE_UX 0x020 // User eXecute permission +#define PTE_SR 0x040 // Supervisor Read permission +#define PTE_SW 0x080 // Supervisor Write permission +#define PTE_SX 0x100 // Supervisor eXecute permission +#define PTE_PERM (PTE_SR | PTE_SW | PTE_SX | PTE_UR | PTE_UW | PTE_UX) + #ifdef __riscv -#define ASM_CR(r) _ASM_CR(r) -#define _ASM_CR(r) cr##r +#ifdef __riscv64 +# define RISCV_PGLEVELS 3 +# define RISCV_PGSHIFT 13 +#else +# define RISCV_PGLEVELS 2 +# define RISCV_PGSHIFT 12 +#endif +#define RISCV_PGLEVEL_BITS 10 +#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) #ifndef __ASSEMBLER__ @@ -83,6 +113,10 @@ asm volatile ("clearpcr %0,cr%2,%1" : "=r"(__tmp) : "i"(val), "i"(reg)); \ __tmp; }) +#define rdcycle() ({ unsigned long __tmp; \ + asm volatile ("rdcycle %0" : "=r"(__tmp)); \ + __tmp; }) + #endif #endif diff --git a/benchmarks/common/syscalls.S b/benchmarks/common/syscalls.S deleted file mode 100644 index a0cdf6e..0000000 --- a/benchmarks/common/syscalls.S +++ /dev/null @@ -1,678 +0,0 @@ - .file 1 "syscalls.c" - .section .mdebug.abi64 - .previous - .section .rodata.str1.8,"aMS",@progbits,1 - .align 3 -$LC0: - .ascii "0123456789abcdef\000" - .text - .align 2 - .ent printnum - .type printnum, @function -printnum: - .frame x30,64,x1 # vars= 0, regs= 7/0, args= 0 - .mask 0x03f00002,-8 - .fmask 0x00000000,0 - add x30,x30,-64 - sd x22,32(x30) - sll x22,x7,32 - srl x22,x22,32 - sd x24,48(x30) - sd x23,40(x30) - sd x21,24(x30) - sd x20,16(x30) - sd x25,56(x30) - sd x1,8(x30) - move x23,x6 - move x20,x4 - move x21,x5 - move x24,x9 - bleu x22,x6,$L2 - addw x8,x8,-1 - move x25,x8 - ble x8,x0,$L4 -$L6: - addw x25,x25,-1 - move x4,x24 - move x5,x21 - jalr x20 - bne x25,x0,$L6 -$L4: - lui x2,%hi($LC0) - add x2,x2,%lo($LC0) - remu x22,x23,x22 - add x22,x22,x2 - lb x4,0(x22) - move x5,x21 - move x19,x20 - ld x25,56(x30) - ld x24,48(x30) - ld x23,40(x30) - ld
x22,32(x30) - ld x21,24(x30) - ld x20,16(x30) - ld x1,8(x30) - add x30,x30,64 - jr x19 -$L2: - addw x8,x8,-1 - divu x6,x6,x22 - jal printnum - j $L4 - .end printnum - .size printnum, .-printnum - .align 2 - .ent getuint - .type getuint, @function -getuint: - .frame x30,0,x1 # vars= 0, regs= 0/0, args= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - slt x2,x5,2 - bne x2,x0,$L10 -$L13: - ld x2,0(x4) - add x3,x2,8 - sd x3,0(x4) - ld x2,0(x2) - ret -$L10: - bne x5,x0,$L13 - ld x3,0(x4) - lwu x2,0(x3) - add x3,x3,8 - sd x3,0(x4) - ret - .end getuint - .size getuint, .-getuint - .align 2 - .globl putchar - .ent putchar - .type putchar, @function -putchar: - .frame x30,64,x1 # vars= 64, regs= 0/0, args= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - li x2,-1 # 0xffffffffffffffff - add x30,x30,-64 - lui x3,%hi(buflen.1596) - beq x4,x2,$L21 - lw x5,%lo(buflen.1596)(x3) - lui x2,%hi(buf.1595) - add x2,x2,%lo(buf.1595) - add x6,x2,x5 - sb x4,0(x6) - addw x5,x5,1 - li x4,64 # 0x40 - sw x5,%lo(buflen.1596)(x3) - beq x5,x4,$L15 - move x2,x0 - add x30,x30,64 - j x1 -$L21: - lui x2,%hi(buf.1595) - add x2,x2,%lo(buf.1595) -$L15: - lw x4,%lo(buflen.1596)(x3) - li x5,4 # 0x4 - sd x0,0(x30) - sd x0,8(x30) - sd x0,16(x30) - sd x0,24(x30) - sd x0,32(x30) - sd x0,40(x30) - sd x0,48(x30) - sd x0,56(x30) - sd x5,0(x30) - li x5,1 # 0x1 - sd x5,8(x30) - sd x2,16(x30) - sd x4,24(x30) - fence - #APP - # 45 "syscalls.c" 1 - mtpcr x2,x30,cr30 - # 0 "" 2 - #NO_APP -$L17: - #APP - # 46 "syscalls.c" 1 - mfpcr x2,cr31 - # 0 "" 2 - #NO_APP - beq x2,x0,$L17 - move x2,x0 - sw x0,%lo(buflen.1596)(x3) - add x30,x30,64 - j x1 - .end putchar - .size putchar, .-putchar - .align 2 - .globl exit - .ent exit - .type exit, @function -exit: - .frame x30,64,x1 # vars= 64, regs= 0/0, args= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - add x30,x30,-64 - li x2,1 # 0x1 - sd x0,0(x30) - sd x0,8(x30) - sd x0,16(x30) - sd x0,24(x30) - sd x0,32(x30) - sd x0,40(x30) - sd x0,48(x30) - sd x0,56(x30) - sd x2,0(x30) - sd 
x4,8(x30) - fence - #APP - # 12 "syscalls.c" 1 - mtpcr x2,x30,cr30 - # 0 "" 2 - #NO_APP -$L23: - j $L23 - .end exit - .size exit, .-exit - .align 2 - .globl printstr - .ent printstr - .type printstr, @function -printstr: - .frame x30,80,x1 # vars= 64, regs= 1/0, args= 0 - .mask 0x00000002,-8 - .fmask 0x00000000,0 - add x30,x30,-80 - li x3,4 # 0x4 - sd x0,0(x30) - sd x0,8(x30) - sd x0,16(x30) - sd x0,24(x30) - sd x0,32(x30) - sd x0,40(x30) - sd x0,48(x30) - sd x0,56(x30) - sd x3,0(x30) - li x3,1 # 0x1 - sd x3,8(x30) - sd x1,72(x30) - sd x4,16(x30) - jal strlen - sd x2,24(x30) - fence - #APP - # 24 "syscalls.c" 1 - mtpcr x2,x30,cr30 - # 0 "" 2 - #NO_APP -$L25: - #APP - # 25 "syscalls.c" 1 - mfpcr x2,cr31 - # 0 "" 2 - #NO_APP - beq x2,x0,$L25 - ld x1,72(x30) - add x30,x30,80 - j x1 - .end printstr - .size printstr, .-printstr - .align 2 - .globl printhex - .ent printhex - .type printhex, @function -printhex: - .frame x30,48,x1 # vars= 32, regs= 1/0, args= 0 - .mask 0x00000002,-8 - .fmask 0x00000000,0 - add x30,x30,-48 - sd x1,40(x30) - add x2,x30,15 - add x7,x30,-1 -$L29: - and x3,x4,15 - sltu x6,x3,10 - li x5,87 # 0x57 - beq x6,x0,$L28 - li x5,48 # 0x30 -$L28: - add x3,x5,x3 - sb x3,0(x2) - add x2,x2,-1 - srl x4,x4,4 - bne x2,x7,$L29 - move x4,x30 - sb x0,16(x30) - jal printstr - ld x1,40(x30) - add x30,x30,48 - j x1 - .end printhex - .size printhex, .-printhex - .section .rodata.str1.8 - .align 3 -$LC1: - .ascii "(null)\000" - .text - .align 2 - .globl vprintfmt - .ent vprintfmt - .type vprintfmt, @function -vprintfmt: - .frame x30,112,x1 # vars= 32, regs= 10/0, args= 0 - .mask 0x2ff00002,-8 - .fmask 0x00000000,0 - add x30,x30,-112 - sd x25,80(x30) - lui x25,%hi($L53) - sd x26,88(x30) - sd x24,72(x30) - sd x23,64(x30) - sd x22,56(x30) - sd x21,48(x30) - sd x20,40(x30) - sd x29,104(x30) - sd x27,96(x30) - sd x1,32(x30) - move x21,x4 - move x20,x5 - move x23,x6 - sd x7,0(x30) - li x22,37 # 0x25 - add x25,x25,%lo($L53) - li x24,-1 # 0xffffffffffffffff - lui 
x26,%hi($LC1) - j $L84 -$L35: - beq x2,x0,$L32 - move x5,x20 - add x23,x23,1 - jalr x21 -$L84: - lbu x4,0(x23) - move x2,x4 - bne x4,x22,$L35 - ld x2,0(x30) - add x6,x23,1 - move x7,x6 - li x27,32 # 0x20 - sd x0,8(x30) - li x3,-1 # 0xffffffffffffffff - li x29,-1 # 0xffffffffffffffff - move x5,x0 -$L85: - lbu x4,0(x7) - add x23,x7,1 - addw x8,x4,-35 - and x9,x8,0xff - sltu x9,x9,86 - bne x9,x0,$L90 -$L38: - li x4,37 # 0x25 - move x5,x20 - sd x2,0(x30) - move x23,x6 - jalr x21 - j $L84 -$L32: - ld x1,32(x30) - ld x29,104(x30) - ld x27,96(x30) - ld x26,88(x30) - ld x25,80(x30) - ld x24,72(x30) - ld x23,64(x30) - ld x22,56(x30) - ld x21,48(x30) - ld x20,40(x30) - add x30,x30,112 - j x1 -$L90: - and x8,x8,0xff - sll x8,x8,3 - add x8,x25,x8 - ld x8,0(x8) - j x8 - .section .rodata - .align 3 - .align 2 -$L53: - .dword $L39 - .dword $L38 - .dword $L40 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L41 - .dword $L38 - .dword $L38 - .dword $L42 - .dword $L43 - .dword $L38 - .dword $L74 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L44 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L45 - .dword $L46 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L38 - .dword $L47 - .dword $L38 - .dword $L38 - .dword $L48 - .dword $L49 - .dword $L38 - .dword $L38 - .dword $L50 - .dword $L38 - .dword $L51 - .dword $L38 - .dword 
$L38 - .dword $L52 - .text -$L52: - move x4,x30 - sd x2,0(x30) - jal getuint - move x6,x2 - li x7,16 # 0x10 -$L73: - move x4,x21 - move x5,x20 - move x8,x29 - move x9,x27 - jal printnum - j $L84 -$L39: - li x4,1 # 0x1 - move x7,x23 - sd x4,8(x30) - j $L85 -$L40: - move x5,x20 - sd x2,0(x30) - jalr x21 - j $L84 -$L41: - lw x3,0(x2) - move x7,x23 - add x2,x2,8 -$L54: - bge x29,x0,$L85 - move x29,x3 - li x3,-1 # 0xffffffffffffffff - j $L85 -$L42: - move x7,x23 - li x27,45 # 0x2d - j $L85 -$L43: - blt x29,x0,$L91 - move x7,x23 - j $L85 -$L74: - move x7,x23 - li x27,48 # 0x30 - j $L85 -$L44: - lb x8,1(x7) - addw x3,x4,-48 - move x7,x23 - addw x4,x8,-48 - sltu x4,x4,10 - beq x4,x0,$L54 -$L55: - add x7,x7,1 - lb x4,0(x7) - sllw x9,x3,1 - sllw x3,x3,3 - addw x3,x9,x3 - addw x9,x4,-48 - addw x3,x3,x8 - sltu x9,x9,10 - addw x3,x3,-48 - move x8,x4 - bne x9,x0,$L55 - j $L54 -$L45: - lw x4,0(x2) - add x2,x2,8 - move x5,x20 - sd x2,0(x30) - jalr x21 - j $L84 -$L46: - slt x3,x5,2 - bne x3,x0,$L69 -$L88: - add x3,x2,8 - sd x3,0(x30) - ld x6,0(x2) - blt x6,x0,$L72 -$L89: - li x7,10 # 0xa - j $L73 -$L47: - addw x5,x5,1 - move x7,x23 - j $L85 -$L48: - move x4,x30 - sd x2,0(x30) - jal getuint - move x6,x2 - li x7,8 # 0x8 - j $L73 -$L49: - sd x2,0(x30) - li x4,48 # 0x30 - move x5,x20 - jalr x21 - li x4,120 # 0x78 - move x5,x20 - jalr x21 - ld x2,0(x30) - li x7,16 # 0x10 - add x3,x2,8 - sd x3,0(x30) - ld x6,0(x2) - j $L73 -$L50: - add x4,x2,8 - sd x4,0(x30) - ld x2,0(x2) - sd x2,16(x30) - beq x2,x0,$L92 -$L58: - ble x29,x0,$L59 - li x2,45 # 0x2d - beq x27,x2,$L59 - ld x4,16(x30) - move x5,x3 - sd x3,24(x30) - jal strnlen - sllw x2,x2,0 - subw x2,x29,x2 - move x29,x2 - ld x3,24(x30) - ble x2,x0,$L59 - move x29,x2 -$L60: - sd x3,24(x30) - move x4,x27 - move x5,x20 - addw x29,x29,-1 - jalr x21 - ld x3,24(x30) - bne x29,x0,$L60 -$L59: - ld x2,16(x30) - lb x4,0(x2) - add x27,x2,1 - beq x4,x0,$L62 -$L80: - blt x3,x0,$L67 - addw x2,x3,-1 - move x3,x2 - beq x2,x24,$L62 -$L67: - ld x2,8(x30) - 
beq x2,x0,$L63 - addw x2,x4,-32 - sltu x2,x2,95 - beq x2,x0,$L93 -$L63: - sd x3,24(x30) - move x5,x20 - jalr x21 - ld x3,24(x30) -$L64: - lb x4,0(x27) - addw x29,x29,-1 - add x27,x27,1 - bne x4,x0,$L80 -$L62: - ble x29,x0,$L84 -$L79: - addw x29,x29,-1 - li x4,32 # 0x20 - move x5,x20 - jalr x21 - bne x29,x0,$L79 - j $L84 -$L51: - move x4,x30 - sd x2,0(x30) - jal getuint - move x6,x2 - li x7,10 # 0xa - j $L73 -$L93: - sd x3,24(x30) - li x4,63 # 0x3f - move x5,x20 - jalr x21 - ld x3,24(x30) - j $L64 -$L91: - move x7,x23 - move x29,x0 - j $L85 -$L92: - add x2,x26,%lo($LC1) - sd x2,16(x30) - j $L58 -$L72: - sd x6,24(x30) - li x4,45 # 0x2d - move x5,x20 - jalr x21 - ld x6,24(x30) - li x7,10 # 0xa - sub x6,zero,x6 - j $L73 -$L69: - bne x5,x0,$L88 - lw x6,0(x2) - add x2,x2,8 - sd x2,0(x30) - bge x6,x0,$L89 - j $L72 - .end vprintfmt - .size vprintfmt, .-vprintfmt - .align 2 - .globl printf - .ent printf - .type printf, @function -printf: - .frame x30,96,x1 # vars= 16, regs= 1/0, args= 0 - .mask 0x00000002,-72 - .fmask 0x00000000,0 - add x30,x30,-96 - add x2,x30,40 - move x3,x4 - lui x4,%hi(putchar) - sd x5,40(x30) - sd x6,48(x30) - sd x7,56(x30) - add x4,x4,%lo(putchar) - move x5,x0 - move x6,x3 - move x7,x2 - sd x1,24(x30) - sd x8,64(x30) - sd x9,72(x30) - sd x10,80(x30) - sd x11,88(x30) - sd x2,0(x30) - jal vprintfmt - li x4,-1 # 0xffffffffffffffff - jal putchar - ld x1,24(x30) - move x2,x0 - add x30,x30,96 - j x1 - .end printf - .size printf, .-printf - .local buflen.1596 - .comm buflen.1596,4,4 - .local buf.1595 - .comm buf.1595,64,8 - .ident "GCC: (GNU) 4.6.1" diff --git a/benchmarks/common/syscalls.c b/benchmarks/common/syscalls.c index f95dde4..0c1bc7f 100644 --- a/benchmarks/common/syscalls.c +++ b/benchmarks/common/syscalls.c @@ -1,12 +1,13 @@ #include #include #include +#include #include "pcr.h" void exit(int code) { volatile uint64_t magic_mem[8] = {0}; - magic_mem[0] = 1; + magic_mem[0] = SYS_exit; magic_mem[1] = code; __sync_synchronize(); mtpcr(PCR_TOHOST, 
(long)magic_mem); @@ -16,7 +17,7 @@ void exit(int code) void printstr(const char* s) { volatile uint64_t magic_mem[8] = {0}; - magic_mem[0] = 4; + magic_mem[0] = SYS_write; magic_mem[1] = 1; magic_mem[2] = (unsigned long)s; magic_mem[3] = strlen(s); @@ -27,17 +28,16 @@ void printstr(const char* s) int putchar(int ch) { - #define buffered_putch_bufsize 64 - static char buf[buffered_putch_bufsize]; + static char buf[64]; static int buflen = 0; if(ch != -1) buf[buflen++] = ch; - if(ch == -1 || buflen == buffered_putch_bufsize) + if(ch == -1 || buflen == sizeof(buf)) { volatile uint64_t magic_mem[8] = {0}; - magic_mem[0] = 4; + magic_mem[0] = SYS_write; magic_mem[1] = 1; magic_mem[2] = (long)buf; magic_mem[3] = buflen; diff --git a/benchmarks/dgemm/dgemm_main.c b/benchmarks/dgemm/dgemm_main.c index 7c8ce7c..72c90ef 100644 --- a/benchmarks/dgemm/dgemm_main.c +++ b/benchmarks/dgemm/dgemm_main.c @@ -70,7 +70,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/dhrystone/dhrystone_main.c b/benchmarks/dhrystone/dhrystone_main.c index 7e8c6af..c2a8038 100644 --- a/benchmarks/dhrystone/dhrystone_main.c +++ b/benchmarks/dhrystone/dhrystone_main.c @@ -53,7 +53,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/median/median_main.c b/benchmarks/median/median_main.c index 6decd3d..5c435dd 100644 --- a/benchmarks/median/median_main.c +++ b/benchmarks/median/median_main.c @@ -76,7 +76,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : 
"r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/multiply/multiply_main.c b/benchmarks/multiply/multiply_main.c index fcb6b27..f2ce12b 100644 --- a/benchmarks/multiply/multiply_main.c +++ b/benchmarks/multiply/multiply_main.c @@ -85,7 +85,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/qsort/qsort_main.c b/benchmarks/qsort/qsort_main.c index 6de1613..486b8fc 100644 --- a/benchmarks/qsort/qsort_main.c +++ b/benchmarks/qsort/qsort_main.c @@ -92,7 +92,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/spmv/spmv_main.c b/benchmarks/spmv/spmv_main.c index 11d9540..afb1cf1 100644 --- a/benchmarks/spmv/spmv_main.c +++ b/benchmarks/spmv/spmv_main.c @@ -70,7 +70,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/towers/towers_main.c b/benchmarks/towers/towers_main.c index 36526a2..aa61665 100644 --- a/benchmarks/towers/towers_main.c +++ b/benchmarks/towers/towers_main.c @@ -59,7 +59,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } diff --git a/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S b/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S index 0771965..9ccd6c2 100644 --- a/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S +++ 
b/benchmarks/vec-cmplxmult/vec_cmplxmult_asm.S @@ -21,9 +21,9 @@ #define rVlen a6 #define rStride a7 -#define rAI a8 -#define rBI a9 -#define rCI a10 +#define rAI t0 +#define rBI t1 +#define rCI t2 # WARNING: do not write to the s0,...,s9 registers without first saving them to # the stack. diff --git a/benchmarks/vec-matmul/vec_matmul_asm.S b/benchmarks/vec-matmul/vec_matmul_asm.S index f14d186..5135772 100644 --- a/benchmarks/vec-matmul/vec_matmul_asm.S +++ b/benchmarks/vec-matmul/vec_matmul_asm.S @@ -7,6 +7,8 @@ # Headers and Defines #-------------------------------------------------------------------------- +#include "pcr.h" + # Here are some defines that make writing assembly code easier. # I'm using the knowledge that rLda will be placed in register a0, rA will be @@ -26,15 +28,15 @@ # address of VT function #define rVTAddr v1 -#define rTemp0 a8 +#define rTemp0 t0 # desired app vector length (number of elements to vectorize) -#define rNum a9 +#define rNum t1 -#define rATemp a10 -#define rBTemp a11 -#define rCTemp a12 -#define rI a13 +#define rATemp t2 +#define rBTemp t3 +#define rCTemp t4 +#define rI t5 #define rJ s1 #define rK s2 #define rLda4 a4 @@ -118,9 +120,7 @@ vt_matmul_asm: # turn on vector unit - mfpcr a13,cr0 - ori a13,a13,4 - mtpcr x0,a13,cr0 + setpcr status, SR_EV blez rLda, cpdone # exit early if lda < 0 @@ -289,9 +289,7 @@ vtcode: transpose: # turn on vector unit - mfpcr a13,cr0 - ori a13,a13,4 - mtpcr x0,a13,cr0 + setpcr status, SR_EV blez rLda, cpdone # exit early if lda < 0 diff --git a/benchmarks/vvadd/vvadd_main.c b/benchmarks/vvadd/vvadd_main.c index 9738118..70b3f75 100644 --- a/benchmarks/vvadd/vvadd_main.c +++ b/benchmarks/vvadd/vvadd_main.c @@ -77,7 +77,7 @@ void finishTest( int toHostValue ) printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); exit(0); #else - asm( "mtpcr %0, cr30" : : "r" (toHostValue) ); + asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); while ( 1 ) { } #endif } -- cgit v1.1