diff options
author | Howard Mao <zhehao.mao@gmail.com> | 2016-09-09 13:53:07 -0700 |
---|---|---|
committer | Howard Mao <zhehao.mao@gmail.com> | 2016-11-10 11:39:02 -0800 |
commit | c20f5fb67ffe6d8011ce7d7a41dd8d8c571ca178 (patch) | |
tree | 587fd83cb51f18e52dab0e5b19e06e764d415048 | |
parent | f204b5c2b6b8914060614cd77ea10509968c3fa4 (diff) | |
download | riscv-tests-c20f5fb67ffe6d8011ce7d7a41dd8d8c571ca178.zip riscv-tests-c20f5fb67ffe6d8011ce7d7a41dd8d8c571ca178.tar.gz riscv-tests-c20f5fb67ffe6d8011ce7d7a41dd8d8c571ca178.tar.bz2 |
add a software memcpy benchmark
-rw-r--r-- | benchmarks/Makefile | 1 | ||||
-rw-r--r-- | benchmarks/memcpy/bmark.mk | 31 | ||||
-rw-r--r-- | benchmarks/memcpy/memcpy.c | 40 | ||||
-rw-r--r-- | benchmarks/memcpy/memcpy.h | 9 | ||||
-rw-r--r-- | benchmarks/memcpy/memcpy_main.c | 28 |
5 files changed, 109 insertions, 0 deletions
```diff
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
index ce9d4f9..d246ef7 100644
--- a/benchmarks/Makefile
+++ b/benchmarks/Makefile
@@ -29,6 +29,7 @@ bmarks = \
 	spmv \
 	mt-vvadd \
 	mt-matmul \
+	memcpy \
 
 bmarks_host = \
 	median \
diff --git a/benchmarks/memcpy/bmark.mk b/benchmarks/memcpy/bmark.mk
new file mode 100644
index 0000000..2808b73
--- /dev/null
+++ b/benchmarks/memcpy/bmark.mk
@@ -0,0 +1,31 @@
+#=======================================================================
+# UCB CS250 Makefile fragment for benchmarks
+#-----------------------------------------------------------------------
+#
+# Each benchmark directory should have its own fragment which
+# essentially lists what the source files are and how to link them
+# into an riscv and/or host executable. All variables should include
+# the benchmark name as a prefix so that they are unique.
+#
+
+memcpy_c_src = \
+	memcpy_main.c \
+	memcpy.c \
+	syscalls.c \
+
+memcpy_riscv_src = \
+	crt.S \
+
+memcpy_c_objs = $(patsubst %.c, %.o, $(memcpy_c_src))
+memcpy_riscv_objs = $(patsubst %.S, %.o, $(memcpy_riscv_src))
+
+memcpy_host_bin = memcpy.host
+$(memcpy_host_bin) : $(memcpy_c_src)
+	$(HOST_COMP) $^ -o $(memcpy_host_bin)
+
+memcpy_riscv_bin = memcpy.riscv
+$(memcpy_riscv_bin) : $(memcpy_c_objs) $(memcpy_riscv_objs)
+	$(RISCV_LINK) $(memcpy_c_objs) $(memcpy_riscv_objs) -o $(memcpy_riscv_bin) $(RISCV_LINK_OPTS)
+
+junk += $(memcpy_c_objs) $(memcpy_riscv_objs) \
+        $(memcpy_host_bin) $(memcpy_riscv_bin)
diff --git a/benchmarks/memcpy/memcpy.c b/benchmarks/memcpy/memcpy.c
new file mode 100644
index 0000000..b2511d5
--- /dev/null
+++ b/benchmarks/memcpy/memcpy.c
@@ -0,0 +1,40 @@
+#include "memcpy.h"
+#include <stdlib.h>
+#include <stdint.h>
+
+void fill(int *arr, size_t n)
+{
+	for (int i = 0; i < n; i++) {
+		arr[i] = i << 4;
+	}
+}
+
+#pragma GCC optimize ("unroll-loops")
+
+#define UNROLL 4
+
+void fast_memcpy(void *dst, void *src, size_t len)
+{
+	uintptr_t ptr_mask = (uintptr_t) dst | (uintptr_t) src | len;
+
+	if ((ptr_mask & (sizeof(uintptr_t) - 1)) == 0) {
+		uintptr_t *d = dst, *s = src;
+		size_t n = len / sizeof(uintptr_t);
+		size_t lenu = ((n / UNROLL) * UNROLL) * sizeof(uintptr_t);
+
+		while (d < (uintptr_t *)(dst + lenu)) {
+			for (int i = 0; i < UNROLL; i++)
+				d[i] = s[i];
+			d += UNROLL;
+			s += UNROLL;
+		}
+
+		while (d < (uintptr_t *)(dst + len))
+			*(d++) = *(s++);
+	} else {
+		char *d = dst, *s = src;
+
+		while (d < (char *)(dst + len))
+			*(d++) = *(s++);
+	}
+}
diff --git a/benchmarks/memcpy/memcpy.h b/benchmarks/memcpy/memcpy.h
new file mode 100644
index 0000000..ca65756
--- /dev/null
+++ b/benchmarks/memcpy/memcpy.h
@@ -0,0 +1,9 @@
+#ifndef __MEMCPY_H__
+#define __MEMCPY_H__
+
+#include <stdlib.h>
+
+void fill(int *arr, size_t n);
+void fast_memcpy(void *dst, void *src, size_t len);
+
+#endif
diff --git a/benchmarks/memcpy/memcpy_main.c b/benchmarks/memcpy/memcpy_main.c
new file mode 100644
index 0000000..0436c73
--- /dev/null
+++ b/benchmarks/memcpy/memcpy_main.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include "util.h"
+#include "memcpy.h"
+
+#define MAX_SIZE 4 * 4096
+#define START_SIZE 256
+
+int a[MAX_SIZE / sizeof(int)];
+int b[MAX_SIZE / sizeof(int)];
+
+void thread_entry(int cid, int nc)
+{
+
+	int copy_size = START_SIZE;
+
+	fill(a, MAX_SIZE / sizeof(uint64_t));
+	asm volatile ("fence");
+
+	while (copy_size <= MAX_SIZE) {
+		memcpy(b, a, MAX_SIZE);
+		printf("%d: ", copy_size);
+		stats(fast_memcpy(b, a, copy_size), 3);
+		verify(copy_size / sizeof(int), b, a);
+		copy_size *= 2;
+	}
+
+	exit(0);
+}
```