aboutsummaryrefslogtreecommitdiff
path: root/benchmarks/mm/mm_main.c
blob: 133b5a2d73a17434e8aefef7d8e4ebf4ba8d402e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// See LICENSE for license details.

#include "common.h"
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "util.h"

#pragma GCC optimize ("unroll-loops")

void thread_entry(int cid, int nc)
{
  const int R = 8;
  int m, n, p;
  uint64_t s = 0xdeadbeefU;
  
  m = CBM;
  n = CBN;
  p = CBK;

  t a[m*p];
  t b[p*n];
  t c[m*n];

  for (size_t i = 0; i < m; i++)
    for (size_t j = 0; j < p; j++)
      a[i*p+j] = (t)(s = lfsr(s));
  for (size_t i = 0; i < p; i++)
    for (size_t j = 0; j < n; j++)
      b[i*n+j] = (t)(s = lfsr(s));
  memset(c, 0, m*n*sizeof(c[0]));

  size_t instret, cycles;
  for (int i = 0; i < R; i++)
  {
    instret = -read_csr(minstret);
    cycles = -read_csr(mcycle);
    mm(m, n, p, a, p, b, n, c, n);
    instret += read_csr(minstret);
    cycles += read_csr(mcycle);
  }

  asm volatile("fence");

  printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
         cid, RBM, RBN, RBK, CBM, CBN, CBK);
  printf("C%d: %d instructions\n", cid, (int)(instret));
  printf("C%d: %d cycles\n", cid, (int)(cycles));
  printf("C%d: %d flops\n", cid, 2*m*n*p);
  printf("C%d: %d Mflops @ 1 GHz\n", cid, 2000*m*n*p/(cycles));

#if 1
  for (size_t i = 0; i < m; i++)
  {
    for (size_t j = 0; j < n; j++)
    {
      t s = 0;
      for (size_t k = 0; k < p; k++)
        s += a[i*p+k] * b[k*n+j];
      s *= R;
      if (fabs(c[i*n+j]-s) > fabs(1e-6*s))
      {
        printf("C%d: c[%lu][%lu] %f != %f\n", cid, i, j, c[i*n+j], s);
        exit(1);
      }
    }
  }
#endif

  barrier(nc);
  exit(0);
}