aboutsummaryrefslogtreecommitdiff
path: root/benchmarks/sort/sort_main.c
blob: 29b46c53930c3d12eeaff1cd6498799851bb8856 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// See LICENSE for license details.

// ****************************************************************************
// sort benchmark from DARPA PERFECT TAV suite
// ---------------------------------------------------------------------------- 
#include "sort.h"
#include "util.h"
#include "dataset.h"


// Need 7 times the input size for: input data, indices, 
// four copies, and buckets.
FAKE_MALLOC_INIT( (8 * DATA_SIZE_SORT * TRIALS_SORT), radix )


#if defined(USE_N_SQUARED_SORT)
const char* algo = "N_SQUARED";
#elif defined(USE_RADIX_SORT)
const char* algo = "RADIX";
#elif defined(USE_INSERTION_SORT)
const char* algo = "INSERTION";
#else
const char* algo = "QUICKSORT";
#endif



int main( int argc, char* argv[] )
{
  int err;

  int* index = fake_malloc_radix (sizeof(int) * DATA_SIZE_SORT * TRIALS_SORT);
  for(int trial = 0; trial < TRIALS_SORT; trial++)
    for ( int i = 0; i < DATA_SIZE_SORT; i++ )
      index[i + (DATA_SIZE_SORT * trial)] = i;

#ifdef PREALLOCATE
  // Access every element of input_data_sort to make sure it's in cache
  // (or at least that as much as possible of its beginning is).
  float sum = 0;
  for(int i = (DATA_SIZE_SORT * TRIALS_SORT)-1; i >= 0; i--) {
    sum += input_data_sort[i];
  }
  if(sum < 0.1)
    return 1;

  const bool prealloc = true;
#else
  const bool prealloc = false;
#endif
  
  setStats(1);

#define read_csr_safe(reg) ({ long __tmp = 0; \
  asm volatile ("csrr %0, " #reg : "+r"(__tmp)); \
  __tmp; })


  long cycles_total = 0;
  long instret_total = 0;

  for(int i = 0; i < TRIALS_SORT; i++) {
    long cycles = read_csr_safe(cycle);
    long instret = read_csr_safe(instret);

    float* input_data_trial = &input_data_sort[DATA_SIZE_SORT * i];
    int* index_trial = &index[DATA_SIZE_SORT * i];

#if defined(USE_N_SQUARED_SORT)
    err = n_squared_sort ( input_data_trial, index_trial, DATA_SIZE_SORT );
#elif defined(USE_RADIX_SORT)
    err = radix_sort_tuples ( (int *) input_data_trial, index_trial, DATA_SIZE_SORT, RADIX_BITS );
#elif defined(USE_INSERTION_SORT)
    err = insertion_sort ( input_data_trial, index_trial, DATA_SIZE_SORT );
#else
    err = quicksort ( input_data_trial, index_trial, DATA_SIZE_SORT );
#endif

    cycles_total  += read_csr_safe(cycle) - cycles;
    instret_total += read_csr_safe(instret) - instret;
  }

  setStats(0);

  printf("DONE SORTING.\n", 0);

  // Validate results
  err = 0;
  for(int trial = 0; trial < TRIALS_SORT; trial++)
  {
    float* input_data_trial = &input_data_sort[DATA_SIZE_SORT * trial];
    int* index_trial = &index[DATA_SIZE_SORT * trial];

    for(int i = 0; i < DATA_SIZE_SORT-1; i++)
    {
      if((unsigned int) input_data_trial[i] > (unsigned int) input_data_trial[i+1])
      {
        err = i;
        for(int j = 0; j < DATA_SIZE_SORT; j++)
          printf("TRIAL %d, element %d:\t%d\n", trial, j, input_data_trial[j]);
        break;
      }
    }
  }

  printf("sort_cycles  = %ld\n", cycles_total/TRIALS_SORT);
  printf("sort_instret = %d\n", instret_total/TRIALS_SORT);
  printf("sort_size    = %d\n", DATA_SIZE_SORT);
  printf("sort_trials  = %d\n", TRIALS_SORT);
  printf("sort_algo    = %s\n", algo);
  printf("sort_radix_bits = %d\n", RADIX_BITS);
  printf("sort_prealloc = %s\n", prealloc ? "true" : "false");
  printf("sort_err = %d\n", err);

  return err;
}