aboutsummaryrefslogtreecommitdiff
path: root/libc/src/__support/GPU/fixedstack.h
blob: 6ceaa2fc7335545ce58e2bea7b0ae6fdab0589b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
//===-- A lock-free data structure for a fixed capacity stack ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H
#define LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H

#include "src/__support/CPP/atomic.h"
#include "src/__support/threads/sleep.h"

#include <stdint.h>

namespace LIBC_NAMESPACE_DECL {

// A lock-free fixed size stack backed by an underlying array of data. It
// supports push and pop operations in a completely lock-free manner.
template <typename T, uint32_t CAPACITY> struct alignas(16) FixedStack {
  // The index is stored as a 20-bit value and cannot index into any more.
  static_assert(CAPACITY < 1024 * 1024, "Invalid buffer size");

  // The head of the free and used stacks. Represents as a 20-bit index combined
  // with a 44-bit ABA tag that is updated in a single atomic operation.
  uint64_t free;
  uint64_t used;

  // The stack is a linked list of indices into the underlying data
  uint32_t next[CAPACITY];
  T data[CAPACITY];

  // Get the 20-bit index into the underlying array from the head.
  LIBC_INLINE static constexpr uint32_t get_node(uint64_t head) {
    return static_cast<uint32_t>(head & 0xfffff);
  }

  // Increment the old ABA tag and merge it into the new index.
  LIBC_INLINE static constexpr uint64_t make_head(uint64_t orig,
                                                  uint32_t node) {
    return static_cast<uint64_t>(node) | (((orig >> 20ul) + 1ul) << 20ul);
  }

  // Attempts to pop data from the given stack by making it point to the next
  // node. We repeatedly attempt to write to the head using compare-and-swap,
  // expecting that it has not been changed by any other thread.
  LIBC_INLINE uint32_t pop_impl(cpp::AtomicRef<uint64_t> head) {
    uint64_t orig = head.load(cpp::MemoryOrder::RELAXED);

    for (;;) {
      if (get_node(orig) == CAPACITY)
        return CAPACITY;

      uint32_t node =
          cpp::AtomicRef(next[get_node(orig)]).load(cpp::MemoryOrder::RELAXED);
      if (head.compare_exchange_strong(orig, make_head(orig, node),
                                       cpp::MemoryOrder::ACQUIRE,
                                       cpp::MemoryOrder::RELAXED))
        break;
    }
    return get_node(orig);
  }

  // Attempts to push data to the given stack by making it point to the new
  // node. We repeatedly attempt to write to the head using compare-and-swap,
  // expecting that it has not been changed by any other thread.
  LIBC_INLINE uint32_t push_impl(cpp::AtomicRef<uint64_t> head, uint32_t node) {
    uint64_t orig = head.load(cpp::MemoryOrder::RELAXED);
    for (;;) {
      next[node] = get_node(orig);
      if (head.compare_exchange_strong(orig, make_head(orig, node),
                                       cpp::MemoryOrder::RELEASE,
                                       cpp::MemoryOrder::RELAXED))
        break;
    }
    return get_node(head.load(cpp::MemoryOrder::RELAXED));
  }

public:
  // Initialize the free stack to be full and the used stack to be empty. We use
  // the capacity of the stack as a sentinel value.
  LIBC_INLINE constexpr FixedStack() : free(0), used(CAPACITY), data{} {
    for (uint32_t i = 0; i < CAPACITY; ++i)
      next[i] = i + 1;
  }

  LIBC_INLINE bool push(const T &val) {
    uint32_t node = pop_impl(cpp::AtomicRef(free));
    if (node == CAPACITY)
      return false;

    data[node] = val;
    push_impl(cpp::AtomicRef(used), node);
    return true;
  }

  LIBC_INLINE bool pop(T &val) {
    uint32_t node = pop_impl(cpp::AtomicRef(used));
    if (node == CAPACITY)
      return false;

    val = data[node];
    push_impl(cpp::AtomicRef(free), node);
    return true;
  }
};

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H