path: root/libitm/barrier.tpl

/* -*- c++ -*- */
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
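
/* This is a template body: the ITM_BARRIERS macro defined at the end is
   expanded once per ABI data type to generate the _ITM_R* and _ITM_W*
   read and write barriers from the do_read/do_write helpers below.  */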

#include "unaligned.h"

namespace {

using namespace GTM;

template<typename T>
T do_read (const T *ptr, abi_dispatch::lock_type lock)
{
  //
  // Find the cacheline that holds the current value of *PTR.
  //
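  // abi_disp() returns the dispatch (TM method) currently in effect
  // for this transaction.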
  abi_dispatch *disp = abi_disp();
  uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
  // Normalize PTR by chopping off the bottom bits so we can search
  // for PTR in the cacheline hash.
  uintptr_t iline = iptr & -CACHELINE_SIZE;
  // The position in the resulting cacheline where *PTR is actually stored.
  uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
  const gtm_cacheline *pline = reinterpret_cast<const gtm_cacheline *>(iline);
  // Search for the actual cacheline that holds the current value of *PTR.
  const gtm_cacheline *line = disp->read_lock(pline, lock);
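  // Note that the method may hand back a private copy of the line
  // rather than PLINE itself, which is why the multi-line case below
  // has to check whether the two locked lines are in fact adjacent.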

  // Point to the position in the cacheline where *PTR is stored.
  ptr = reinterpret_cast<const T *>(&line->b[iofs]);

  // Straight loads, because we're either aligned or we don't care
  // about alignment.
  //
  // If type T requires alignment, do a straight load when we're
  // aligned.  Otherwise do a straight load as long as the access fits
  // entirely in this cacheline, i.e. it won't span multiple cachelines.
  if (__builtin_expect (strict_alignment<T>::value
			? (iofs & (sizeof (T) - 1)) == 0
			: iofs + sizeof(T) <= CACHELINE_SIZE, 1))
    {
    do_normal_load:
      return *ptr;
    }
  // If alignment on T is necessary, but we're unaligned, yet we fit
  // entirely in this cacheline... do the unaligned load dance.
  else if (__builtin_expect (strict_alignment<T>::value
			     && iofs + sizeof(T) <= CACHELINE_SIZE, 1))
    {
    do_unaligned_load:
      return unaligned_load<T>(ptr);
    }
  // Otherwise, this load will span multiple cachelines.
  else
    {
      // Get the following cacheline for the rest of the data.
      const gtm_cacheline *line2 = disp->read_lock(pline + 1, lock);

      // If the two cachelines are adjacent, just load it all in one
      // swoop.
      if (line2 == line + 1)
	{
	  if (!strict_alignment<T>::value)
	    goto do_normal_load;
	  else
	    goto do_unaligned_load;
	}
      else
	{
	  // Otherwise, ask the backend to load from two different
	  // cachelines.
	  return unaligned_load2<T>(line, line2, iofs);
	}
    }
}

template<typename T>
void do_write (T *ptr, T val, abi_dispatch::lock_type lock)
{
  // Note: See the comments for do_read() above for hints on this
  // function.  Ideally we should abstract out a lot of the common
  // logic from these two functions and avoid all this duplication.

  abi_dispatch *disp = abi_disp();
  uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
  uintptr_t iline = iptr & -CACHELINE_SIZE;
  uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
  gtm_cacheline *pline = reinterpret_cast<gtm_cacheline *>(iline);
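  // Dirty-byte mask with the low sizeof(T) bits set, one bit per byte
  // to be written.  (Writing it as (2 << (sizeof(T) - 1)) - 1 rather
  // than (1 << sizeof(T)) - 1 presumably keeps the shift count below
  // the mask width for the widest types; the value is the same.)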
  gtm_cacheline_mask m = ((gtm_cacheline_mask)2 << (sizeof(T) - 1)) - 1;
  abi_dispatch::mask_pair pair = disp->write_lock(pline, lock);

  ptr = reinterpret_cast<T *>(&pair.line->b[iofs]);

  if (__builtin_expect (strict_alignment<T>::value
			? (iofs & (sizeof (val) - 1)) == 0
			: iofs + sizeof(val) <= CACHELINE_SIZE, 1))
    {
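      // Aligned, or alignment irrelevant and the store fits in this one
      // cacheline: record the written bytes in the line's dirty mask and
      // store directly.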
      *pair.mask |= m << iofs;
    do_normal_store:
      *ptr = val;
    }
  else if (__builtin_expect (strict_alignment<T>::value
			     && iofs + sizeof(val) <= CACHELINE_SIZE, 1))
    {
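      // T requires alignment and we're misaligned, but the store still
      // fits within this cacheline: mark the bytes and do an unaligned
      // store.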
      *pair.mask |= m << iofs;
    do_unaligned_store:
      unaligned_store<T>(ptr, val);
    }
  else
    {
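      // The store spans two cachelines.  Mark the written bytes in the
      // first line's mask (bits shifted past the top are discarded),
      // acquire the following line, and mark the spilled-over bytes in
      // its mask as well.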
      *pair.mask |= m << iofs;
      abi_dispatch::mask_pair pair2 = disp->write_lock(pline + 1, lock);

      uintptr_t ileft = CACHELINE_SIZE - iofs;
      *pair2.mask |= m >> ileft;

      if (pair2.line == pair.line + 1)
	{
	  if (!strict_alignment<T>::value)
	    goto do_normal_store;
	  else
	    goto do_unaligned_store;
	}
      else
	unaligned_store2<T>(pair.line, pair2.line, iofs, val);
    }
}

} /* anonymous namespace */
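
/* The macros below stamp out the public ABI entry points.  ITM_READ(T, LOCK)
   defines _ITM_<LOCK><T> (e.g. _ITM_RU4, _ITM_RfWU8) forwarding to do_read
   with the corresponding lock_type; ITM_WRITE does the same for the write
   barriers via do_write; ITM_BARRIERS emits the full set of read and write
   barriers for one type.  */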

#define ITM_READ(T, LOCK)						\
  _ITM_TYPE_##T ITM_REGPARM _ITM_##LOCK##T (const _ITM_TYPE_##T *ptr)	\
  {									\
    return do_read (ptr, abi_dispatch::LOCK);				\
  }

#define ITM_WRITE(T, LOCK)						\
  void ITM_REGPARM _ITM_##LOCK##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val) \
  {									\
    do_write (ptr, val, abi_dispatch::LOCK);				\
  }

#define ITM_BARRIERS(T)		\
  ITM_READ(T, R)		\
  ITM_READ(T, RaR)		\
  ITM_READ(T, RaW)		\
  ITM_READ(T, RfW)		\
  ITM_WRITE(T, W)		\
  ITM_WRITE(T, WaR)		\
  ITM_WRITE(T, WaW)
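
/* Usage sketch (illustrative only; which file includes this template is an
   assumption, not something the template spells out): a per-type barrier
   source would include it and expand ITM_BARRIERS once per ABI type, e.g.

     #include "libitm_i.h"
     #include "barrier.tpl"

     ITM_BARRIERS(U4)
     ITM_BARRIERS(U8)
     ITM_BARRIERS(D)

   which defines _ITM_RU4, _ITM_RaRU4, ..., _ITM_WaWU8, _ITM_RD, and so on.  */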