diff options
author | Aldy Hernandez <aldyh@gcc.gnu.org> | 2011-11-08 11:13:41 +0000 |
---|---|---|
committer | Aldy Hernandez <aldyh@gcc.gnu.org> | 2011-11-08 11:13:41 +0000 |
commit | 0a35513e4e73ec9c6f24e791d344308ad3ed030d (patch) | |
tree | e07de8d0b6265f8d72388d335bd471022e753d57 /libitm/config | |
parent | 287188ea072dd887a17dd56360531c3a22307e7c (diff) | |
download | gcc-0a35513e4e73ec9c6f24e791d344308ad3ed030d.zip gcc-0a35513e4e73ec9c6f24e791d344308ad3ed030d.tar.gz gcc-0a35513e4e73ec9c6f24e791d344308ad3ed030d.tar.bz2 |
Merge from transactional-memory branch.
From-SVN: r181154
Diffstat (limited to 'libitm/config')
27 files changed, 3199 insertions, 0 deletions
diff --git a/libitm/config/alpha/cacheline.h b/libitm/config/alpha/cacheline.h new file mode 100644 index 0000000..5e38486 --- /dev/null +++ b/libitm/config/alpha/cacheline.h @@ -0,0 +1,122 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_ALPHA_CACHELINE_H +#define LIBITM_ALPHA_CACHELINE_H 1 + +// A cacheline is the smallest unit with which locks are associated. +// The current implementation of the _ITM_[RW] barriers assumes that +// all data types can fit (aligned) within a cachline, which means +// in practice sizeof(complex long double) is the smallest cacheline size. +// It ought to be small enough for efficient manipulation of the +// modification mask, below. +#define CACHELINE_SIZE 64 + +#ifdef __alpha_bwx__ +# include "config/generic/cacheline.h" +#else +// If we don't have byte-word stores, then we'll never be able to +// adjust *all* of the byte loads/stores to be truely atomic. 
So +// only guarantee 4-byte aligned values atomicly stored, exactly +// like the native system. Use byte zap instructions to accelerate +// sub-word masked stores. + +namespace GTM HIDDEN { + +// A gtm_cacheline_mask stores a modified bit for every modified byte +// in the cacheline with which it is associated. +typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask; + +union gtm_cacheline +{ + // Byte access to the cacheline. + unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE))); + + // Larger sized access to the cacheline. + uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)]; + uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; + uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; + gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; + + // Store S into D, but only the bytes specified by M. + static void store_mask(uint32_t *d, uint32_t s, uint8_t m); + static void store_mask(uint64_t *d, uint64_t s, uint8_t m); + + // Copy S to D, but only the bytes specified by M. + static void copy_mask (gtm_cacheline * __restrict d, + const gtm_cacheline * __restrict s, + gtm_cacheline_mask m); + + // A write barrier to emit after (a series of) copy_mask. 
+ static void copy_mask_wb () { atomic_write_barrier(); } +}; + +inline void ALWAYS_INLINE +gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m) +{ + const uint8_t tm = (1 << sizeof(uint32_t)) - 1; + + m &= tm; + if (__builtin_expect (m, tm)) + { + if (__builtin_expect (m == tm, 1)) + *d = s; + else + *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m); + } +} + +inline void ALWAYS_INLINE +gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m) +{ + if (__builtin_expect (m, 0xff)) + { + if (__builtin_expect (m == 0xff, 1)) + *d = s; + else + { + typedef uint32_t *p32 __attribute__((may_alias)); + p32 d32 = reinterpret_cast<p32>(d); + + if ((m & 0x0f) == 0x0f) + { + d32[0] = s; + m &= 0xf0; + } + else if ((m & 0xf0) == 0xf0) + { + d32[1] = s >> 32; + m &= 0x0f; + } + + if (m) + *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m); + } + } +} + +} // namespace GTM + +#endif // __alpha_bwx__ +#endif // LIBITM_ALPHA_CACHELINE_H diff --git a/libitm/config/alpha/sjlj.S b/libitm/config/alpha/sjlj.S new file mode 100644 index 0000000..d60a82d --- /dev/null +++ b/libitm/config/alpha/sjlj.S @@ -0,0 +1,108 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + .text + .align 4 + .globl _ITM_beginTransaction + .ent _ITM_beginTransaction + +#define FRAME 144 + +_ITM_beginTransaction: + ldgp $29, 0($27) + subq $30, FRAME, $30 + .frame $30, FRAME, $26, 0 + .mask 0x04000000, 0 + stq $26, 0($30) + .prologue 1 + + stq $9, 8($30) + stq $10, 16($30) + addq $30, FRAME, $0 + stq $11, 24($30) + + stq $12, 32($30) + stq $13, 40($30) + stq $14, 48($30) + stq $15, 56($30) + + stq $0, 64($30) + stt $f2, 72($30) + stt $f3, 80($30) + stt $f4, 88($30) + + stt $f5, 96($30) + stt $f6, 104($30) + stt $f7, 112($30) + stt $f8, 120($30) + + stt $f9, 128($30) + mov $30, $17 +#ifdef __PIC__ + unop + bsr $26, GTM_begin_transaction !samegp +#else + jsr $26, GTM_begin_transaction + ldgp $29, 0($26) +#endif + + ldq $26, 0($30) + addq $30, FRAME, $30 + ret +.end _ITM_beginTransaction + + .align 4 + .globl GTM_longjmp + .hidden GTM_longjmp + .ent GTM_longjmp + +GTM_longjmp: + .prologue 0 + ldq $26, 0($16) + ldq $9, 8($16) + ldq $10, 16($16) + ldq $11, 24($16) + + ldq $12, 32($16) + ldq $13, 40($16) + ldq $14, 48($16) + ldq $15, 56($16) + + ldq $1, 64($16) + ldt $f2, 72($16) + ldt $f3, 80($16) + ldt $f4, 88($16) + + ldt $f5, 96($16) + ldt $f6, 104($16) + ldt $f7, 112($16) + ldt $f8, 120($16) + + ldt $f9, 128($16) + mov $17, $0 + mov $1, $30 + ret +.end GTM_longjmp + +.section .note.GNU-stack, "", @progbits diff --git a/libitm/config/alpha/target.h b/libitm/config/alpha/target.h new file mode 100644 index 0000000..121546f --- /dev/null +++ b/libitm/config/alpha/target.h @@ -0,0 +1,60 @@ +/* Copyright (C) 2009, 2011 
Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +namespace GTM HIDDEN { + +typedef struct gtm_jmpbuf +{ + unsigned long pc; + unsigned long s[7]; + void *cfa; + unsigned long f[8]; +} gtm_jmpbuf; + +/* Alpha generally uses a fixed page size of 8K. */ +#define PAGE_SIZE 8192 +#define FIXED_PAGE_SIZE 1 + +/* The size of one line in hardware caches (in bytes). */ +#define HW_CACHELINE_SIZE 64 + +static inline void +cpu_relax (void) +{ + __asm volatile ("" : : : "memory"); +} + +static inline void +atomic_read_barrier (void) +{ + __sync_synchronize (); +} + +static inline void +atomic_write_barrier (void) +{ + __asm volatile ("wmb" : : : "memory"); +} + +} // namespace GTM diff --git a/libitm/config/alpha/unaligned.h b/libitm/config/alpha/unaligned.h new file mode 100644 index 0000000..3d091ae --- /dev/null +++ b/libitm/config/alpha/unaligned.h @@ -0,0 +1,118 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. 
+ Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef LIBITM_ALPHA_UNALIGNED_H +#define LIBITM_ALPHA_UNALIGNED_H 1 + +#define HAVE_ARCH_UNALIGNED_LOAD2_U2 1 +#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1 +#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1 + +#ifndef __alpha_bwx__ +#define HAVE_ARCH_UNALIGNED_STORE2_U2 1 +#endif +#define HAVE_ARCH_UNALIGNED_STORE2_U4 1 +#define HAVE_ARCH_UNALIGNED_STORE2_U8 1 + +#include "config/generic/unaligned.h" + +namespace GTM HIDDEN { + +template<> +inline uint16_t ALWAYS_INLINE +unaligned_load2<uint16_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; + uint64_t v2 = c2->u64[0]; + + return __builtin_alpha_extwl (v1, ofs) | __builtin_alpha_extwh (v2, ofs); +} + +template<> +inline uint32_t ALWAYS_INLINE +unaligned_load2<uint32_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; + uint64_t v2 = c2->u64[0]; + + return __builtin_alpha_extll (v1, ofs) + __builtin_alpha_extlh (v2, ofs); +} + +template<> +inline uint64_t ALWAYS_INLINE +unaligned_load2<uint64_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; + uint64_t v2 = c2->u64[0]; + + return __builtin_alpha_extql (v1, ofs) | __builtin_alpha_extqh (v2, ofs); +} + +#ifndef __alpha_bwx__ +template<> +inline void +unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2, + size_t ofs, uint16_t val) +{ + uint32_t vl = (uint32_t)val << 24, vh = val >> 8; + + gtm_cacheline::store_mask (&c1->u32[CACHELINE_SIZE / 4 - 1], vl, 4); + gtm_cacheline::store_mask (&c2->u32[0], vh, 1); +} +#endif + +template<> +inline void +unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2, + size_t ofs, uint32_t val) +{ + uint64_t vl = __builtin_alpha_insll (val, ofs); + uint64_t ml = __builtin_alpha_insll (~0u, ofs); + uint64_t vh = __builtin_alpha_inslh (val, ofs); + uint64_t mh = 
__builtin_alpha_inslh (~0u, ofs); + + gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml); + gtm_cacheline::store_mask (&c2->u64[0], vh, mh); +} + +template<> +inline void +unaligned_store2<uint64_t>(gtm_cacheline *c1, gtm_cacheline *c2, + size_t ofs, uint64_t val) +{ + uint64_t vl = __builtin_alpha_insql (val, ofs); + uint64_t ml = __builtin_alpha_insql (~0u, ofs); + uint64_t vh = __builtin_alpha_insqh (val, ofs); + uint64_t mh = __builtin_alpha_insqh (~0u, ofs); + + gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml); + gtm_cacheline::store_mask (&c2->u64[0], vh, mh); +} + +} // namespace GTM + +#endif // LIBITM_ALPHA_UNALIGNED_H diff --git a/libitm/config/generic/cacheline.cc b/libitm/config/generic/cacheline.cc new file mode 100644 index 0000000..108ffba --- /dev/null +++ b/libitm/config/generic/cacheline.cc @@ -0,0 +1,49 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "libitm_i.h" + + +namespace GTM HIDDEN { + +void +gtm_cacheline::copy_mask (gtm_cacheline * __restrict d, + const gtm_cacheline * __restrict s, + gtm_cacheline_mask m) +{ + const size_t n = sizeof (gtm_word); + + if (m == (gtm_cacheline_mask) -1) + { + *d = *s; + return; + } + if (__builtin_expect (m == 0, 0)) + return; + + for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n) + store_mask (&d->w[i], s->w[i], m); +} + +} // namespace GTM diff --git a/libitm/config/generic/cacheline.h b/libitm/config/generic/cacheline.h new file mode 100644 index 0000000..0a5af761 --- /dev/null +++ b/libitm/config/generic/cacheline.h @@ -0,0 +1,107 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_CACHELINE_H +#define LIBITM_CACHELINE_H 1 + +namespace GTM HIDDEN { + +// A cacheline is the smallest unit with which locks are associated. 
+// The current implementation of the _ITM_[RW] barriers assumes that +// all data types can fit (aligned) within a cachline, which means +// in practice sizeof(complex long double) is the smallest cacheline size. +// It ought to be small enough for efficient manipulation of the +// modification mask, below. +#ifndef CACHELINE_SIZE +# define CACHELINE_SIZE 32 +#endif + +// A gtm_cacheline_mask stores a modified bit for every modified byte +// in the cacheline with which it is associated. +typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask; + +union gtm_cacheline +{ + // Byte access to the cacheline. + unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE))); + + // Larger sized access to the cacheline. + uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)]; + uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; + uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; + gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; + + // Store S into D, but only the bytes specified by M. + template<typename T> static void store_mask (T *d, T s, uint8_t m); + + // Copy S to D, but only the bytes specified by M. + static void copy_mask (gtm_cacheline * __restrict d, + const gtm_cacheline * __restrict s, + gtm_cacheline_mask m); + + // A write barrier to emit after (a series of) copy_mask. + // When we're emitting non-temporal stores, the normal strong + // ordering of the machine doesn't apply. 
+ static void copy_mask_wb () { atomic_write_barrier(); } +}; + +template<typename T> +inline void +gtm_cacheline::store_mask (T *d, T s, uint8_t m) +{ + const uint8_t tm = (1 << sizeof(T)) - 1; + + if (__builtin_expect (m & tm, tm)) + { + if (__builtin_expect ((m & tm) == tm, 1)) + *d = s; + else + { + const int half = sizeof(T) / 2; + typedef typename sized_integral<half>::type half_t; + half_t *dhalf = reinterpret_cast<half_t *>(d); + half_t s1, s2; + + if (WORDS_BIGENDIAN) + s1 = s >> half*8, s2 = s; + else + s1 = s, s2 = s >> half*8; + + store_mask (dhalf, s1, m); + store_mask (dhalf + 1, s2, m >> half); + } + } +} + +template<> +inline void ALWAYS_INLINE +gtm_cacheline::store_mask<uint8_t> (uint8_t *d, uint8_t s, uint8_t m) +{ + if (m & 1) + *d = s; +} + +} // namespace GTM + +#endif // LIBITM_CACHELINE_H diff --git a/libitm/config/generic/cachepage.h b/libitm/config/generic/cachepage.h new file mode 100644 index 0000000..a5472f3 --- /dev/null +++ b/libitm/config/generic/cachepage.h @@ -0,0 +1,77 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_CACHEPAGE_H +#define LIBITM_CACHEPAGE_H 1 + +namespace GTM HIDDEN { + +// A "page" worth of saved cachelines plus modification masks. This +// arrangement is intended to minimize the overhead of alignment. The +// PAGE_SIZE defined by the target must be a constant for this to work, +// which means that this definition may not be the same as the real +// system page size. An additional define of FIXED_PAGE_SIZE by the +// target indicates that PAGE_SIZE exactly matches the system page size. + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +struct gtm_cacheline_page +{ + static const size_t LINES + = ((PAGE_SIZE - sizeof(gtm_cacheline_page *)) + / (CACHELINE_SIZE + sizeof(gtm_cacheline_mask))); + + gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE))); + gtm_cacheline_mask masks[LINES]; + gtm_cacheline_page *prev; + + static gtm_cacheline_page * + page_for_line (gtm_cacheline *c) + { + return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE); + } + + gtm_cacheline_mask * + mask_for_line (gtm_cacheline *c) + { + size_t index = c - &this->lines[0]; + return &this->masks[index]; + } + + static gtm_cacheline_mask * + mask_for_page_line (gtm_cacheline *c) + { + gtm_cacheline_page *p = page_for_line (c); + return p->mask_for_line (c); + } + + static void *operator new (size_t); + static void operator delete (void *); +}; + +} // namespace GTM + +#endif // LIBITM_CACHEPAGE_H diff --git a/libitm/config/generic/tls.cc b/libitm/config/generic/tls.cc new file mode 100644 index 0000000..3e82cff --- /dev/null +++ b/libitm/config/generic/tls.cc @@ -0,0 +1,76 @@ +/* Copyright (C) 2010, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. 
+ + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libitm_i.h" + +namespace GTM HIDDEN { + +// Filter out any updates that overlap the libitm stack, as defined by +// TOP (entry point to library) and BOT (below current function). This +// definition should be fine for all stack-grows-down architectures. + +gtm_cacheline_mask __attribute__((noinline)) +gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask) +{ + void *top = gtm_thr()->jb.cfa; + void *bot = __builtin_dwarf_cfa(); + + // We must have come through an entry point that set TOP. + assert (top != NULL); + + if (line + 1 < bot) + { + // Since we don't have the REAL stack boundaries for this thread, + // we cannot know if this is a dead write to a stack address below + // the current function or if it is write to another VMA. In either + // case allowing the write should not affect correctness. + } + else if (line >= top) + { + // A valid write to an address in an outer stack frame, or a write + // to another VMA. 
+ } + else + { + uintptr_t diff = (uintptr_t)top - (uintptr_t)line; + if (diff >= CACHELINE_SIZE) + { + // The write is either fully within the proscribed area, or the tail + // of the cacheline overlaps the proscribed area. Assume that all + // stacks are at least cacheline aligned and declare the head of the + // cacheline dead. + mask = 0; + } + else + { + // The head of the cacheline is within the proscribed area, but the + // tail of the cacheline is live. Eliminate the dead writes. + mask &= (gtm_cacheline_mask)-1 << diff; + } + } + + return mask; +} + +} // namespace GTM diff --git a/libitm/config/generic/tls.h b/libitm/config/generic/tls.h new file mode 100644 index 0000000..e282e54 --- /dev/null +++ b/libitm/config/generic/tls.h @@ -0,0 +1,65 @@ +/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef LIBITM_TLS_H +#define LIBITM_TLS_H 1 + +namespace GTM HIDDEN { + +#if !defined(HAVE_ARCH_GTM_THREAD) || !defined(HAVE_ARCH_GTM_THREAD_DISP) +// Provides a single place to store all this libraries thread-local data. +struct gtm_thread_tls +{ +#ifndef HAVE_ARCH_GTM_THREAD + // The currently active transaction. Elided if the target provides + // some efficient mechanism for storing this. + gtm_thread *thr; +#endif +#ifndef HAVE_ARCH_GTM_THREAD_DISP + // The dispatch table for the STM implementation currently in use. Elided + // if the target provides some efficient mechanism for storing this. + abi_dispatch *disp; +#endif +}; + +extern __thread gtm_thread_tls _gtm_thr_tls; +#endif + +#ifndef HAVE_ARCH_GTM_THREAD +// If the target does not provide optimized access to the thread-local +// data, simply access the TLS variable defined above. +static inline gtm_thread *gtm_thr() { return &_gtm_thr_tls.thr; } +static inline void set_gtm_thr(gtm_thread *x) { _gtm_thr_tls.thr = x; } +#endif + +#ifndef HAVE_ARCH_GTM_THREAD_DISP +// If the target does not provide optimized access to the currently +// active dispatch table, simply access via GTM_THR. +static inline abi_dispatch * abi_disp() { return _gtm_thr_tls.disp; } +static inline void set_abi_disp(abi_dispatch *x) { _gtm_thr_tls.disp = x; } +#endif + +} // namespace GTM + +#endif // LIBITM_TLS_H diff --git a/libitm/config/generic/unaligned.h b/libitm/config/generic/unaligned.h new file mode 100644 index 0000000..50cb13b --- /dev/null +++ b/libitm/config/generic/unaligned.h @@ -0,0 +1,228 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). 
+ + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_UNALIGNED_H +#define LIBITM_UNALIGNED_H 1 + +namespace GTM HIDDEN { + +#ifndef STRICT_ALIGNMENT +#define STRICT_ALIGNMENT 1 +#endif + +// A type trait for whether type T requires strict alignment. +// The generic types are assumed to all be the same; specializations +// for target-specific types should be done in config/cpu/unaligned.h. +template<typename T> + struct strict_alignment + : public std::integral_constant<bool, STRICT_ALIGNMENT> + { }; + +// A helper template for accessing an integral type the same size as T +template<typename T> + struct make_integral + : public sized_integral<sizeof(T)> + { }; + +// A helper class for accessing T as an unaligned value. +template<typename T> +struct __attribute__((packed)) unaligned_helper + { T x; }; + +// A helper class for view-converting T as an integer. +template<typename T> +union view_convert_helper +{ + typedef T type; + typedef make_integral<T> itype; + + type t; + itype i; +}; + +// Generate an unaligned load sequence. +// The compiler knows how to do this for any specific type. 
+template<typename T> +inline T ALWAYS_INLINE +unaligned_load(const void *t) +{ + typedef unaligned_helper<T> UT; + const UT *ut = reinterpret_cast<const UT *>(t); + return ut->x; +} + +// Generate an unaligned store sequence. +template<typename T> +inline void ALWAYS_INLINE +unaligned_store(void *t, T val) +{ + typedef unaligned_helper<T> UT; + UT *ut = reinterpret_cast<UT *>(t); + ut->x = val; +} + +// Generate an unaligned load from two different cachelines. +// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE. +template<typename T> +inline T ALWAYS_INLINE +unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs) +{ + size_t left = CACHELINE_SIZE - ofs; + T ret; + + memcpy (&ret, &c1->b[ofs], left); + memcpy ((char *)&ret + ofs, c2, sizeof(T) - left); + + return ret; +} + +// Generate an unaligned store into two different cachelines. +// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE. +template<typename T> +inline void ALWAYS_INLINE +unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val) +{ + size_t left = CACHELINE_SIZE - ofs; + memcpy (&c1->b[ofs], &val, left); + memcpy (c2, (char *)&val + left, sizeof(T) - left); +} + +#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2 +template<> +inline uint16_t ALWAYS_INLINE +unaligned_load2<uint16_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint16_t v1 = c1->b[CACHELINE_SIZE - 1]; + uint16_t v2 = c2->b[0]; + + if (WORDS_BIGENDIAN) + return v1 << 8 | v2; + else + return v2 << 8 | v1; +} +#endif + +#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4 +template<> +inline uint32_t ALWAYS_INLINE +unaligned_load2<uint32_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; + uint32_t v2 = c2->u32[0]; + int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8; + int s1 = sizeof(uint32_t) * 8 - s2; + + if (WORDS_BIGENDIAN) + return v1 << s2 | v2 >> s1; + else + return v2 << s2 | v1 >> s1; +} +#endif + +#ifndef 
HAVE_ARCH_UNALIGNED_LOAD2_U8 +template<> +inline uint64_t ALWAYS_INLINE +unaligned_load2<uint64_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; + uint64_t v2 = c2->u64[0]; + int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8; + int s1 = sizeof(uint64_t) * 8 - s2; + + if (WORDS_BIGENDIAN) + return v1 << s2 | v2 >> s1; + else + return v2 << s2 | v1 >> s1; +} +#endif + +template<> +inline float ALWAYS_INLINE +unaligned_load2<float>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + typedef view_convert_helper<float> VC; VC vc; + vc.i = unaligned_load2<VC::itype>(c1, c2, ofs); + return vc.t; +} + +template<> +inline double ALWAYS_INLINE +unaligned_load2<double>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + typedef view_convert_helper<double> VC; VC vc; + vc.i = unaligned_load2<VC::itype>(c1, c2, ofs); + return vc.t; +} + +#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2 +template<> +inline void ALWAYS_INLINE +unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2, + size_t ofs, uint16_t val) +{ + uint8_t vl = val, vh = val >> 8; + + if (WORDS_BIGENDIAN) + { + c1->b[CACHELINE_SIZE - 1] = vh; + c2->b[0] = vl; + } + else + { + c1->b[CACHELINE_SIZE - 1] = vl; + c2->b[0] = vh; + } +} +#endif + +#if 0 +#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4 +template<> +inline void ALWAYS_INLINE +unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2, + size_t ofs, uint32_t val) +{ + // ??? We could reuse the store_mask stuff here. 
+} +#endif + +template<> +inline void ALWAYS_INLINE +unaligned_store2<float>(gtm_cacheline *c1, gtm_cacheline *c2, + size_t ofs, float val) +{ + typedef view_convert_helper<float> VC; VC vc; + vc.t = val; + unaligned_store2(c1, c2, ofs, vc.i); +} +#endif + +} // namespace GTM + +#endif // LIBITM_UNALIGNED_H diff --git a/libitm/config/linux/alpha/futex_bits.h b/libitm/config/linux/alpha/futex_bits.h new file mode 100644 index 0000000..997bf0b --- /dev/null +++ b/libitm/config/linux/alpha/futex_bits.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Provide target-specific access to the futex system call. 
/* Alpha/Linux syscall number for futex; define the known constant if the
   system headers did not provide it.  */
#ifndef SYS_futex
#define SYS_futex 394
#endif

/* Issue the 3-argument form of the futex syscall directly.  On Alpha the
   syscall number goes in $0 (v0), arguments in $16-$19 (a0-a3); after
   "callsys", $0 holds the result and $19 (a3) is nonzero on error, in
   which case the (positive) result is the errno value.  We return the
   Linux convention of a negative errno on failure.  */
static inline long
sys_futex0 (int *addr, long op, long val)
{
  register long sc_0 __asm__("$0");
  register long sc_16 __asm__("$16");
  register long sc_17 __asm__("$17");
  register long sc_18 __asm__("$18");
  register long sc_19 __asm__("$19");
  long res;

  sc_0 = SYS_futex;
  sc_16 = (long) addr;
  sc_17 = op;
  sc_18 = val;
  sc_19 = 0;
  /* Clobbers are the Alpha call-clobbered registers not otherwise tied
     to operands, plus memory (the kernel may inspect/modify *addr).  */
  __asm volatile ("callsys"
		  : "=r" (sc_0), "=r"(sc_19)
		  : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19)
		  : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
		    "$22", "$23", "$24", "$25", "$27", "$28", "memory");

  res = sc_0;
  if (__builtin_expect (sc_19, 0))
    res = -res;
  return res;
}

/* ==== libitm/config/linux/futex.cc (new file; GPLv3+ with GCC Runtime
   Library Exception boilerplate elided in this view).  ==== */
If not, see + <http://www.gnu.org/licenses/>. */ + +/* Provide access to the futex system call. */ + +#include "libitm_i.h" +#include "futex.h" +#include <errno.h> + +namespace GTM HIDDEN { + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_PRIVATE_FLAG 128L + + +static long int gtm_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; +static long int gtm_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; + + +void +futex_wait (int *addr, int val) +{ + long res; + + res = sys_futex0 (addr, gtm_futex_wait, val); + if (__builtin_expect (res == -ENOSYS, 0)) + { + gtm_futex_wait = FUTEX_WAIT; + gtm_futex_wake = FUTEX_WAKE; + res = sys_futex0 (addr, FUTEX_WAIT, val); + } + if (__builtin_expect (res < 0, 0)) + { + if (res == -EWOULDBLOCK || res == -ETIMEDOUT) + ; + else if (res == -EFAULT) + GTM_fatal ("futex failed (EFAULT %p)", addr); + else + GTM_fatal ("futex failed (%s)", strerror(-res)); + } +} + + +long +futex_wake (int *addr, int count) +{ + long res = sys_futex0 (addr, gtm_futex_wake, count); + if (__builtin_expect (res == -ENOSYS, 0)) + { + gtm_futex_wait = FUTEX_WAIT; + gtm_futex_wake = FUTEX_WAKE; + res = sys_futex0 (addr, FUTEX_WAKE, count); + } + if (__builtin_expect (res < 0, 0)) + GTM_fatal ("futex failed (%s)", strerror(-res)); + else + return res; +} + +} // namespace GTM diff --git a/libitm/config/linux/futex.h b/libitm/config/linux/futex.h new file mode 100644 index 0000000..326c0f5 --- /dev/null +++ b/libitm/config/linux/futex.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
/* Provide access to the futex system call.  */

#ifndef GTM_FUTEX_H
#define GTM_FUTEX_H 1

namespace GTM HIDDEN {

/* Included inside the namespace on purpose: it defines the static inline
   sys_futex0() wrapper for this target, which futex.cc calls.  */
#include "futex_bits.h"

extern void futex_wait (int *addr, int val);
extern long futex_wake (int *addr, int count);

}

#endif /* GTM_FUTEX_H */

/* ==== libitm/config/linux/rwlock.cc (new file; GPLv3+ with GCC Runtime
   Library Exception boilerplate elided in this view).  ==== */
#include "libitm_i.h"
#include "futex.h"
#include <limits.h>

namespace GTM HIDDEN {

// Acquire a RW lock for reading.

void
gtm_rwlock::read_lock (gtm_thread *tx)
{
  for (;;)
    {
      // Fast path: first announce our intent to read, then check for
      // conflicting intents to write.  The barrier makes sure that this
      // happens in exactly this order.
      tx->shared_state = 0;
      __sync_synchronize();
      if (likely(writers == 0))
	return;

      // There seems to be an active, waiting, or confirmed writer, so enter
      // the futex-based slow path.

      // Before waiting, we clear our read intent and check whether there are
      // any writers that might potentially wait for readers.  If so, wake
      // them.  We need the barrier here for the same reason that we need it
      // in read_unlock().
      // TODO Potentially too many wake-ups. See comments in read_unlock().
      tx->shared_state = ~(typeof tx->shared_state)0;
      __sync_synchronize();
      if (writer_readers > 0)
	{
	  writer_readers = 0;
	  futex_wake(&writer_readers, 1);
	}

      // Signal that there are waiting readers and wait until there is no
      // writer anymore.
      // TODO Spin here on writers for a while. Consider whether we woke
      // any writers before?
      while (writers)
	{
	  // An active writer.  Wait until it has finished.  To avoid lost
	  // wake-ups, we need to use Dekker-like synchronization.
	  // Note that we cannot reset readers to zero when we see that there
	  // are no writers anymore after the barrier because this pending
	  // store could then lead to lost wake-ups at other readers.
	  readers = 1;
	  __sync_synchronize();
	  if (writers)
	    futex_wait(&readers, 1);
	}

      // And we try again to acquire a read lock.
    }
}


// Acquire a RW lock for writing.
// Generic version that also works for upgrades.
// Note that an upgrade might fail (and thus waste previous work done during
// this transaction) if there is another thread that tried to go into serial
// mode earlier (i.e., upgrades do not have higher priority than pure writers).
// However, this seems rare enough to not consider it further as we need both
// a non-upgrade writer and a writer to happen to switch to serial mode
// concurrently. If we'd want to handle this, a writer waiting for readers
// would have to coordinate with later arriving upgrades and hand over the
// lock to them, including the reader-waiting state. We can try to support
// this if this will actually happen often enough in real workloads.

bool
gtm_rwlock::write_lock_generic (gtm_thread *tx)
{
  // Try to acquire the write lock.  writers == 1 means "held, uncontended",
  // writers == 2 means "held, waiters possible".
  unsigned int w;
  if (unlikely((w = __sync_val_compare_and_swap(&writers, 0, 1)) != 0))
    {
      // If this is an upgrade, we must not wait for other writers or
      // upgrades.
      if (tx != 0)
	return false;

      // There is already a writer. If there are no other waiting writers,
      // switch to contended mode.
      // Note that this is actually an atomic exchange, not a TAS. Also,
      // it's only guaranteed to have acquire semantics, whereas we need a
      // full barrier to make the Dekker-style synchronization work. However,
      // we rely on the xchg being a full barrier on the architectures that we
      // consider here.
      // ??? Use C++0x atomics as soon as they are available.
      if (w != 2)
	w = __sync_lock_test_and_set(&writers, 2);
      while (w != 0)
	{
	  futex_wait(&writers, 2);
	  w = __sync_lock_test_and_set(&writers, 2);
	}
    }

  // We have acquired the writer side of the R/W lock. Now wait for any
  // readers that might still be active.
  // We don't need an extra barrier here because the CAS and the xchg
  // operations have full barrier semantics already.

  // If this is an upgrade, we are not a reader anymore. This is only safe to
  // do after we have acquired the writer lock.
  // TODO In the worst case, this requires one wait/wake pair for each
  // active reader. Reduce this!
  if (tx != 0)
    tx->shared_state = ~(typeof tx->shared_state)0;

  for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
      it = it->next_thread)
    {
      // Use a loop here to check reader flags again after waiting.
      while (it->shared_state != ~(typeof it->shared_state)0)
	{
	  // An active reader. Wait until it has finished. To avoid lost
	  // wake-ups, we need to use Dekker-like synchronization.
	  // Note that we can reset writer_readers to zero when we see after
	  // the barrier that the reader has finished in the meantime;
	  // however, this is only possible because we are the only writer.
	  // TODO Spin for a while on this reader flag.
	  writer_readers = 1;
	  __sync_synchronize();
	  if (it->shared_state != ~(typeof it->shared_state)0)
	    futex_wait(&writer_readers, 1);
	  else
	    writer_readers = 0;
	}
    }

  return true;
}

// Acquire a RW lock for writing.

void
gtm_rwlock::write_lock ()
{
  write_lock_generic (0);
}


// Upgrade a RW lock that has been locked for reading to a writing lock.
// Do this without possibility of another writer incoming.  Return false
// if this attempt fails (i.e. another thread also upgraded).

bool
gtm_rwlock::write_upgrade (gtm_thread *tx)
{
  return write_lock_generic (tx);
}


// Release a RW lock from reading.

void
gtm_rwlock::read_unlock (gtm_thread *tx)
{
  tx->shared_state = ~(typeof tx->shared_state)0;

  // If there is a writer waiting for readers, wake it up. We need the barrier
  // to avoid lost wake-ups.
  // ??? We might not be the last active reader, so the wake-up might happen
  // too early. How do we avoid this without slowing down readers too much?
  // Each reader could scan the list of txns for other active readers but
  // this can result in many cache misses. Use combining instead?
  // TODO Sends out one wake-up for each reader in the worst case.
  __sync_synchronize();
  if (unlikely(writer_readers > 0))
    {
      writer_readers = 0;
      futex_wake(&writer_readers, 1);
    }
}


// Release a RW lock from writing.

void
gtm_rwlock::write_unlock ()
{
  // This is supposed to be a full barrier.
  if (__sync_fetch_and_sub(&writers, 1) == 2)
    {
      // There might be waiting writers, so wake them.
      writers = 0;
      if (futex_wake(&writers, 1) == 0)
	{
	  // If we did not wake any waiting writers, we might indeed be the
	  // last writer (this can happen because write_lock_generic()
	  // exchanges 0 or 1 to 2 and thus might go to contended mode even if
	  // no other thread holds the write lock currently). Therefore, we
	  // have to wake up readers here as well.
	  futex_wake(&readers, INT_MAX);
	}
      return;
    }
  // No waiting writers, so wake up all waiting readers.
  // Because the fetch_and_sub is a full barrier already, we don't need
  // another barrier here (as in read_unlock()).
  if (readers > 0)
    {
      readers = 0;
      futex_wake(&readers, INT_MAX);
    }
}

} // namespace GTM

/* ==== libitm/config/linux/rwlock.h (new file; GPLv3+ with GCC Runtime
   Library Exception boilerplate elided in this view).  ==== */
#ifndef GTM_RWLOCK_H
#define GTM_RWLOCK_H

#include "common.h"

namespace GTM HIDDEN {

struct gtm_thread;

// This datastructure is the blocking, futex-based version of the Dekker-style
// reader-writer lock used to provide mutual exclusion between active and
// serial transactions.
// See libitm's documentation for further details.
//
// In this implementation, writers are given highest priority access but
// read-to-write upgrades do not have a higher priority than writers.

class gtm_rwlock
{
  // TODO Put futexes on different cachelines?

  int writers;          // Writers' futex.
  int writer_readers;   // A confirmed writer waits here for readers.
  int readers;          // Readers wait here for writers (iff true).

 public:
  gtm_rwlock() : writers(0), writer_readers(0), readers(0) {};

  void read_lock (gtm_thread *tx);
  void read_unlock (gtm_thread *tx);

  void write_lock ();
  void write_unlock ();

  bool write_upgrade (gtm_thread *tx);

 protected:
  // Shared implementation of write_lock() (tx == 0) and
  // write_upgrade() (tx != 0); see rwlock.cc.
  bool write_lock_generic (gtm_thread *tx);
};

} // namespace GTM

#endif // GTM_RWLOCK_H

/* ==== libitm/config/linux/x86/futex_bits.h (new file; GPLv3+ with GCC
   Runtime Library Exception boilerplate elided in this view).  ==== */
#ifdef __x86_64__
# ifndef SYS_futex
#  define SYS_futex 202
# endif

/* x86-64: arguments in rdi/rsi/rdx/r10 per the kernel syscall convention;
   the syscall instruction clobbers rcx and r11, hence the clobber list.  */
static inline long
sys_futex0 (int *addr, long op, long val)
{
  register long r10 __asm__("%r10") = 0;
  long res;

  __asm volatile ("syscall"
		  : "=a" (res)
		  : "0" (SYS_futex), "D" (addr), "S" (op), "d" (val), "r" (r10)
		  : "r11", "rcx", "memory");

  return res;
}

#else
# ifndef SYS_futex
#  define SYS_futex 240
# endif

# ifdef __PIC__

/* i386 PIC: %ebx is reserved as the GOT pointer, so we cannot name it as
   a register operand; swap the first syscall argument into %ebx around
   the int $0x80 and restore it afterwards.  */
static inline long
sys_futex0 (int *addr, int op, int val)
{
  long res;

  __asm volatile ("xchgl\t%%ebx, %2\n\t"
		  "int\t$0x80\n\t"
		  "xchgl\t%%ebx, %2"
		  : "=a" (res)
		  : "0"(SYS_futex), "r" (addr), "c"(op),
		    "d"(val), "S"(0)
		  : "memory");
  return res;
}

# else

/* i386 non-PIC: arguments go directly in ebx/ecx/edx/esi.  */
static inline long
sys_futex0 (int *addr, int op, int val)
{
  long res;

  __asm volatile ("int $0x80"
		  : "=a" (res)
		  : "0"(SYS_futex), "b" (addr), "c"(op),
		    "d"(val), "S"(0)
		  : "memory");
  return res;
}

# endif /* __PIC__ */
#endif /* __x86_64__ */

/* ==== libitm/config/linux/x86/tls.h (new file).  ==== */
/* (GPLv3+ with GCC Runtime Library Exception boilerplate elided in this
   view.)  */

#ifndef LIBITM_X86_TLS_H
#define LIBITM_X86_TLS_H 1

#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 10)
/* Use slots in the TCB head rather than __thread lookups.
   GLIBC has reserved words 10 through 13 for TM.  */
#define HAVE_ARCH_GTM_THREAD 1
#define HAVE_ARCH_GTM_THREAD_DISP 1
#endif

#include "config/generic/tls.h"

#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 10)
namespace GTM HIDDEN {

/* SEG_READ/SEG_WRITE access TCB word OFS through the thread-pointer
   segment register (%fs on 64-bit, %gs on 32-bit).  The *_DECODE_READ /
   *_ENCODE_WRITE variants additionally rotate and XOR the value with the
   per-thread guard word (%fs:48 resp. %gs:24) — the same obfuscation
   scheme glibc uses for its internal pointer mangling (PTR_MANGLE);
   TODO confirm the slot offsets against glibc's tcbhead_t.  */
#ifdef __x86_64__
#ifdef __LP64__
# define SEG_READ(OFS)		"movq\t%%fs:(" #OFS "*8),%0"
# define SEG_WRITE(OFS)		"movq\t%0,%%fs:(" #OFS "*8)"
# define SEG_DECODE_READ(OFS)	SEG_READ(OFS) "\n\t" \
				"rorq\t$17,%0\n\t" \
				"xorq\t%%fs:48,%0"
# define SEG_ENCODE_WRITE(OFS)	"xorq\t%%fs:48,%0\n\t" \
				"rolq\t$17,%0\n\t" \
				SEG_WRITE(OFS)
#else
// For X32.
# define SEG_READ(OFS)		"movl\t%%fs:(" #OFS "*4),%0"
# define SEG_WRITE(OFS)		"movl\t%0,%%fs:(" #OFS "*4)"
# define SEG_DECODE_READ(OFS)	SEG_READ(OFS) "\n\t" \
				"rorl\t$9,%0\n\t" \
				"xorl\t%%fs:24,%0"
# define SEG_ENCODE_WRITE(OFS)	"xorl\t%%fs:24,%0\n\t" \
				"roll\t$9,%0\n\t" \
				SEG_WRITE(OFS)
#endif
#else
# define SEG_READ(OFS)		"movl\t%%gs:(" #OFS "*4),%0"
# define SEG_WRITE(OFS)		"movl\t%0,%%gs:(" #OFS "*4)"
# define SEG_DECODE_READ(OFS)	SEG_READ(OFS) "\n\t" \
				"rorl\t$9,%0\n\t" \
				"xorl\t%%gs:24,%0"
# define SEG_ENCODE_WRITE(OFS)	"xorl\t%%gs:24,%0\n\t" \
				"roll\t$9,%0\n\t" \
				SEG_WRITE(OFS)
#endif

/* The per-thread gtm_thread pointer lives in TCB slot 10, stored plain.  */
static inline struct gtm_thread *gtm_thr(void)
{
  struct gtm_thread *r;
  asm volatile (SEG_READ(10) : "=r"(r));
  return r;
}

static inline void set_gtm_thr(struct gtm_thread *x)
{
  asm volatile (SEG_WRITE(10) : : "r"(x));
}

/* The abi_dispatch pointer lives in TCB slot 11, stored mangled.  */
static inline struct abi_dispatch *abi_disp(void)
{
  struct abi_dispatch *r;
  asm volatile (SEG_DECODE_READ(11) : "=r"(r));
  return r;
}

static inline void set_abi_disp(struct abi_dispatch *x)
{
  void *scratch;
  asm volatile (SEG_ENCODE_WRITE(11) : "=r"(scratch) : "0"(x));
}

#undef SEG_READ
#undef SEG_WRITE
#undef SEG_DECODE_READ
#undef SEG_ENCODE_WRITE

} // namespace GTM
#endif /* >= GLIBC 2.10 */

#endif // LIBITM_X86_TLS_H

/* ==== libitm/config/posix/cachepage.cc (new file; GPLv3+ with GCC
   Runtime Library Exception boilerplate elided in this view).  ==== */
#include "libitm_i.h"
#include <pthread.h>

//
// We have three possibilities for allocation: mmap, memalign, posix_memalign
//

#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
#include <sys/mman.h>
#include <fcntl.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif

namespace GTM HIDDEN {

#if defined(HAVE_MMAP_ANON)
# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#  define MAP_ANONYMOUS MAP_ANON
# endif
// Anonymous mappings need no backing fd; pass -1.
# define dev_zero -1
#elif defined(HAVE_MMAP_DEV_ZERO)
# ifndef MAP_ANONYMOUS
#  define MAP_ANONYMOUS 0
# endif
// Lazily-opened fd for /dev/zero, used to back the mappings.
static int dev_zero = -1;
#endif

#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
/* If we get here, we've already opened /dev/zero and verified that
   PAGE_SIZE is valid for the system.  */
static gtm_cacheline_page * alloc_mmap (void) UNUSED;
static gtm_cacheline_page *
alloc_mmap (void)
{
  gtm_cacheline_page *r;
  r = (gtm_cacheline_page *) mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS, dev_zero, 0);
  if (r == (gtm_cacheline_page *) MAP_FAILED)
    abort ();
  return r;
}
#endif /* MMAP_ANON | MMAP_DEV_ZERO */

#ifdef HAVE_MEMALIGN
static gtm_cacheline_page * alloc_memalign (void) UNUSED;
static gtm_cacheline_page *
alloc_memalign (void)
{
  gtm_cacheline_page *r;
  r = (gtm_cacheline_page *) memalign (PAGE_SIZE, PAGE_SIZE);
  if (r == NULL)
    abort ();
  return r;
}
#endif /* MEMALIGN */

#ifdef HAVE_POSIX_MEMALIGN
static gtm_cacheline_page *alloc_posix_memalign (void) UNUSED;
static gtm_cacheline_page *
alloc_posix_memalign (void)
{
  void *r;
  if (posix_memalign (&r, PAGE_SIZE, PAGE_SIZE))
    abort ();
  return (gtm_cacheline_page *) r;
}
#endif /* POSIX_MEMALIGN */

// Select the page allocator.  When the choice can be made at compile time
// (mmap with a known-fixed page size, or a plain memalign fallback),
// alloc_page is a macro; otherwise it is a function pointer initialized
// by a constructor that checks getpagesize() at startup.
#if defined(HAVE_MMAP_ANON) && defined(FIXED_PAGE_SIZE)
# define alloc_page  alloc_mmap
#elif defined(HAVE_MMAP_DEV_ZERO) && defined(FIXED_PAGE_SIZE)
static gtm_cacheline_page *
alloc_page (void)
{
  if (dev_zero < 0)
    {
      dev_zero = open ("/dev/zero", O_RDWR);
      assert (dev_zero >= 0);
    }
  return alloc_mmap ();
}
#elif defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
static gtm_cacheline_page * (*alloc_page) (void);
static void __attribute__((constructor))
init_alloc_page (void)
{
  size_t page_size = getpagesize ();
  if (page_size <= PAGE_SIZE && PAGE_SIZE % page_size == 0)
    {
# ifndef HAVE_MMAP_ANON
      dev_zero = open ("/dev/zero", O_RDWR);
      assert (dev_zero >= 0);
# endif
      alloc_page = alloc_mmap;
      return;
    }
# ifdef HAVE_MEMALIGN
  alloc_page = alloc_memalign;
# elif defined(HAVE_POSIX_MEMALIGN)
  alloc_page = alloc_posix_memalign;
# else
#  error "No fallback aligned memory allocation method"
# endif
}
#elif defined(HAVE_MEMALIGN)
# define alloc_page  alloc_memalign
defined(HAVE_POSIX_MEMALIGN) +# define alloc_page alloc_posix_memalign +#else +# error "No aligned memory allocation method" +#endif + +static gtm_cacheline_page *free_pages; +static pthread_mutex_t free_page_lock = PTHREAD_MUTEX_INITIALIZER; + +void * +gtm_cacheline_page::operator new (size_t size) +{ + assert (size == sizeof (gtm_cacheline_page)); + assert (size <= PAGE_SIZE); + + pthread_mutex_lock(&free_page_lock); + + gtm_cacheline_page *r = free_pages; + free_pages = r ? r->prev : NULL; + + pthread_mutex_unlock(&free_page_lock); + + if (r == NULL) + r = alloc_page (); + + return r; +} + +void +gtm_cacheline_page::operator delete (void *xhead) +{ + gtm_cacheline_page *head = static_cast<gtm_cacheline_page *>(xhead); + gtm_cacheline_page *tail; + + if (head == 0) + return; + + /* ??? We should eventually really free some of these. */ + + for (tail = head; tail->prev != 0; tail = tail->prev) + continue; + + pthread_mutex_lock(&free_page_lock); + + tail->prev = free_pages; + free_pages = head; + + pthread_mutex_unlock(&free_page_lock); +} + +} // namespace GTM diff --git a/libitm/config/posix/rwlock.cc b/libitm/config/posix/rwlock.cc new file mode 100644 index 0000000..f379383 --- /dev/null +++ b/libitm/config/posix/rwlock.cc @@ -0,0 +1,288 @@ +/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libitm_i.h" + +namespace GTM HIDDEN { + +// Initialize a new RW lock. +// ??? Move this back to the header file when constexpr is implemented. + +gtm_rwlock::gtm_rwlock() + : mutex (PTHREAD_MUTEX_INITIALIZER), + c_readers (PTHREAD_COND_INITIALIZER), + c_writers (PTHREAD_COND_INITIALIZER), + c_confirmed_writers (PTHREAD_COND_INITIALIZER), + summary (0), + a_readers (0), + w_readers (0), + w_writers (0) +{ } + +gtm_rwlock::~gtm_rwlock() +{ + pthread_mutex_destroy (&this->mutex); + pthread_cond_destroy (&this->c_readers); + pthread_cond_destroy (&this->c_writers); +} + +// Acquire a RW lock for reading. + +void +gtm_rwlock::read_lock (gtm_thread *tx) +{ + // Fast path: first announce our intent to read, then check for conflicting + // intents to write. The barrier makes sure that this happens in exactly + // this order. + tx->shared_state = 0; + __sync_synchronize(); + unsigned int sum = this->summary; + if (likely(!(sum & (a_writer | w_writer)))) + return; + + // There seems to be an active, waiting, or confirmed writer, so enter the + // mutex-based slow path. To try to keep the number of readers small that + // the writer will see, we clear our read flag right away before entering + // the critical section. Otherwise, the writer would have to wait for us to + // get into the critical section. (Note that for correctness, this only has + // to happen before we leave the slow path and before we wait for any + // writer). + // ??? Add a barrier to enforce early visibility of this? 
+ tx->shared_state = ~(typeof tx->shared_state)0; + + pthread_mutex_lock (&this->mutex); + + // Read summary again after acquiring the mutex because it might have + // changed during waiting for the mutex to become free. + sum = this->summary; + + // If there is a writer waiting for readers, wake it up. Only do that if we + // might be the last reader that could do the wake-up, otherwise skip the + // wake-up but decrease a_readers to show that we have entered the slow path. + // This has to happen before we wait for any writers or upgraders. + // See write_lock_generic() for further explanations. + if (this->a_readers > 0) + { + this->a_readers--; + if (this->a_readers == 0) + pthread_cond_signal(&this->c_confirmed_writers); + } + + // If there is an active or waiting writer, we must wait. + while (sum & (a_writer | w_writer)) + { + this->summary = sum | w_reader; + this->w_readers++; + pthread_cond_wait (&this->c_readers, &this->mutex); + sum = this->summary; + if (--this->w_readers == 0) + sum &= ~w_reader; + } + + // Otherwise we can acquire the lock for read. + tx->shared_state = 0; + + pthread_mutex_unlock(&this->mutex); +} + + +// Acquire a RW lock for writing. Generic version that also works for +// upgrades. +// Note that an upgrade might fail (and thus waste previous work done during +// this transaction) if there is another thread that tried to go into serial +// mode earlier (i.e., upgrades do not have higher priority than pure writers). +// However, this seems rare enough to not consider it further as we need both +// a non-upgrade writer and a writer to happen to switch to serial mode +// concurrently. If we'd want to handle this, a writer waiting for readers +// would have to coordinate with later arriving upgrades and hand over the +// lock to them, including the the reader-waiting state. We can try to support +// this if this will actually happen often enough in real workloads. 
+ +bool +gtm_rwlock::write_lock_generic (gtm_thread *tx) +{ + pthread_mutex_lock (&this->mutex); + + unsigned int sum = this->summary; + + // If there is an active writer, wait. + while (sum & a_writer) + { + if (tx != 0) + { + // If this is an upgrade, we must not wait for other writers or + // upgrades that already have gone in + pthread_mutex_unlock (&this->mutex); + return false; + } + + this->summary = sum | w_writer; + this->w_writers++; + pthread_cond_wait (&this->c_writers, &this->mutex); + sum = this->summary; + if (--this->w_writers == 0) + sum &= ~w_writer; + } + + // Otherwise we can acquire the lock for write. As a writer, we have + // priority, so we don't need to take this back. + this->summary = sum | a_writer; + + // We still need to wait for active readers to finish. The barrier makes + // sure that we first set our write intent and check for active readers + // after that, in strictly this order (similar to the barrier in the fast + // path of read_lock()). + __sync_synchronize(); + + // If this is an upgrade, we are not a reader anymore. + if (tx != 0) + tx->shared_state = ~(typeof tx->shared_state)0; + + // Count the number of active readers to be able to decrease the number of + // wake-ups and wait calls that are necessary. + // + // This number is an upper bound of the number of readers that actually + // are still active and which we need to wait for: + // - We set our write flag before checking the reader flags, and readers + // check our write flag after clearing their read flags in read_unlock(). + // Therefore, they will enter the slow path whenever we have seen them. + // - Readers will have cleared their read flags before leaving the slow + // path in read_lock() (prevents lost wake-ups), and before waiting for + // any writer (prevents deadlocks). 
+ // + // However, this number is also just a lower bound of the number of readers + // that will actually enter the slow path in read_unlock() or read_lock(): + // - Because the read flag is cleared outside of a critical section, writers + // can see it as cleared while the reader still goes into the slow path. + // + // Therefore, readers can skip (lower bound - 1) wake-ups, but we do need + // the following loop to check that the readers that we wanted to wait for + // are actually those that entered the slow path so far (and either skipped + // or sent a wake-up). + // + // ??? Do we need to optimize further? (The writer could publish a list of + // readers that it suspects to be active. Readers could check this list and + // only decrement a_readers if they are in this list.) + for (;;) + { + // ??? Keep a list of active readers that we saw and update it on the + // next retry instead? This might reduce the number of cache misses that + // we get when checking reader flags. + int readers = 0; + for (gtm_thread *it = gtm_thread::list_of_threads; it != 0; + it = it->next_thread) + { + // Don't count ourself if this is an upgrade. + if (it->shared_state != ~(typeof it->shared_state)0) + readers++; + } + + // If we have not seen any readers, we will not wait. + if (readers == 0) + break; + + // We've seen a number of readers, so we publish this number and wait. + this->a_readers = readers; + pthread_cond_wait (&this->c_confirmed_writers, &this->mutex); + } + + pthread_mutex_unlock (&this->mutex); + return true; +} + +// Acquire a RW lock for writing. + +void +gtm_rwlock::write_lock () +{ + write_lock_generic (0); +} + + +// Upgrade a RW lock that has been locked for reading to a writing lock. +// Do this without possibility of another writer incoming. Return false +// if this attempt fails (i.e. another thread also upgraded). + +bool +gtm_rwlock::write_upgrade (gtm_thread *tx) +{ + return write_lock_generic (tx); +} + + +// Release a RW lock from reading. 
+ +void +gtm_rwlock::read_unlock (gtm_thread *tx) +{ + tx->shared_state = ~(typeof tx->shared_state)0; + __sync_synchronize(); + unsigned int sum = this->summary; + if (likely(!(sum & (a_writer | w_writer)))) + return; + + // There is a writer, either active or waiting for other readers or writers. + // Thus, enter the mutex-based slow path. + pthread_mutex_lock (&this->mutex); + + // If there is a writer waiting for readers, wake it up. Only do that if we + // might be the last reader that could do the wake-up, otherwise skip the + // wake-up and decrease a_readers to publish that we have entered the slow + // path but skipped the wake-up. + if (this->a_readers > 0) + { + this->a_readers--; + if (this->a_readers == 0) + pthread_cond_signal(&this->c_confirmed_writers); + } + + // We don't need to wake up any writers waiting for other writers. Active + // writers will take care of that. + + pthread_mutex_unlock (&this->mutex); +} + + +// Release a RW lock from writing. + +void +gtm_rwlock::write_unlock () +{ + pthread_mutex_lock (&this->mutex); + + unsigned int sum = this->summary; + this->summary = sum & ~a_writer; + + // If there is a waiting writer, wake it. + if (unlikely (sum & w_writer)) + pthread_cond_signal (&this->c_writers); + + // If there are waiting readers, wake them. + else if (unlikely (sum & w_reader)) + pthread_cond_broadcast (&this->c_readers); + + pthread_mutex_unlock (&this->mutex); +} + +} // namespace GTM diff --git a/libitm/config/posix/rwlock.h b/libitm/config/posix/rwlock.h new file mode 100644 index 0000000..f538bd0 --- /dev/null +++ b/libitm/config/posix/rwlock.h @@ -0,0 +1,81 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). 
+ + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef GTM_RWLOCK_H +#define GTM_RWLOCK_H + +#include <pthread.h> + +namespace GTM HIDDEN { + +struct gtm_thread; + +// This datastructure is the blocking, mutex-based side of the Dekker-style +// reader-writer lock used to provide mutual exclusion between active and +// serial transactions. It has similarities to POSIX pthread_rwlock_t except +// that we also provide for upgrading a reader->writer lock, with a +// positive indication of failure (another writer acquired the lock +// before we were able to acquire). While the writer flag (a_writer below) is +// global and protected by the mutex, there are per-transaction reader flags, +// which are stored in a transaction's shared state. +// See libitm's documentation for further details. +// +// In this implementation, writers are given highest priority access but +// read-to-write upgrades do not have a higher priority than writers. + +class gtm_rwlock +{ + pthread_mutex_t mutex; // Held if manipulating any field. 
+ pthread_cond_t c_readers; // Readers wait here + pthread_cond_t c_writers; // Writers wait here for writers + pthread_cond_t c_confirmed_writers; // Writers wait here for readers + + static const unsigned a_writer = 1; // An active writer. + static const unsigned w_writer = 2; // The w_writers field != 0 + static const unsigned w_reader = 4; // The w_readers field != 0 + + unsigned int summary; // Bitmask of the above. + unsigned int a_readers; // Nr active readers as observed by a writer + unsigned int w_readers; // Nr waiting readers + unsigned int w_writers; // Nr waiting writers + + public: + gtm_rwlock(); + ~gtm_rwlock(); + + void read_lock (gtm_thread *tx); + void read_unlock (gtm_thread *tx); + + void write_lock (); + void write_unlock (); + + bool write_upgrade (gtm_thread *tx); + + protected: + bool write_lock_generic (gtm_thread *tx); +}; + +} // namespace GTM + +#endif // GTM_RWLOCK_H diff --git a/libitm/config/x86/cacheline.cc b/libitm/config/x86/cacheline.cc new file mode 100644 index 0000000..2e49a35 --- /dev/null +++ b/libitm/config/x86/cacheline.cc @@ -0,0 +1,73 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libitm_i.h" + +namespace GTM HIDDEN { + +uint32_t const gtm_bit_to_byte_mask[16] = +{ + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +#ifdef __SSE2__ +# define MEMBER m128i +#else +# define MEMBER w +#endif + +void +gtm_cacheline::copy_mask (gtm_cacheline * __restrict d, + const gtm_cacheline * __restrict s, + gtm_cacheline_mask m) +{ + if (m == (gtm_cacheline_mask)-1) + { + *d = *s; + return; + } + if (__builtin_expect (m == 0, 0)) + return; + + size_t n = sizeof(d->MEMBER[0]); + for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n) + store_mask (&d->MEMBER[i], s->MEMBER[i], m); +} + +} // namespace GTM diff --git a/libitm/config/x86/cacheline.h b/libitm/config/x86/cacheline.h new file mode 100644 index 0000000..15a95b0 --- /dev/null +++ b/libitm/config/x86/cacheline.h @@ -0,0 +1,242 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_CACHELINE_H +#define LIBITM_CACHELINE_H 1 + +// Minimum cacheline size is 32, due to both complex long double and __m256. +// There's no requirement that 64-bit use a 64-byte cacheline size, but do +// so for now to make sure everything is parameterized properly. +#ifdef __x86_64__ +# define CACHELINE_SIZE 64 +#else +# define CACHELINE_SIZE 32 +#endif + +namespace GTM HIDDEN { + +// A gtm_cacheline_mask stores a modified bit for every modified byte +// in the cacheline with which it is associated. +typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask; + +extern uint32_t const gtm_bit_to_byte_mask[16]; + +union gtm_cacheline +{ + // Byte access to the cacheline. + unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE))); + + // Larger sized access to the cacheline. + uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)]; + uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; + uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; + gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; + +#ifdef __MMX__ + __m64 m64[CACHELINE_SIZE / sizeof(__m64)]; +#endif +#ifdef __SSE__ + __m128 m128[CACHELINE_SIZE / sizeof(__m128)]; +#endif +#ifdef __SSE2__ + __m128i m128i[CACHELINE_SIZE / sizeof(__m128i)]; +#endif +#ifdef __AVX__ + __m256 m256[CACHELINE_SIZE / sizeof(__m256)]; + __m256i m256i[CACHELINE_SIZE / sizeof(__m256i)]; +#endif + + // Store S into D, but only the bytes specified by M. 
+ static void store_mask (uint32_t *d, uint32_t s, uint8_t m); + static void store_mask (uint64_t *d, uint64_t s, uint8_t m); +#ifdef __SSE2__ + static void store_mask (__m128i *d, __m128i s, uint16_t m); +#endif + + // Copy S to D, but only the bytes specified by M. + static void copy_mask (gtm_cacheline * __restrict d, + const gtm_cacheline * __restrict s, + gtm_cacheline_mask m); + + // A write barrier to emit after (a series of) copy_mask. + // When we're emitting non-temporal stores, the normal strong + // ordering of the machine doesn't apply. + static void copy_mask_wb (); + +#if defined(__SSE__) || defined(__AVX__) + // Copy S to D; only bother defining if we can do this more efficiently + // than the compiler-generated default implementation. + gtm_cacheline& operator= (const gtm_cacheline &s); +#endif // SSE, AVX +}; + +inline void +gtm_cacheline::copy_mask_wb () +{ +#ifdef __SSE2__ + _mm_sfence (); +#endif +} + +#if defined(__SSE__) || defined(__AVX__) +inline gtm_cacheline& ALWAYS_INLINE +gtm_cacheline::operator= (const gtm_cacheline & __restrict s) +{ +#ifdef __AVX__ +# define CP m256 +# define TYPE __m256 +#else +# define CP m128 +# define TYPE __m128 +#endif + + TYPE w, x, y, z; + + // ??? Wouldn't it be nice to have a pragma to tell the compiler + // to completely unroll a given loop? + switch (CACHELINE_SIZE / sizeof(TYPE)) + { + case 1: + this->CP[0] = s.CP[0]; + break; + case 2: + x = s.CP[0]; + y = s.CP[1]; + this->CP[0] = x; + this->CP[1] = y; + break; + case 4: + w = s.CP[0]; + x = s.CP[1]; + y = s.CP[2]; + z = s.CP[3]; + this->CP[0] = w; + this->CP[1] = x; + this->CP[2] = y; + this->CP[3] = z; + break; + default: + __builtin_trap (); + } + + return *this; +} +#endif + +// ??? Support masked integer stores more efficiently with an unlocked cmpxchg +// insn. 
My reasoning is that while we write to locations that we do not wish +// to modify, we do it in an uninterruptable insn, and so we either truely +// write back the original data or the insn fails -- unlike with a +// load/and/or/write sequence which can be interrupted either by a kernel +// task switch or an unlucky cacheline steal by another processor. Avoiding +// the LOCK prefix improves performance by a factor of 10, and we don't need +// the memory barrier semantics implied by that prefix. + +inline void ALWAYS_INLINE +gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m) +{ + gtm_cacheline_mask tm = (1 << sizeof (s)) - 1; + if (__builtin_expect (m & tm, tm)) + { + if (__builtin_expect ((m & tm) == tm, 1)) + *d = s; + else + { + gtm_cacheline_mask bm = gtm_bit_to_byte_mask[m & 15]; + gtm_word n, o = *d; + + __asm("\n0:\t" + "mov %[o], %[n]\n\t" + "and %[m], %[n]\n\t" + "or %[s], %[n]\n\t" + "cmpxchg %[n], %[d]\n\t" + "jnz,pn 0b" + : [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o) + : [s] "r" (s & bm), [m] "r" (~bm)); + } + } +} + +inline void ALWAYS_INLINE +gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m) +{ + gtm_cacheline_mask tm = (1 << sizeof (s)) - 1; + if (__builtin_expect (m & tm, tm)) + { + if (__builtin_expect ((m & tm) == tm, 1)) + *d = s; + else + { +#ifdef __x86_64__ + uint32_t bl = gtm_bit_to_byte_mask[m & 15]; + uint32_t bh = gtm_bit_to_byte_mask[(m >> 4) & 15]; + gtm_cacheline_mask bm = bl | ((gtm_cacheline_mask)bh << 31 << 1); + uint64_t n, o = *d; + __asm("\n0:\t" + "mov %[o], %[n]\n\t" + "and %[m], %[n]\n\t" + "or %[s], %[n]\n\t" + "cmpxchg %[n], %[d]\n\t" + "jnz,pn 0b" + : [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o) + : [s] "r" (s & bm), [m] "r" (~bm)); +#else + /* ??? While it's possible to perform this operation with + cmpxchg8b, the sequence requires all 7 general registers + and thus cannot be performed with -fPIC. Don't even try. 
*/ + uint32_t *d32 = reinterpret_cast<uint32_t *>(d); + store_mask (d32, s, m); + store_mask (d32 + 1, s >> 32, m >> 4); +#endif + } + } +} + +#ifdef __SSE2__ +inline void ALWAYS_INLINE +gtm_cacheline::store_mask (__m128i *d, __m128i s, uint16_t m) +{ + if (__builtin_expect (m == 0, 0)) + return; + if (__builtin_expect (m == 0xffff, 1)) + *d = s; + else + { + __m128i bm0, bm1, bm2, bm3; + bm0 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; + bm1 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; + bm2 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; + bm3 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; + bm0 = _mm_unpacklo_epi32 (bm0, bm1); + bm2 = _mm_unpacklo_epi32 (bm2, bm3); + bm0 = _mm_unpacklo_epi64 (bm0, bm2); + + _mm_maskmoveu_si128 (s, bm0, (char *)d); + } +} +#endif // SSE2 + +} // namespace GTM + +#endif // LIBITM_CACHELINE_H diff --git a/libitm/config/x86/sjlj.S b/libitm/config/x86/sjlj.S new file mode 100644 index 0000000..725ffec --- /dev/null +++ b/libitm/config/x86/sjlj.S @@ -0,0 +1,105 @@ +/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + .text + .p2align 4 + .globl _ITM_beginTransaction + .type _ITM_beginTransaction, @function + +_ITM_beginTransaction: + .cfi_startproc +#ifdef __x86_64__ + leaq 8(%rsp), %rax + movq (%rsp), %r8 + subq $72, %rsp + .cfi_def_cfa_offset 80 + movq %rax, (%rsp) + movq %r8, 8(%rsp) + movq %rbx, 16(%rsp) + movq %rbp, 24(%rsp) + movq %r12, 32(%rsp) + movq %r13, 40(%rsp) + movq %r14, 48(%rsp) + movq %r15, 56(%rsp) + movq %rsp, %rsi + call GTM_begin_transaction + addq $72, %rsp + .cfi_def_cfa_offset 8 + ret +#else + leal 4(%esp), %ecx + subl $28, %esp + .cfi_def_cfa_offset 32 + movl %ecx, 8(%esp) + movl %ebx, 12(%esp) + movl %esi, 16(%esp) + movl %edi, 20(%esp) + movl %ebp, 24(%esp) + leal 8(%esp), %edx + call GTM_begin_transaction + addl $28, %esp + .cfi_def_cfa_offset 4 + ret +#endif + .cfi_endproc + .size _ITM_beginTransaction, .-_ITM_beginTransaction + + .p2align 4 + .globl GTM_longjmp + .type GTM_longjmp, @function + .hidden GTM_longjmp + +GTM_longjmp: + .cfi_startproc +#ifdef __x86_64__ + movq (%rdi), %rcx + movq 8(%rdi), %rdx + movq 16(%rdi), %rbx + movq 24(%rdi), %rbp + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + movl %esi, %eax + .cfi_def_cfa %rcx, 0 + .cfi_register %rip, %rdx + movq %rcx, %rsp + jmp *%rdx +#else + xchgl %eax, %edx + movl (%edx), %ecx + movl 4(%edx), %ebx + movl 8(%edx), %esi + movl 12(%edx), %edi + movl 16(%edx), %ebp + movl 20(%edx), %edx + .cfi_def_cfa %ecx, 0 + .cfi_register %eip, %edx + movl %ecx, %esp + jmp *%edx +#endif + .cfi_endproc + .size GTM_longjmp, .-GTM_longjmp + +.section .note.GNU-stack, "", @progbits diff --git a/libitm/config/x86/target.h b/libitm/config/x86/target.h new file mode 100644 index 0000000..197faeb --- /dev/null +++ 
b/libitm/config/x86/target.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +namespace GTM HIDDEN { + +#ifdef __x86_64__ +/* ??? This doesn't work for Win64. */ +typedef struct gtm_jmpbuf +{ + void *cfa; + unsigned long rip; + unsigned long rbx; + unsigned long rbp; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; +} gtm_jmpbuf; +#else +typedef struct gtm_jmpbuf +{ + void *cfa; + unsigned long ebx; + unsigned long esi; + unsigned long edi; + unsigned long ebp; + unsigned long eip; +} gtm_jmpbuf; +#endif + +/* x86 doesn't require strict alignment for the basic types. */ +#define STRICT_ALIGNMENT 0 + +/* x86 uses a fixed page size of 4K. */ +#define PAGE_SIZE 4096 +#define FIXED_PAGE_SIZE 1 + +/* The size of one line in hardware caches (in bytes). 
*/ +#define HW_CACHELINE_SIZE 64 + + +static inline void +cpu_relax (void) +{ + __asm volatile ("rep; nop" : : : "memory"); +} + +static inline void +atomic_read_barrier (void) +{ + /* x86 is a strong memory ordering machine. */ + __asm volatile ("" : : : "memory"); +} + +static inline void +atomic_write_barrier (void) +{ + /* x86 is a strong memory ordering machine. */ + __asm volatile ("" : : : "memory"); +} + +} // namespace GTM + +// We'll be using some of the cpu builtins, and their associated types. +#ifndef __cplusplus +/* ??? It's broken for C++. */ +#include <x86intrin.h> +#else +# ifdef __SSE2__ +# include <emmintrin.h> +# elif defined(__SSE__) +# include <xmmintrin.h> +# endif +# ifdef __AVX__ +# include <immintrin.h> +# endif +#endif diff --git a/libitm/config/x86/unaligned.h b/libitm/config/x86/unaligned.h new file mode 100644 index 0000000..01abc47 --- /dev/null +++ b/libitm/config/x86/unaligned.h @@ -0,0 +1,237 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_X86_UNALIGNED_H +#define LIBITM_X86_UNALIGNED_H 1 + +#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1 +#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1 + +#include "config/generic/unaligned.h" + +namespace GTM HIDDEN { + +template<> +inline uint32_t +unaligned_load2<uint32_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + uint32_t r, lo, hi; + lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; + hi = c2->u32[0]; + asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo)); + return r; +} + +template<> +inline uint64_t +unaligned_load2<uint64_t>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ +#ifdef __x86_64__ + uint64_t r, lo, hi; + lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; + hi = c2->u64[0]; + asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo)); + return r; +#else + uint32_t v0, v1, v2; + uint64_t r; + + if (ofs < CACHELINE_SIZE - 4) + { + v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2]; + v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; + v2 = c2->u32[0]; + } + else + { + v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; + v1 = c2->u32[0]; + v2 = c2->u32[1]; + } + ofs = (ofs & 3) * 8; + asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]" + : "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2)); + + return r; +#endif +} + +#if defined(__SSE2__) || defined(__MMX__) +template<> +inline _ITM_TYPE_M64 +unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ +# ifdef __x86_64__ + __m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]); + __m128i hi = _mm_movpi64_epi64 (c2->m64[0]); + + ofs = (ofs & 7) * 8; + lo = _mm_srli_epi64 (lo, ofs); + hi = _mm_slli_epi64 
(hi, 64 - ofs); + lo = lo | hi; + return _mm_movepi64_pi64 (lo); +# else + // On 32-bit we're about to return the result in an MMX register, so go + // ahead and do the computation in that unit, even if SSE2 is available. + __m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1]; + __m64 hi = c2->m64[0]; + + ofs = (ofs & 7) * 8; + lo = _mm_srli_si64 (lo, ofs); + hi = _mm_slli_si64 (hi, 64 - ofs); + return lo | hi; +# endif +} +#endif // SSE2 or MMX + +// The SSE types are strictly aligned. +#ifdef __SSE__ +template<> + struct strict_alignment<_ITM_TYPE_M128> + : public std::true_type + { }; + +// Expand the unaligned SSE move instructions. +template<> +inline _ITM_TYPE_M128 +unaligned_load<_ITM_TYPE_M128>(const void *t) +{ + return _mm_loadu_ps (static_cast<const float *>(t)); +} + +template<> +inline void +unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val) +{ + _mm_storeu_ps (static_cast<float *>(t), val); +} +#endif // SSE + +#ifdef __AVX__ +// The AVX types are strictly aligned when it comes to vmovaps vs vmovups. 
+template<> + struct strict_alignment<_ITM_TYPE_M256> + : public std::true_type + { }; + +template<> +inline _ITM_TYPE_M256 +unaligned_load<_ITM_TYPE_M256>(const void *t) +{ + return _mm256_loadu_ps (static_cast<const float *>(t)); +} + +template<> +inline void +unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val) +{ + _mm256_storeu_ps (static_cast<float *>(t), val); +} +#endif // AVX + +#ifdef __XOP__ +# define HAVE_ARCH_REALIGN_M128I 1 +extern const __v16qi GTM_vpperm_shift[16]; +inline __m128i +realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) +{ + return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]); +} +#elif defined(__AVX__) +# define HAVE_ARCH_REALIGN_M128I 1 +extern "C" const uint64_t GTM_vpalignr_table[16]; +inline __m128i +realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) +{ + register __m128i xmm0 __asm__("xmm0") = hi; + register __m128i xmm1 __asm__("xmm1") = lo; + __asm("call *%2" : "+x"(xmm0) : "x"(xmm1), + "r"(>M_vpalignr_table[byte_count])); + return xmm0; +} +#elif defined(__SSSE3__) +# define HAVE_ARCH_REALIGN_M128I 1 +extern "C" const uint64_t GTM_palignr_table[16]; +inline __m128i +realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) +{ + register __m128i xmm0 __asm__("xmm0") = hi; + register __m128i xmm1 __asm__("xmm1") = lo; + __asm("call *%2" : "+x"(xmm0) : "x"(xmm1), + "r"(>M_palignr_table[byte_count])); + return xmm0; +} +#elif defined(__SSE2__) +# define HAVE_ARCH_REALIGN_M128I 1 +extern "C" const char GTM_pshift_table[16 * 16]; +inline __m128i +realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) +{ + register __m128i xmm0 __asm__("xmm0") = lo; + register __m128i xmm1 __asm__("xmm1") = hi; + __asm("call *%2" : "+x"(xmm0), "+x"(xmm1) + : "r"(GTM_pshift_table + byte_count*16)); + return xmm0; +} +#endif // XOP, AVX, SSSE3, SSE2 + +#ifdef HAVE_ARCH_REALIGN_M128I +template<> +inline _ITM_TYPE_M128 +unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) 
+{ + return (_ITM_TYPE_M128) + realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1], + c2->m128i[0], ofs & 15); +} +#endif // HAVE_ARCH_REALIGN_M128I + +#ifdef __AVX__ +template<> +inline _ITM_TYPE_M256 +unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1, + const gtm_cacheline *c2, size_t ofs) +{ + __m128i v0, v1; + __m256i r; + + v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs); + if (ofs < CACHELINE_SIZE - 16) + v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]); + else + v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]); + + r = _mm256_castsi128_si256 ((__m128i)v0); + r = _mm256_insertf128_si256 (r, (__m128i)v1, 1); + return (_ITM_TYPE_M256) r; +} +#endif // AVX + +} // namespace GTM + +#endif // LIBITM_X86_UNALIGNED_H diff --git a/libitm/config/x86/x86_avx.cc b/libitm/config/x86/x86_avx.cc new file mode 100644 index 0000000..9d1ddfb --- /dev/null +++ b/libitm/config/x86/x86_avx.cc @@ -0,0 +1,95 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libitm_i.h" +#include "dispatch.h" + +// ??? Use memcpy for now, until we have figured out how to best instantiate +// these loads/stores. +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M256, GTM::abi_disp()->, ) + +void ITM_REGPARM +_ITM_LM256 (const _ITM_TYPE_M256 *ptr) +{ + GTM::GTM_LB (ptr, sizeof (*ptr)); +} + +// Helpers for re-aligning two 128-bit values. +#ifdef __XOP__ +const __v16qi GTM::GTM_vpperm_shift[16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }, + { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }, + { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 }, + { 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 }, + { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 }, + { 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }, + { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, + { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }, + { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }, + { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }, + { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 }, + { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 }, + { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 }, + { 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }, +}; +#else +# define INSN0 "movdqa %xmm1, %xmm0" +# define INSN(N) "vpalignr $" #N ", %xmm0, %xmm1, %xmm0" +# define TABLE_ENT_0 INSN0 "\n\tret\n\t" +# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t" + +asm(".pushsection .text\n\ + .balign 
16\n\ + .globl GTM_vpalignr_table\n\ + .hidden GTM_vpalignr_table\n\ + .type GTM_vpalignr_table, @function\n\ +GTM_vpalignr_table:\n\t" + TABLE_ENT_0 + TABLE_ENT(1) + TABLE_ENT(2) + TABLE_ENT(3) + TABLE_ENT(4) + TABLE_ENT(5) + TABLE_ENT(6) + TABLE_ENT(7) + TABLE_ENT(8) + TABLE_ENT(9) + TABLE_ENT(10) + TABLE_ENT(11) + TABLE_ENT(12) + TABLE_ENT(13) + TABLE_ENT(14) + TABLE_ENT(15) + ".balign 8\n\ + .size GTM_vpalignr_table, .-GTM_vpalignr_table\n\ + .popsection"); + +# undef INSN0 +# undef INSN +# undef TABLE_ENT_0 +# undef TABLE_ENT +#endif diff --git a/libitm/config/x86/x86_sse.cc b/libitm/config/x86/x86_sse.cc new file mode 100644 index 0000000..7440c94 --- /dev/null +++ b/libitm/config/x86/x86_sse.cc @@ -0,0 +1,122 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libitm_i.h" +#include "dispatch.h" + +// ??? Use memcpy for now, until we have figured out how to best instantiate +// these loads/stores. 
+CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M64, GTM::abi_disp()->, ) +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M128, GTM::abi_disp()->, ) + +void ITM_REGPARM +_ITM_LM64 (const _ITM_TYPE_M64 *ptr) +{ + GTM::GTM_LB (ptr, sizeof (*ptr)); +} + +void ITM_REGPARM +_ITM_LM128 (const _ITM_TYPE_M128 *ptr) +{ + GTM::GTM_LB (ptr, sizeof (*ptr)); +} + +// Helpers for re-aligning two 128-bit values. +#ifdef __SSSE3__ +# define INSN0 "movdqa %xmm1, %xmm0" +# define INSN(N) "palignr $" #N ", %xmm1, %xmm0" +# define TABLE_ENT_0 INSN0 "\n\tret\n\t" +# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t" + +asm(".pushsection .text\n\ + .balign 16\n\ + .globl GTM_palignr_table\n\ + .hidden GTM_palignr_table\n\ + .type GTM_palignr_table, @function\n\ +GTM_palignr_table:\n\t" + TABLE_ENT_0 + TABLE_ENT(1) + TABLE_ENT(2) + TABLE_ENT(3) + TABLE_ENT(4) + TABLE_ENT(5) + TABLE_ENT(6) + TABLE_ENT(7) + TABLE_ENT(8) + TABLE_ENT(9) + TABLE_ENT(10) + TABLE_ENT(11) + TABLE_ENT(12) + TABLE_ENT(13) + TABLE_ENT(14) + TABLE_ENT(15) + ".balign 8\n\ + .size GTM_palignr_table, .-GTM_palignr_table\n\ + .popsection"); + +# undef INSN0 +# undef INSN +# undef TABLE_ENT_0 +# undef TABLE_ENT +#elif defined(__SSE2__) +# define INSNS_8 "punpcklqdq %xmm1, %xmm0" +# define INSNS(N) "psrldq $"#N", %xmm0\n\t" \ + "pslldq $(16-"#N"), %xmm1\n\t" \ + "por %xmm1, %xmm0" +# define TABLE_ENT_0 "ret\n\t" +# define TABLE_ENT_8 ".balign 16\n\t" INSNS_8 "\n\tret\n\t" +# define TABLE_ENT(N) ".balign 16\n\t" INSNS(N) "\n\tret\n\t" + +asm(".pushsection .text\n\ + .balign 16\n\ + .globl GTM_pshift_table\n\ + .hidden GTM_pshift_table\n\ + .type GTM_pshift_table, @function\n\ +GTM_pshift_table:\n\t" + TABLE_ENT_0 + TABLE_ENT(1) + TABLE_ENT(2) + TABLE_ENT(3) + TABLE_ENT(4) + TABLE_ENT(5) + TABLE_ENT(6) + TABLE_ENT(7) + TABLE_ENT_8 + TABLE_ENT(9) + TABLE_ENT(10) + TABLE_ENT(11) + TABLE_ENT(12) + TABLE_ENT(13) + TABLE_ENT(14) + TABLE_ENT(15) + ".balign 8\n\ + .size GTM_pshift_table, .-GTM_pshift_table\n\ + .popsection"); + +# undef INSNS_8 +# 
undef INSNS +# undef TABLE_ENT_0 +# undef TABLE_ENT_8 +# undef TABLE_ENT +#endif |