From 79b1edb6b5142f414368add71ec8ba37d0cd028f Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Wed, 9 Nov 2011 14:54:55 -0800
Subject: libitm: Remove unused code.

In particular, unused code that's presenting portability problems.

From-SVN: r181241
---
 libitm/ChangeLog                   |  23 ++
 libitm/Makefile.am                 |   2 +-
 libitm/Makefile.in                 |  29 +-
 libitm/barrier.tpl                 | 170 ----------
 libitm/config/alpha/cacheline.h    |  86 +----
 libitm/config/alpha/unaligned.h    | 118 -------
 libitm/config/generic/cacheline.cc |  49 ---
 libitm/config/generic/cacheline.h  |  49 ---
 libitm/config/generic/cachepage.h  |  77 -----
 libitm/config/generic/tls.cc       |  47 ---
 libitm/config/generic/unaligned.h  | 228 --------------
 libitm/config/posix/cachepage.cc   | 183 -----------
 libitm/config/x86/cacheline.cc     |  73 -----
 libitm/config/x86/cacheline.h      | 123 +-------
 libitm/config/x86/unaligned.h      | 237 --------------
 libitm/config/x86/x86_avx.cc       |  59 ----
 libitm/config/x86/x86_sse.cc       |  79 -----
 libitm/libitm_i.h                  |   1 -
 libitm/memcpy.cc                   | 365 ---------------------
 libitm/memset.cc                   |  78 -----
 libitm/method-wbetl.cc             | 628 ------------------------------------
 libitm/testsuite/Makefile.in       |   4 +-
 22 files changed, 40 insertions(+), 2668 deletions(-)
 delete mode 100644 libitm/barrier.tpl
 delete mode 100644 libitm/config/alpha/unaligned.h
 delete mode 100644 libitm/config/generic/cacheline.cc
 delete mode 100644 libitm/config/generic/cachepage.h
 delete mode 100644 libitm/config/generic/unaligned.h
 delete mode 100644 libitm/config/posix/cachepage.cc
 delete mode 100644 libitm/config/x86/cacheline.cc
 delete mode 100644 libitm/config/x86/unaligned.h
 delete mode 100644 libitm/memcpy.cc
 delete mode 100644 libitm/memset.cc
 delete mode 100644 libitm/method-wbetl.cc

diff --git a/libitm/ChangeLog b/libitm/ChangeLog
index 0501d16..b1629b1 100644
--- a/libitm/ChangeLog
+++ b/libitm/ChangeLog
@@ -1,5 +1,28 @@
 2011-11-09  Richard Henderson
 
+	* barrier.tpl, memcpy.cc, memset.cc, method-wbetl.cc: Remove file.
+	* config/alpha/unaligned.h: Remove file.
+	* config/generic/unaligned.h: Remove file.
+	* config/x86/unaligned.h: Remove file.
+	* config/generic/cachepage.h: Remove file.
+	* config/posix/cachepage.cc: Remove file.
+	* config/generic/cacheline.cc: Remove file.
+	* config/x86/cacheline.cc: Remove file.
+	* config/generic/cacheline.h (gtm_cacheline): Remove the
+	store_mask, copy_mask, copy_mask_wb methods.
+	* config/x86/cacheline.h: Likewise.
+	* config/alpha/cacheline.h: Fall back to generic after setting size.
+	* config/generic/tls.cc (gtm_mask_stack): Remove.
+	* config/x86/x86_avx.cc (GTM_vpperm_shift): Remove.
+	(GTM_vpalignr_table): Remove.
+	* config/x86/x86_sse.cc (GTM_palignr_table): Remove.
+	(GTM_pshift_table): Remove.
+	* config/libitm_i.h: Don't include cachepage.h.
+	* Makefile.am (libitm_la_SOURCES): Remove cacheline.cc, cachepage.cc
+	* Makefile.in, testsuite/Makefile.in: Rebuild.
+
+2011-11-09  Richard Henderson
+
 	* config/x86/cacheline.h (gtm_cacheline::store_mask): Use .byte
 	to emit branch prediction hint.
 
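The entries above remove the gtm_cacheline::store_mask / copy_mask machinery. The core operation those helpers performed was a byte-masked store: merge into a destination word only the bytes selected by a per-byte mask (the deleted generic and x86 implementations appear in the hunks further down). Below is a minimal, portable C++ sketch of that operation for orientation only; the function name is invented here, and it deliberately ignores the atomicity concerns (e.g. the unlocked cmpxchg loop on x86) that the real helpers addressed.

```cpp
#include <cstdint>
#include <cstdio>

// Sketch (not libitm code): bit i of `mask` selects whether the byte at
// memory offset i of *dst is overwritten with the corresponding byte of
// `src`.  Unselected bytes are left untouched.  No atomicity is provided,
// unlike the removed gtm_cacheline::store_mask helpers.
static void store_bytes_under_mask(std::uint64_t *dst, std::uint64_t src,
                                   std::uint8_t mask)
{
  if (mask == 0xff)            // all bytes selected: plain full store
    {
      *dst = src;
      return;
    }
  if (mask == 0)               // nothing selected: leave *dst untouched
    return;

  unsigned char *d = reinterpret_cast<unsigned char *>(dst);
  const unsigned char *s = reinterpret_cast<const unsigned char *>(&src);
  for (int i = 0; i < 8; ++i)  // copy only the bytes whose mask bit is set
    if (mask & (1u << i))
      d[i] = s[i];
}

int main()
{
  std::uint64_t word = 0;
  // Update only the two lowest-addressed bytes of `word`.
  store_bytes_under_mask(&word, ~0ull, 0x03);
  std::printf("%016llx\n", static_cast<unsigned long long>(word));
  return 0;
}
```
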
diff --git a/libitm/Makefile.am b/libitm/Makefile.am index 6923409..4578986 100644 --- a/libitm/Makefile.am +++ b/libitm/Makefile.am @@ -41,7 +41,7 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \ libitm_la_SOURCES = \ aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc barrier.cc beginend.cc \ - clone.cc cacheline.cc cachepage.cc eh_cpp.cc local.cc \ + clone.cc eh_cpp.cc local.cc \ query.cc retry.cc rwlock.cc useraction.cc util.cc \ sjlj.S tls.cc method-serial.cc method-gl.cc diff --git a/libitm/Makefile.in b/libitm/Makefile.in index 7dc864b..8816580 100644 --- a/libitm/Makefile.in +++ b/libitm/Makefile.in @@ -48,6 +48,7 @@ DIST_COMMON = $(am__configure_deps) $(srcdir)/../config.guess \ $(top_srcdir)/configure ChangeLog ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ + $(top_srcdir)/../config/asmcfi.m4 \ $(top_srcdir)/../config/depstand.m4 \ $(top_srcdir)/../config/enable.m4 \ $(top_srcdir)/../config/futex.m4 \ @@ -94,17 +95,17 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \ LTLIBRARIES = $(toolexeclib_LTLIBRARIES) libitm_la_LIBADD = am__libitm_la_SOURCES_DIST = aatree.cc alloc.cc alloc_c.cc \ - alloc_cpp.cc barrier.cc beginend.cc clone.cc cacheline.cc \ - cachepage.cc eh_cpp.cc local.cc query.cc retry.cc rwlock.cc \ - useraction.cc util.cc sjlj.S tls.cc method-serial.cc \ - method-gl.cc x86_sse.cc x86_avx.cc futex.cc + alloc_cpp.cc barrier.cc beginend.cc clone.cc eh_cpp.cc \ + local.cc query.cc retry.cc rwlock.cc useraction.cc util.cc \ + sjlj.S tls.cc method-serial.cc method-gl.cc x86_sse.cc \ + x86_avx.cc futex.cc @ARCH_X86_TRUE@am__objects_1 = x86_sse.lo x86_avx.lo @ARCH_FUTEX_TRUE@am__objects_2 = futex.lo am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \ - barrier.lo beginend.lo clone.lo cacheline.lo cachepage.lo \ - eh_cpp.lo local.lo query.lo retry.lo rwlock.lo useraction.lo \ - util.lo sjlj.lo tls.lo method-serial.lo method-gl.lo \ - $(am__objects_1) $(am__objects_2) + barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \ + retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \ + method-serial.lo method-gl.lo $(am__objects_1) \ + $(am__objects_2) libitm_la_OBJECTS = $(am_libitm_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../depcomp @@ -234,8 +235,6 @@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ -FC = @FC@ -FCFLAGS = @FCFLAGS@ FGREP = @FGREP@ GREP = @GREP@ INSTALL = @INSTALL@ @@ -286,7 +285,6 @@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ @@ -371,10 +369,9 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \ -no-undefined libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc \ - barrier.cc beginend.cc clone.cc cacheline.cc cachepage.cc \ - eh_cpp.cc local.cc query.cc retry.cc rwlock.cc useraction.cc \ - util.cc sjlj.S tls.cc method-serial.cc method-gl.cc \ - $(am__append_1) $(am__append_2) + barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \ + retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \ + method-serial.cc method-gl.cc $(am__append_1) $(am__append_2) # Automake Documentation: # If your package has Texinfo files in many directories, you can use the @@ -500,8 +497,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc_cpp.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/beginend.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cacheline.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachepage.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clone.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eh_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/futex.Plo@am__quote@ diff --git a/libitm/barrier.tpl b/libitm/barrier.tpl deleted file mode 100644 index dcf1013..0000000 --- a/libitm/barrier.tpl +++ /dev/null @@ -1,170 +0,0 @@ -/* -*- c++ -*- */ -/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "unaligned.h" - -namespace { - -using namespace GTM; - -template -T do_read (const T *ptr, abi_dispatch::lock_type lock) -{ - // - // Find the cacheline that holds the current value of *PTR. - // - abi_dispatch *disp = abi_disp(); - uintptr_t iptr = reinterpret_cast(ptr); - // Normalize PTR by chopping off the bottom bits so we can search - // for PTR in the cacheline hash. - uintptr_t iline = iptr & -CACHELINE_SIZE; - // The position in the resulting cacheline where *PTR is actually stored. - uintptr_t iofs = iptr & (CACHELINE_SIZE - 1); - const gtm_cacheline *pline = reinterpret_cast(iline); - // Search for the actual cacheline that holds the current value of *PTR. - const gtm_cacheline *line = disp->read_lock(pline, lock); - - // Point to the position in the cacheline where *PTR is stored. - ptr = reinterpret_cast(&line->b[iofs]); - - // Straight up loads, because we're either aligned, or we don't care - // about alignment. - // - // If we require alignment on type T, do a straight load if we're - // aligned. Otherwise do a straight load IFF the load fits entirely - // in this cacheline. That is, it won't span multiple cachelines. - if (__builtin_expect (strict_alignment::value - ? (iofs & (sizeof (T) - 1)) == 0 - : iofs + sizeof(T) <= CACHELINE_SIZE, 1)) - { - do_normal_load: - return *ptr; - } - // If alignment on T is necessary, but we're unaligned, yet we fit - // entirely in this cacheline... do the unaligned load dance. - else if (__builtin_expect (strict_alignment::value - && iofs + sizeof(T) <= CACHELINE_SIZE, 1)) - { - do_unaligned_load: - return unaligned_load(ptr); - } - // Otherwise, this load will span multiple cachelines. - else - { - // Get the following cacheline for the rest of the data. 
- const gtm_cacheline *line2 = disp->read_lock(pline + 1, lock); - - // If the two cachelines are adjacent, just load it all in one - // swoop. - if (line2 == line + 1) - { - if (!strict_alignment::value) - goto do_normal_load; - else - goto do_unaligned_load; - } - else - { - // Otherwise, ask the backend to load from two different - // cachelines. - return unaligned_load2(line, line2, iofs); - } - } -} - -template -void do_write (T *ptr, T val, abi_dispatch::lock_type lock) -{ - // Note: See comments for do_read() above for hints on this - // function. Ideally we should abstract out a lot out of these two - // functions, and avoid all this duplication. - - abi_dispatch *disp = abi_disp(); - uintptr_t iptr = reinterpret_cast(ptr); - uintptr_t iline = iptr & -CACHELINE_SIZE; - uintptr_t iofs = iptr & (CACHELINE_SIZE - 1); - gtm_cacheline *pline = reinterpret_cast(iline); - gtm_cacheline_mask m = ((gtm_cacheline_mask)2 << (sizeof(T) - 1)) - 1; - abi_dispatch::mask_pair pair = disp->write_lock(pline, lock); - - ptr = reinterpret_cast(&pair.line->b[iofs]); - - if (__builtin_expect (strict_alignment::value - ? (iofs & (sizeof (val) - 1)) == 0 - : iofs + sizeof(val) <= CACHELINE_SIZE, 1)) - { - *pair.mask |= m << iofs; - do_normal_store: - *ptr = val; - } - else if (__builtin_expect (strict_alignment::value - && iofs + sizeof(val) <= CACHELINE_SIZE, 1)) - { - *pair.mask |= m << iofs; - do_unaligned_store: - unaligned_store(ptr, val); - } - else - { - *pair.mask |= m << iofs; - abi_dispatch::mask_pair pair2 = disp->write_lock(pline + 1, lock); - - uintptr_t ileft = CACHELINE_SIZE - iofs; - *pair2.mask |= m >> ileft; - - if (pair2.line == pair.line + 1) - { - if (!strict_alignment::value) - goto do_normal_store; - else - goto do_unaligned_store; - } - else - unaligned_store2(pair.line, pair2.line, iofs, val); - } -} - -} /* anonymous namespace */ - -#define ITM_READ(T, LOCK) \ - _ITM_TYPE_##T ITM_REGPARM _ITM_##LOCK##T (const _ITM_TYPE_##T *ptr) \ - { \ - return do_read (ptr, abi_dispatch::LOCK); \ - } - -#define ITM_WRITE(T, LOCK) \ - void ITM_REGPARM _ITM_##LOCK##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val) \ - { \ - do_write (ptr, val, abi_dispatch::LOCK); \ - } - -#define ITM_BARRIERS(T) \ - ITM_READ(T, R) \ - ITM_READ(T, RaR) \ - ITM_READ(T, RaW) \ - ITM_READ(T, RfW) \ - ITM_WRITE(T, W) \ - ITM_WRITE(T, WaR) \ - ITM_WRITE(T, WaW) diff --git a/libitm/config/alpha/cacheline.h b/libitm/config/alpha/cacheline.h index 5e38486..611a1c9 100644 --- a/libitm/config/alpha/cacheline.h +++ b/libitm/config/alpha/cacheline.h @@ -33,90 +33,6 @@ // modification mask, below. #define CACHELINE_SIZE 64 -#ifdef __alpha_bwx__ -# include "config/generic/cacheline.h" -#else -// If we don't have byte-word stores, then we'll never be able to -// adjust *all* of the byte loads/stores to be truely atomic. So -// only guarantee 4-byte aligned values atomicly stored, exactly -// like the native system. Use byte zap instructions to accelerate -// sub-word masked stores. +#include "config/generic/cacheline.h" -namespace GTM HIDDEN { - -// A gtm_cacheline_mask stores a modified bit for every modified byte -// in the cacheline with which it is associated. -typedef sized_integral::type gtm_cacheline_mask; - -union gtm_cacheline -{ - // Byte access to the cacheline. - unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE))); - - // Larger sized access to the cacheline. 
- uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)]; - uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; - uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; - gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; - - // Store S into D, but only the bytes specified by M. - static void store_mask(uint32_t *d, uint32_t s, uint8_t m); - static void store_mask(uint64_t *d, uint64_t s, uint8_t m); - - // Copy S to D, but only the bytes specified by M. - static void copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m); - - // A write barrier to emit after (a series of) copy_mask. - static void copy_mask_wb () { atomic_write_barrier(); } -}; - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m) -{ - const uint8_t tm = (1 << sizeof(uint32_t)) - 1; - - m &= tm; - if (__builtin_expect (m, tm)) - { - if (__builtin_expect (m == tm, 1)) - *d = s; - else - *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m); - } -} - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m) -{ - if (__builtin_expect (m, 0xff)) - { - if (__builtin_expect (m == 0xff, 1)) - *d = s; - else - { - typedef uint32_t *p32 __attribute__((may_alias)); - p32 d32 = reinterpret_cast(d); - - if ((m & 0x0f) == 0x0f) - { - d32[0] = s; - m &= 0xf0; - } - else if ((m & 0xf0) == 0xf0) - { - d32[1] = s >> 32; - m &= 0x0f; - } - - if (m) - *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m); - } - } -} - -} // namespace GTM - -#endif // __alpha_bwx__ #endif // LIBITM_ALPHA_CACHELINE_H diff --git a/libitm/config/alpha/unaligned.h b/libitm/config/alpha/unaligned.h deleted file mode 100644 index 3d091ae..0000000 --- a/libitm/config/alpha/unaligned.h +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - -#ifndef LIBITM_ALPHA_UNALIGNED_H -#define LIBITM_ALPHA_UNALIGNED_H 1 - -#define HAVE_ARCH_UNALIGNED_LOAD2_U2 1 -#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1 -#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1 - -#ifndef __alpha_bwx__ -#define HAVE_ARCH_UNALIGNED_STORE2_U2 1 -#endif -#define HAVE_ARCH_UNALIGNED_STORE2_U4 1 -#define HAVE_ARCH_UNALIGNED_STORE2_U8 1 - -#include "config/generic/unaligned.h" - -namespace GTM HIDDEN { - -template<> -inline uint16_t ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - - return __builtin_alpha_extwl (v1, ofs) | __builtin_alpha_extwh (v2, ofs); -} - -template<> -inline uint32_t ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - - return __builtin_alpha_extll (v1, ofs) + __builtin_alpha_extlh (v2, ofs); -} - -template<> -inline uint64_t ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - - return __builtin_alpha_extql (v1, ofs) | __builtin_alpha_extqh (v2, ofs); -} - -#ifndef __alpha_bwx__ -template<> -inline void -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint16_t val) -{ - uint32_t vl = (uint32_t)val << 24, vh = val >> 8; - - gtm_cacheline::store_mask (&c1->u32[CACHELINE_SIZE / 4 - 1], vl, 4); - gtm_cacheline::store_mask (&c2->u32[0], vh, 1); -} -#endif - -template<> -inline void -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint32_t val) -{ - uint64_t vl = __builtin_alpha_insll (val, ofs); - uint64_t ml = __builtin_alpha_insll (~0u, ofs); - uint64_t vh = __builtin_alpha_inslh (val, ofs); - uint64_t mh = __builtin_alpha_inslh (~0u, ofs); - - gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml); - gtm_cacheline::store_mask (&c2->u64[0], vh, mh); -} - -template<> -inline void -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint64_t val) -{ - uint64_t vl = __builtin_alpha_insql (val, ofs); - uint64_t ml = __builtin_alpha_insql (~0u, ofs); - uint64_t vh = __builtin_alpha_insqh (val, ofs); - uint64_t mh = __builtin_alpha_insqh (~0u, ofs); - - gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml); - gtm_cacheline::store_mask (&c2->u64[0], vh, mh); -} - -} // namespace GTM - -#endif // LIBITM_ALPHA_UNALIGNED_H diff --git a/libitm/config/generic/cacheline.cc b/libitm/config/generic/cacheline.cc deleted file mode 100644 index 108ffba..0000000 --- a/libitm/config/generic/cacheline.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. 
- - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "libitm_i.h" - - -namespace GTM HIDDEN { - -void -gtm_cacheline::copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m) -{ - const size_t n = sizeof (gtm_word); - - if (m == (gtm_cacheline_mask) -1) - { - *d = *s; - return; - } - if (__builtin_expect (m == 0, 0)) - return; - - for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n) - store_mask (&d->w[i], s->w[i], m); -} - -} // namespace GTM diff --git a/libitm/config/generic/cacheline.h b/libitm/config/generic/cacheline.h index 0a5af761..dd7d877 100644 --- a/libitm/config/generic/cacheline.h +++ b/libitm/config/generic/cacheline.h @@ -51,57 +51,8 @@ union gtm_cacheline uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; - - // Store S into D, but only the bytes specified by M. - template static void store_mask (T *d, T s, uint8_t m); - - // Copy S to D, but only the bytes specified by M. - static void copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m); - - // A write barrier to emit after (a series of) copy_mask. - // When we're emitting non-temporal stores, the normal strong - // ordering of the machine doesn't apply. - static void copy_mask_wb () { atomic_write_barrier(); } }; -template -inline void -gtm_cacheline::store_mask (T *d, T s, uint8_t m) -{ - const uint8_t tm = (1 << sizeof(T)) - 1; - - if (__builtin_expect (m & tm, tm)) - { - if (__builtin_expect ((m & tm) == tm, 1)) - *d = s; - else - { - const int half = sizeof(T) / 2; - typedef typename sized_integral::type half_t; - half_t *dhalf = reinterpret_cast(d); - half_t s1, s2; - - if (WORDS_BIGENDIAN) - s1 = s >> half*8, s2 = s; - else - s1 = s, s2 = s >> half*8; - - store_mask (dhalf, s1, m); - store_mask (dhalf + 1, s2, m >> half); - } - } -} - -template<> -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint8_t *d, uint8_t s, uint8_t m) -{ - if (m & 1) - *d = s; -} - } // namespace GTM #endif // LIBITM_CACHELINE_H diff --git a/libitm/config/generic/cachepage.h b/libitm/config/generic/cachepage.h deleted file mode 100644 index a5472f3..0000000 --- a/libitm/config/generic/cachepage.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. 
- - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifndef LIBITM_CACHEPAGE_H -#define LIBITM_CACHEPAGE_H 1 - -namespace GTM HIDDEN { - -// A "page" worth of saved cachelines plus modification masks. This -// arrangement is intended to minimize the overhead of alignment. The -// PAGE_SIZE defined by the target must be a constant for this to work, -// which means that this definition may not be the same as the real -// system page size. An additional define of FIXED_PAGE_SIZE by the -// target indicates that PAGE_SIZE exactly matches the system page size. - -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif - -struct gtm_cacheline_page -{ - static const size_t LINES - = ((PAGE_SIZE - sizeof(gtm_cacheline_page *)) - / (CACHELINE_SIZE + sizeof(gtm_cacheline_mask))); - - gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE))); - gtm_cacheline_mask masks[LINES]; - gtm_cacheline_page *prev; - - static gtm_cacheline_page * - page_for_line (gtm_cacheline *c) - { - return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE); - } - - gtm_cacheline_mask * - mask_for_line (gtm_cacheline *c) - { - size_t index = c - &this->lines[0]; - return &this->masks[index]; - } - - static gtm_cacheline_mask * - mask_for_page_line (gtm_cacheline *c) - { - gtm_cacheline_page *p = page_for_line (c); - return p->mask_for_line (c); - } - - static void *operator new (size_t); - static void operator delete (void *); -}; - -} // namespace GTM - -#endif // LIBITM_CACHEPAGE_H diff --git a/libitm/config/generic/tls.cc b/libitm/config/generic/tls.cc index c642111..e502e50 100644 --- a/libitm/config/generic/tls.cc +++ b/libitm/config/generic/tls.cc @@ -30,51 +30,4 @@ namespace GTM HIDDEN { __thread gtm_thread_tls _gtm_thr_tls; #endif -// Filter out any updates that overlap the libitm stack, as defined by -// TOP (entry point to library) and BOT (below current function). This -// definition should be fine for all stack-grows-down architectures. - -gtm_cacheline_mask __attribute__((noinline)) -gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask) -{ - void *top = gtm_thr()->jb.cfa; - void *bot = __builtin_dwarf_cfa(); - - // We must have come through an entry point that set TOP. - assert (top != NULL); - - if (line + 1 < bot) - { - // Since we don't have the REAL stack boundaries for this thread, - // we cannot know if this is a dead write to a stack address below - // the current function or if it is write to another VMA. In either - // case allowing the write should not affect correctness. - } - else if (line >= top) - { - // A valid write to an address in an outer stack frame, or a write - // to another VMA. - } - else - { - uintptr_t diff = (uintptr_t)top - (uintptr_t)line; - if (diff >= CACHELINE_SIZE) - { - // The write is either fully within the proscribed area, or the tail - // of the cacheline overlaps the proscribed area. Assume that all - // stacks are at least cacheline aligned and declare the head of the - // cacheline dead. - mask = 0; - } - else - { - // The head of the cacheline is within the proscribed area, but the - // tail of the cacheline is live. Eliminate the dead writes. 
- mask &= (gtm_cacheline_mask)-1 << diff; - } - } - - return mask; -} - } // namespace GTM diff --git a/libitm/config/generic/unaligned.h b/libitm/config/generic/unaligned.h deleted file mode 100644 index 50cb13b..0000000 --- a/libitm/config/generic/unaligned.h +++ /dev/null @@ -1,228 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifndef LIBITM_UNALIGNED_H -#define LIBITM_UNALIGNED_H 1 - -namespace GTM HIDDEN { - -#ifndef STRICT_ALIGNMENT -#define STRICT_ALIGNMENT 1 -#endif - -// A type trait for whether type T requires strict alignment. -// The generic types are assumed to all be the same; specializations -// for target-specific types should be done in config/cpu/unaligned.h. -template - struct strict_alignment - : public std::integral_constant - { }; - -// A helper template for accessing an integral type the same size as T -template - struct make_integral - : public sized_integral - { }; - -// A helper class for accessing T as an unaligned value. -template -struct __attribute__((packed)) unaligned_helper - { T x; }; - -// A helper class for view-converting T as an integer. -template -union view_convert_helper -{ - typedef T type; - typedef make_integral itype; - - type t; - itype i; -}; - -// Generate an unaligned load sequence. -// The compiler knows how to do this for any specific type. -template -inline T ALWAYS_INLINE -unaligned_load(const void *t) -{ - typedef unaligned_helper UT; - const UT *ut = reinterpret_cast(t); - return ut->x; -} - -// Generate an unaligned store sequence. -template -inline void ALWAYS_INLINE -unaligned_store(void *t, T val) -{ - typedef unaligned_helper UT; - UT *ut = reinterpret_cast(t); - ut->x = val; -} - -// Generate an unaligned load from two different cachelines. -// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE. -template -inline T ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs) -{ - size_t left = CACHELINE_SIZE - ofs; - T ret; - - memcpy (&ret, &c1->b[ofs], left); - memcpy ((char *)&ret + ofs, c2, sizeof(T) - left); - - return ret; -} - -// Generate an unaligned store into two different cachelines. -// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE. 
-template -inline void ALWAYS_INLINE -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val) -{ - size_t left = CACHELINE_SIZE - ofs; - memcpy (&c1->b[ofs], &val, left); - memcpy (c2, (char *)&val + left, sizeof(T) - left); -} - -#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2 -template<> -inline uint16_t ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint16_t v1 = c1->b[CACHELINE_SIZE - 1]; - uint16_t v2 = c2->b[0]; - - if (WORDS_BIGENDIAN) - return v1 << 8 | v2; - else - return v2 << 8 | v1; -} -#endif - -#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4 -template<> -inline uint32_t ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - uint32_t v2 = c2->u32[0]; - int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8; - int s1 = sizeof(uint32_t) * 8 - s2; - - if (WORDS_BIGENDIAN) - return v1 << s2 | v2 >> s1; - else - return v2 << s2 | v1 >> s1; -} -#endif - -#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U8 -template<> -inline uint64_t ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8; - int s1 = sizeof(uint64_t) * 8 - s2; - - if (WORDS_BIGENDIAN) - return v1 << s2 | v2 >> s1; - else - return v2 << s2 | v1 >> s1; -} -#endif - -template<> -inline float ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - typedef view_convert_helper VC; VC vc; - vc.i = unaligned_load2(c1, c2, ofs); - return vc.t; -} - -template<> -inline double ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - typedef view_convert_helper VC; VC vc; - vc.i = unaligned_load2(c1, c2, ofs); - return vc.t; -} - -#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2 -template<> -inline void ALWAYS_INLINE -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint16_t val) -{ - uint8_t vl = val, vh = val >> 8; - - if (WORDS_BIGENDIAN) - { - c1->b[CACHELINE_SIZE - 1] = vh; - c2->b[0] = vl; - } - else - { - c1->b[CACHELINE_SIZE - 1] = vl; - c2->b[0] = vh; - } -} -#endif - -#if 0 -#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4 -template<> -inline void ALWAYS_INLINE -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint32_t val) -{ - // ??? We could reuse the store_mask stuff here. -} -#endif - -template<> -inline void ALWAYS_INLINE -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, float val) -{ - typedef view_convert_helper VC; VC vc; - vc.t = val; - unaligned_store2(c1, c2, ofs, vc.i); -} -#endif - -} // namespace GTM - -#endif // LIBITM_UNALIGNED_H diff --git a/libitm/config/posix/cachepage.cc b/libitm/config/posix/cachepage.cc deleted file mode 100644 index 128cd54..0000000 --- a/libitm/config/posix/cachepage.cc +++ /dev/null @@ -1,183 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. 
- - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "libitm_i.h" -#include - -// -// We have three possibilities for alloction: mmap, memalign, posix_memalign -// - -#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO) -#include -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif - -namespace GTM HIDDEN { - -#if defined(HAVE_MMAP_ANON) -# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -# define MAP_ANONYMOUS MAP_ANON -# endif -# define dev_zero -1 -#elif defined(HAVE_MMAP_DEV_ZERO) -# ifndef MAP_ANONYMOUS -# define MAP_ANONYMOUS 0 -# endif -static int dev_zero = -1; -#endif - -#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO) -/* If we get here, we've already opened /dev/zero and verified that - PAGE_SIZE is valid for the system. */ -static gtm_cacheline_page * alloc_mmap (void) UNUSED; -static gtm_cacheline_page * -alloc_mmap (void) -{ - gtm_cacheline_page *r; - r = (gtm_cacheline_page *) mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, dev_zero, 0); - if (r == (gtm_cacheline_page *) MAP_FAILED) - abort (); - return r; -} -#endif /* MMAP_ANON | MMAP_DEV_ZERO */ - -#ifdef HAVE_MEMALIGN -static gtm_cacheline_page * alloc_memalign (void) UNUSED; -static gtm_cacheline_page * -alloc_memalign (void) -{ - gtm_cacheline_page *r; - r = (gtm_cacheline_page *) memalign (PAGE_SIZE, PAGE_SIZE); - if (r == NULL) - abort (); - return r; -} -#endif /* MEMALIGN */ - -#ifdef HAVE_POSIX_MEMALIGN -static gtm_cacheline_page *alloc_posix_memalign (void) UNUSED; -static gtm_cacheline_page * -alloc_posix_memalign (void) -{ - void *r; - if (posix_memalign (&r, PAGE_SIZE, PAGE_SIZE)) - abort (); - return (gtm_cacheline_page *) r; -} -#endif /* POSIX_MEMALIGN */ - -#if defined(HAVE_MMAP_ANON) && defined(FIXED_PAGE_SIZE) -# define alloc_page alloc_mmap -#elif defined(HAVE_MMAP_DEV_ZERO) && defined(FIXED_PAGE_SIZE) -static gtm_cacheline_page * -alloc_page (void) -{ - if (dev_zero < 0) - { - dev_zero = open ("/dev/zero", O_RDWR); - assert (dev_zero >= 0); - } - return alloc_mmap (); -} -#elif defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO) -static gtm_cacheline_page * (*alloc_page) (void); -static void __attribute__((constructor)) -init_alloc_page (void) -{ - size_t page_size = getpagesize (); - if (page_size <= PAGE_SIZE && PAGE_SIZE % page_size == 0) - { -# ifndef HAVE_MMAP_ANON - dev_zero = open ("/dev/zero", O_RDWR); - assert (dev_zero >= 0); -# endif - alloc_page = alloc_mmap; - return; - } -# ifdef HAVE_MEMALIGN - alloc_page = alloc_memalign; -# elif defined(HAVE_POSIX_MEMALIGN) - alloc_page = alloc_posix_memalign; -# else -# error "No fallback aligned memory allocation method" -# endif -} -#elif defined(HAVE_MEMALIGN) -# define alloc_page alloc_memalign -#elif defined(HAVE_POSIX_MEMALIGN) -# define alloc_page alloc_posix_memalign -#else -# error "No aligned memory allocation method" -#endif - -static gtm_cacheline_page *free_pages; -static pthread_mutex_t 
free_page_lock = PTHREAD_MUTEX_INITIALIZER; - -void * -gtm_cacheline_page::operator new (size_t size) -{ - assert (size == sizeof (gtm_cacheline_page)); - assert (size <= PAGE_SIZE); - - pthread_mutex_lock(&free_page_lock); - - gtm_cacheline_page *r = free_pages; - free_pages = r ? r->prev : NULL; - - pthread_mutex_unlock(&free_page_lock); - - if (r == NULL) - r = alloc_page (); - - return r; -} - -void -gtm_cacheline_page::operator delete (void *xhead) -{ - gtm_cacheline_page *head = static_cast(xhead); - gtm_cacheline_page *tail; - - if (head == 0) - return; - - /* ??? We should eventually really free some of these. */ - - for (tail = head; tail->prev != 0; tail = tail->prev) - continue; - - pthread_mutex_lock(&free_page_lock); - - tail->prev = free_pages; - free_pages = head; - - pthread_mutex_unlock(&free_page_lock); -} - -} // namespace GTM diff --git a/libitm/config/x86/cacheline.cc b/libitm/config/x86/cacheline.cc deleted file mode 100644 index 2e49a35..0000000 --- a/libitm/config/x86/cacheline.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "libitm_i.h" - -namespace GTM HIDDEN { - -uint32_t const gtm_bit_to_byte_mask[16] = -{ - 0x00000000, - 0x000000ff, - 0x0000ff00, - 0x0000ffff, - 0x00ff0000, - 0x00ff00ff, - 0x00ffff00, - 0x00ffffff, - 0xff000000, - 0xff0000ff, - 0xff00ff00, - 0xff00ffff, - 0xffff0000, - 0xffff00ff, - 0xffffff00, - 0xffffffff -}; - -#ifdef __SSE2__ -# define MEMBER m128i -#else -# define MEMBER w -#endif - -void -gtm_cacheline::copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m) -{ - if (m == (gtm_cacheline_mask)-1) - { - *d = *s; - return; - } - if (__builtin_expect (m == 0, 0)) - return; - - size_t n = sizeof(d->MEMBER[0]); - for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n) - store_mask (&d->MEMBER[i], s->MEMBER[i], m); -} - -} // namespace GTM diff --git a/libitm/config/x86/cacheline.h b/libitm/config/x86/cacheline.h index f91d7cc..337c999 100644 --- a/libitm/config/x86/cacheline.h +++ b/libitm/config/x86/cacheline.h @@ -40,8 +40,6 @@ namespace GTM HIDDEN { // in the cacheline with which it is associated. typedef sized_integral::type gtm_cacheline_mask; -extern uint32_t const gtm_bit_to_byte_mask[16]; - union gtm_cacheline { // Byte access to the cacheline. @@ -67,23 +65,6 @@ union gtm_cacheline __m256i m256i[CACHELINE_SIZE / sizeof(__m256i)]; #endif - // Store S into D, but only the bytes specified by M. 
- static void store_mask (uint32_t *d, uint32_t s, uint8_t m); - static void store_mask (uint64_t *d, uint64_t s, uint8_t m); -#ifdef __SSE2__ - static void store_mask (__m128i *d, __m128i s, uint16_t m); -#endif - - // Copy S to D, but only the bytes specified by M. - static void copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m); - - // A write barrier to emit after (a series of) copy_mask. - // When we're emitting non-temporal stores, the normal strong - // ordering of the machine doesn't apply. - static void copy_mask_wb (); - #if defined(__SSE__) || defined(__AVX__) // Copy S to D; only bother defining if we can do this more efficiently // than the compiler-generated default implementation. @@ -91,14 +72,6 @@ union gtm_cacheline #endif // SSE, AVX }; -inline void -gtm_cacheline::copy_mask_wb () -{ -#ifdef __SSE2__ - _mm_sfence (); -#endif -} - #if defined(__SSE__) || defined(__AVX__) inline gtm_cacheline& ALWAYS_INLINE gtm_cacheline::operator= (const gtm_cacheline & __restrict s) @@ -141,103 +114,11 @@ gtm_cacheline::operator= (const gtm_cacheline & __restrict s) } return *this; -} -#endif -// Support masked integer stores more efficiently with an unlocked cmpxchg -// insn. My reasoning is that while we write to locations that we do not wish -// to modify, we do it in an uninterruptable insn, and so we either truely -// write back the original data or the insn fails -- unlike with a -// load/and/or/write sequence which can be interrupted either by a kernel -// task switch or an unlucky cacheline steal by another processor. Avoiding -// the LOCK prefix improves performance by a factor of 10, and we don't need -// the memory barrier semantics implied by that prefix. - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m) -{ - gtm_cacheline_mask tm = (1 << sizeof (s)) - 1; - if (__builtin_expect (m & tm, tm)) - { - if (__builtin_expect ((m & tm) == tm, 1)) - *d = s; - else - { - gtm_cacheline_mask bm = gtm_bit_to_byte_mask[m & 15]; - gtm_word n, o = *d; - - __asm("\n0:\t" - "mov %[o], %[n]\n\t" - "and %[m], %[n]\n\t" - "or %[s], %[n]\n\t" - "cmpxchg %[n], %[d]\n\t" - ".byte 0x2e\n\t" // predict not-taken, aka jnz,pn - "jnz 0b" - : [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o) - : [s] "r" (s & bm), [m] "r" (~bm)); - } - } +#undef CP +#undef TYPE } - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m) -{ - gtm_cacheline_mask tm = (1 << sizeof (s)) - 1; - if (__builtin_expect (m & tm, tm)) - { - if (__builtin_expect ((m & tm) == tm, 1)) - *d = s; - else - { -#ifdef __x86_64__ - uint32_t bl = gtm_bit_to_byte_mask[m & 15]; - uint32_t bh = gtm_bit_to_byte_mask[(m >> 4) & 15]; - gtm_cacheline_mask bm = bl | ((gtm_cacheline_mask)bh << 31 << 1); - uint64_t n, o = *d; - __asm("\n0:\t" - "mov %[o], %[n]\n\t" - "and %[m], %[n]\n\t" - "or %[s], %[n]\n\t" - "cmpxchg %[n], %[d]\n\t" - ".byte 0x2e\n\t" // predict not-taken, aka jnz,pn - "jnz 0b" - : [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o) - : [s] "r" (s & bm), [m] "r" (~bm)); -#else - /* ??? While it's possible to perform this operation with - cmpxchg8b, the sequence requires all 7 general registers - and thus cannot be performed with -fPIC. Don't even try. 
*/ - uint32_t *d32 = reinterpret_cast(d); - store_mask (d32, s, m); - store_mask (d32 + 1, s >> 32, m >> 4); #endif - } - } -} - -#ifdef __SSE2__ -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (__m128i *d, __m128i s, uint16_t m) -{ - if (__builtin_expect (m == 0, 0)) - return; - if (__builtin_expect (m == 0xffff, 1)) - *d = s; - else - { - __m128i bm0, bm1, bm2, bm3; - bm0 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm1 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm2 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm3 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm0 = _mm_unpacklo_epi32 (bm0, bm1); - bm2 = _mm_unpacklo_epi32 (bm2, bm3); - bm0 = _mm_unpacklo_epi64 (bm0, bm2); - - _mm_maskmoveu_si128 (s, bm0, (char *)d); - } -} -#endif // SSE2 } // namespace GTM diff --git a/libitm/config/x86/unaligned.h b/libitm/config/x86/unaligned.h deleted file mode 100644 index 01abc47..0000000 --- a/libitm/config/x86/unaligned.h +++ /dev/null @@ -1,237 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - -#ifndef LIBITM_X86_UNALIGNED_H -#define LIBITM_X86_UNALIGNED_H 1 - -#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1 -#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1 - -#include "config/generic/unaligned.h" - -namespace GTM HIDDEN { - -template<> -inline uint32_t -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint32_t r, lo, hi; - lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - hi = c2->u32[0]; - asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo)); - return r; -} - -template<> -inline uint64_t -unaligned_load2(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ -#ifdef __x86_64__ - uint64_t r, lo, hi; - lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - hi = c2->u64[0]; - asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo)); - return r; -#else - uint32_t v0, v1, v2; - uint64_t r; - - if (ofs < CACHELINE_SIZE - 4) - { - v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2]; - v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - v2 = c2->u32[0]; - } - else - { - v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - v1 = c2->u32[0]; - v2 = c2->u32[1]; - } - ofs = (ofs & 3) * 8; - asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]" - : "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2)); - - return r; -#endif -} - -#if defined(__SSE2__) || defined(__MMX__) -template<> -inline _ITM_TYPE_M64 -unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ -# ifdef __x86_64__ - __m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]); - __m128i hi = _mm_movpi64_epi64 (c2->m64[0]); - - ofs = (ofs & 7) * 8; - lo = _mm_srli_epi64 (lo, ofs); - hi = _mm_slli_epi64 (hi, 64 - ofs); - lo = lo | hi; - return _mm_movepi64_pi64 (lo); -# else - // On 32-bit we're about to return the result in an MMX register, so go - // ahead and do the computation in that unit, even if SSE2 is available. - __m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1]; - __m64 hi = c2->m64[0]; - - ofs = (ofs & 7) * 8; - lo = _mm_srli_si64 (lo, ofs); - hi = _mm_slli_si64 (hi, 64 - ofs); - return lo | hi; -# endif -} -#endif // SSE2 or MMX - -// The SSE types are strictly aligned. -#ifdef __SSE__ -template<> - struct strict_alignment<_ITM_TYPE_M128> - : public std::true_type - { }; - -// Expand the unaligned SSE move instructions. -template<> -inline _ITM_TYPE_M128 -unaligned_load<_ITM_TYPE_M128>(const void *t) -{ - return _mm_loadu_ps (static_cast(t)); -} - -template<> -inline void -unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val) -{ - _mm_storeu_ps (static_cast(t), val); -} -#endif // SSE - -#ifdef __AVX__ -// The AVX types are strictly aligned when it comes to vmovaps vs vmovups. 
-template<> - struct strict_alignment<_ITM_TYPE_M256> - : public std::true_type - { }; - -template<> -inline _ITM_TYPE_M256 -unaligned_load<_ITM_TYPE_M256>(const void *t) -{ - return _mm256_loadu_ps (static_cast(t)); -} - -template<> -inline void -unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val) -{ - _mm256_storeu_ps (static_cast(t), val); -} -#endif // AVX - -#ifdef __XOP__ -# define HAVE_ARCH_REALIGN_M128I 1 -extern const __v16qi GTM_vpperm_shift[16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]); -} -#elif defined(__AVX__) -# define HAVE_ARCH_REALIGN_M128I 1 -extern "C" const uint64_t GTM_vpalignr_table[16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - register __m128i xmm0 __asm__("xmm0") = hi; - register __m128i xmm1 __asm__("xmm1") = lo; - __asm("call *%2" : "+x"(xmm0) : "x"(xmm1), - "r"(>M_vpalignr_table[byte_count])); - return xmm0; -} -#elif defined(__SSSE3__) -# define HAVE_ARCH_REALIGN_M128I 1 -extern "C" const uint64_t GTM_palignr_table[16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - register __m128i xmm0 __asm__("xmm0") = hi; - register __m128i xmm1 __asm__("xmm1") = lo; - __asm("call *%2" : "+x"(xmm0) : "x"(xmm1), - "r"(>M_palignr_table[byte_count])); - return xmm0; -} -#elif defined(__SSE2__) -# define HAVE_ARCH_REALIGN_M128I 1 -extern "C" const char GTM_pshift_table[16 * 16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - register __m128i xmm0 __asm__("xmm0") = lo; - register __m128i xmm1 __asm__("xmm1") = hi; - __asm("call *%2" : "+x"(xmm0), "+x"(xmm1) - : "r"(GTM_pshift_table + byte_count*16)); - return xmm0; -} -#endif // XOP, AVX, SSSE3, SSE2 - -#ifdef HAVE_ARCH_REALIGN_M128I -template<> -inline _ITM_TYPE_M128 -unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - return (_ITM_TYPE_M128) - realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1], - c2->m128i[0], ofs & 15); -} -#endif // HAVE_ARCH_REALIGN_M128I - -#ifdef __AVX__ -template<> -inline _ITM_TYPE_M256 -unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - __m128i v0, v1; - __m256i r; - - v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs); - if (ofs < CACHELINE_SIZE - 16) - v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]); - else - v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]); - - r = _mm256_castsi128_si256 ((__m128i)v0); - r = _mm256_insertf128_si256 (r, (__m128i)v1, 1); - return (_ITM_TYPE_M256) r; -} -#endif // AVX - -} // namespace GTM - -#endif // LIBITM_X86_UNALIGNED_H diff --git a/libitm/config/x86/x86_avx.cc b/libitm/config/x86/x86_avx.cc index 9d1ddfb..30420aa 100644 --- a/libitm/config/x86/x86_avx.cc +++ b/libitm/config/x86/x86_avx.cc @@ -34,62 +34,3 @@ _ITM_LM256 (const _ITM_TYPE_M256 *ptr) { GTM::GTM_LB (ptr, sizeof (*ptr)); } - -// Helpers for re-aligning two 128-bit values. 
-#ifdef __XOP__ -const __v16qi GTM::GTM_vpperm_shift[16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, - { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }, - { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }, - { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 }, - { 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 }, - { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 }, - { 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }, - { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, - { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }, - { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }, - { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }, - { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 }, - { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 }, - { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 }, - { 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }, -}; -#else -# define INSN0 "movdqa %xmm1, %xmm0" -# define INSN(N) "vpalignr $" #N ", %xmm0, %xmm1, %xmm0" -# define TABLE_ENT_0 INSN0 "\n\tret\n\t" -# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t" - -asm(".pushsection .text\n\ - .balign 16\n\ - .globl GTM_vpalignr_table\n\ - .hidden GTM_vpalignr_table\n\ - .type GTM_vpalignr_table, @function\n\ -GTM_vpalignr_table:\n\t" - TABLE_ENT_0 - TABLE_ENT(1) - TABLE_ENT(2) - TABLE_ENT(3) - TABLE_ENT(4) - TABLE_ENT(5) - TABLE_ENT(6) - TABLE_ENT(7) - TABLE_ENT(8) - TABLE_ENT(9) - TABLE_ENT(10) - TABLE_ENT(11) - TABLE_ENT(12) - TABLE_ENT(13) - TABLE_ENT(14) - TABLE_ENT(15) - ".balign 8\n\ - .size GTM_vpalignr_table, .-GTM_vpalignr_table\n\ - .popsection"); - -# undef INSN0 -# undef INSN -# undef TABLE_ENT_0 -# undef TABLE_ENT -#endif diff --git a/libitm/config/x86/x86_sse.cc b/libitm/config/x86/x86_sse.cc index 7440c94..5a1c67a 100644 --- a/libitm/config/x86/x86_sse.cc +++ b/libitm/config/x86/x86_sse.cc @@ -41,82 +41,3 @@ _ITM_LM128 (const _ITM_TYPE_M128 *ptr) { GTM::GTM_LB (ptr, sizeof (*ptr)); } - -// Helpers for re-aligning two 128-bit values. 
-#ifdef __SSSE3__ -# define INSN0 "movdqa %xmm1, %xmm0" -# define INSN(N) "palignr $" #N ", %xmm1, %xmm0" -# define TABLE_ENT_0 INSN0 "\n\tret\n\t" -# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t" - -asm(".pushsection .text\n\ - .balign 16\n\ - .globl GTM_palignr_table\n\ - .hidden GTM_palignr_table\n\ - .type GTM_palignr_table, @function\n\ -GTM_palignr_table:\n\t" - TABLE_ENT_0 - TABLE_ENT(1) - TABLE_ENT(2) - TABLE_ENT(3) - TABLE_ENT(4) - TABLE_ENT(5) - TABLE_ENT(6) - TABLE_ENT(7) - TABLE_ENT(8) - TABLE_ENT(9) - TABLE_ENT(10) - TABLE_ENT(11) - TABLE_ENT(12) - TABLE_ENT(13) - TABLE_ENT(14) - TABLE_ENT(15) - ".balign 8\n\ - .size GTM_palignr_table, .-GTM_palignr_table\n\ - .popsection"); - -# undef INSN0 -# undef INSN -# undef TABLE_ENT_0 -# undef TABLE_ENT -#elif defined(__SSE2__) -# define INSNS_8 "punpcklqdq %xmm1, %xmm0" -# define INSNS(N) "psrldq $"#N", %xmm0\n\t" \ - "pslldq $(16-"#N"), %xmm1\n\t" \ - "por %xmm1, %xmm0" -# define TABLE_ENT_0 "ret\n\t" -# define TABLE_ENT_8 ".balign 16\n\t" INSNS_8 "\n\tret\n\t" -# define TABLE_ENT(N) ".balign 16\n\t" INSNS(N) "\n\tret\n\t" - -asm(".pushsection .text\n\ - .balign 16\n\ - .globl GTM_pshift_table\n\ - .hidden GTM_pshift_table\n\ - .type GTM_pshift_table, @function\n\ -GTM_pshift_table:\n\t" - TABLE_ENT_0 - TABLE_ENT(1) - TABLE_ENT(2) - TABLE_ENT(3) - TABLE_ENT(4) - TABLE_ENT(5) - TABLE_ENT(6) - TABLE_ENT(7) - TABLE_ENT_8 - TABLE_ENT(9) - TABLE_ENT(10) - TABLE_ENT(11) - TABLE_ENT(12) - TABLE_ENT(13) - TABLE_ENT(14) - TABLE_ENT(15) - ".balign 8\n\ - .size GTM_pshift_table, .-GTM_pshift_table\n\ - .popsection"); - -# undef INSNS_8 -# undef INSNS -# undef TABLE_ENT_0 -# undef TABLE_ENT_8 -# undef TABLE_ENT -#endif diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h index c188fa5..58e43b0 100644 --- a/libitm/libitm_i.h +++ b/libitm/libitm_i.h @@ -78,7 +78,6 @@ enum gtm_restart_reason #include "rwlock.h" #include "aatree.h" #include "cacheline.h" -#include "cachepage.h" #include "stmlock.h" #include "dispatch.h" #include "containers.h" diff --git a/libitm/memcpy.cc b/libitm/memcpy.cc deleted file mode 100644 index 9919e6a..0000000 --- a/libitm/memcpy.cc +++ /dev/null @@ -1,365 +0,0 @@ -/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "libitm_i.h" - -using namespace GTM; - -static void -do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size, - abi_dispatch::lock_type W, abi_dispatch::lock_type R) -{ - abi_dispatch *disp = abi_disp(); - // The position in the destination cacheline where *IDST starts. 
- uintptr_t dofs = idst & (CACHELINE_SIZE - 1); - // The position in the source cacheline where *ISRC starts. - uintptr_t sofs = isrc & (CACHELINE_SIZE - 1); - const gtm_cacheline *src - = reinterpret_cast(isrc & -CACHELINE_SIZE); - gtm_cacheline *dst - = reinterpret_cast(idst & -CACHELINE_SIZE); - const gtm_cacheline *sline; - abi_dispatch::mask_pair dpair; - - if (size == 0) - return; - - // If both SRC and DST data start at the same position in the cachelines, - // we can easily copy the data in tandem, cacheline by cacheline... - if (dofs == sofs) - { - // We copy the data in three stages: - - // (a) Copy stray bytes at the beginning that are smaller than a - // cacheline. - if (sofs != 0) - { - size_t sleft = CACHELINE_SIZE - sofs; - size_t min = (size <= sleft ? size : sleft); - - dpair = disp->write_lock(dst, W); - sline = disp->read_lock(src, R); - *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs; - memcpy (&dpair.line->b[sofs], &sline->b[sofs], min); - dst++; - src++; - size -= min; - } - - // (b) Copy subsequent cacheline sized chunks. - while (size >= CACHELINE_SIZE) - { - dpair = disp->write_lock(dst, W); - sline = disp->read_lock(src, R); - *dpair.mask = -1; - *dpair.line = *sline; - dst++; - src++; - size -= CACHELINE_SIZE; - } - - // (c) Copy anything left over. - if (size != 0) - { - dpair = disp->write_lock(dst, W); - sline = disp->read_lock(src, R); - *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1; - memcpy (dpair.line, sline, size); - } - } - // ... otherwise, we must copy the data in disparate hunks using - // temporary storage. - else - { - gtm_cacheline c; - size_t sleft = CACHELINE_SIZE - sofs; - - sline = disp->read_lock(src, R); - - // As above, we copy the data in three stages: - - // (a) Copy stray bytes at the beginning that are smaller than a - // cacheline. - if (dofs != 0) - { - size_t dleft = CACHELINE_SIZE - dofs; - size_t min = (size <= dleft ? size : dleft); - - dpair = disp->write_lock(dst, W); - *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs; - - // If what's left in the source cacheline will fit in the - // rest of the destination cacheline, straight up copy it. - if (min <= sleft) - { - memcpy (&dpair.line->b[dofs], &sline->b[sofs], min); - sofs += min; - } - // Otherwise, we need more bits from the source cacheline - // that are available. Piece together what we need from - // contiguous (source) cachelines, into temp space, and copy - // it over. - else - { - memcpy (&c, &sline->b[sofs], sleft); - sline = disp->read_lock(++src, R); - sofs = min - sleft; - memcpy (&c.b[sleft], sline, sofs); - memcpy (&dpair.line->b[dofs], &c, min); - } - sleft = CACHELINE_SIZE - sofs; - - dst++; - size -= min; - } - - // (b) Copy subsequent cacheline sized chunks. - while (size >= CACHELINE_SIZE) - { - // We have a full (destination) cacheline where to put the - // data, but to get to the corresponding cacheline sized - // chunk in the source, we have to piece together two - // contiguous source cachelines. - - memcpy (&c, &sline->b[sofs], sleft); - sline = disp->read_lock(++src, R); - memcpy (&c.b[sleft], sline, sofs); - - dpair = disp->write_lock(dst, W); - *dpair.mask = -1; - *dpair.line = c; - - dst++; - size -= CACHELINE_SIZE; - } - - // (c) Copy anything left over. - if (size != 0) - { - dpair = disp->write_lock(dst, W); - *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1; - // If what's left to copy is entirely in the remaining - // source cacheline, do it. 
- if (size <= sleft) - memcpy (dpair.line, &sline->b[sofs], size); - // Otherwise, piece together the remaining bits, and copy. - else - { - memcpy (&c, &sline->b[sofs], sleft); - sline = disp->read_lock(++src, R); - memcpy (&c.b[sleft], sline, size - sleft); - memcpy (dpair.line, &c, size); - } - } - } -} - -static void -do_memmove (uintptr_t idst, uintptr_t isrc, size_t size, - abi_dispatch::lock_type W, abi_dispatch::lock_type R) -{ - abi_dispatch *disp = abi_disp(); - uintptr_t dleft, sleft, sofs, dofs; - const gtm_cacheline *sline; - abi_dispatch::mask_pair dpair; - - if (size == 0) - return; - - /* The co-aligned memmove below doesn't work for DST == SRC, so filter - that out. It's tempting to just return here, as this is a no-op move. - However, our caller has the right to expect the locks to be acquired - as advertized. */ - if (__builtin_expect (idst == isrc, 0)) - { - /* If the write lock is already acquired, nothing to do. */ - if (W == abi_dispatch::WaW) - return; - /* If the destination is protected, acquire a write lock. */ - if (W != abi_dispatch::NOLOCK) - R = abi_dispatch::RfW; - /* Notice serial mode, where we don't acquire locks at all. */ - if (R == abi_dispatch::NOLOCK) - return; - - idst = isrc + size; - for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE) - disp->read_lock(reinterpret_cast(isrc), R); - return; - } - - /* Fall back to memcpy if the implementation above can handle it. */ - if (idst < isrc || isrc + size <= idst) - { - do_memcpy (idst, isrc, size, W, R); - return; - } - - /* What remains requires a backward copy from the end of the blocks. */ - idst += size; - isrc += size; - dofs = idst & (CACHELINE_SIZE - 1); - sofs = isrc & (CACHELINE_SIZE - 1); - dleft = CACHELINE_SIZE - dofs; - sleft = CACHELINE_SIZE - sofs; - - gtm_cacheline *dst - = reinterpret_cast(idst & -CACHELINE_SIZE); - const gtm_cacheline *src - = reinterpret_cast(isrc & -CACHELINE_SIZE); - if (dofs == 0) - dst--; - if (sofs == 0) - src--; - - if (dofs == sofs) - { - /* Since DST and SRC are co-aligned, and we didn't use the memcpy - optimization above, that implies that SIZE > CACHELINE_SIZE. */ - if (sofs != 0) - { - dpair = disp->write_lock(dst, W); - sline = disp->read_lock(src, R); - *dpair.mask |= ((gtm_cacheline_mask)1 << sleft) - 1; - memcpy (dpair.line, sline, sleft); - dst--; - src--; - size -= sleft; - } - - while (size >= CACHELINE_SIZE) - { - dpair = disp->write_lock(dst, W); - sline = disp->read_lock(src, R); - *dpair.mask = -1; - *dpair.line = *sline; - dst--; - src--; - size -= CACHELINE_SIZE; - } - - if (size != 0) - { - size_t ofs = CACHELINE_SIZE - size; - dpair = disp->write_lock(dst, W); - sline = disp->read_lock(src, R); - *dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs; - memcpy (&dpair.line->b[ofs], &sline->b[ofs], size); - } - } - else - { - gtm_cacheline c; - - sline = disp->read_lock(src, R); - if (dofs != 0) - { - size_t min = (size <= dofs ? 
size : dofs); - - if (min <= sofs) - { - sofs -= min; - memcpy (&c, &sline->b[sofs], min); - } - else - { - size_t min_ofs = min - sofs; - memcpy (&c.b[min_ofs], sline, sofs); - sline = disp->read_lock(--src, R); - sofs = CACHELINE_SIZE - min_ofs; - memcpy (&c, &sline->b[sofs], min_ofs); - } - - dofs = dleft - min; - dpair = disp->write_lock(dst, W); - *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs; - memcpy (&dpair.line->b[dofs], &c, min); - - sleft = CACHELINE_SIZE - sofs; - dst--; - size -= min; - } - - while (size >= CACHELINE_SIZE) - { - memcpy (&c.b[sleft], sline, sofs); - sline = disp->read_lock(--src, R); - memcpy (&c, &sline->b[sofs], sleft); - - dpair = disp->write_lock(dst, W); - *dpair.mask = -1; - *dpair.line = c; - - dst--; - size -= CACHELINE_SIZE; - } - - if (size != 0) - { - dofs = CACHELINE_SIZE - size; - - memcpy (&c.b[sleft], sline, sofs); - if (sleft > dofs) - { - sline = disp->read_lock(--src, R); - memcpy (&c, &sline->b[sofs], sleft); - } - - dpair = disp->write_lock(dst, W); - *dpair.mask |= (gtm_cacheline_mask)-1 << dofs; - memcpy (&dpair.line->b[dofs], &c.b[dofs], size); - } - } -} - -#define ITM_MEM_DEF(NAME, READ, WRITE) \ -void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size) \ -{ \ - do_memcpy ((uintptr_t)dst, (uintptr_t)src, size, \ - abi_dispatch::WRITE, abi_dispatch::READ); \ -} \ -void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \ -{ \ - do_memmove ((uintptr_t)dst, (uintptr_t)src, size, \ - abi_dispatch::WRITE, abi_dispatch::READ); \ -} - -ITM_MEM_DEF(RnWt, NOLOCK, W) -ITM_MEM_DEF(RnWtaR, NOLOCK, WaR) -ITM_MEM_DEF(RnWtaW, NOLOCK, WaW) - -ITM_MEM_DEF(RtWn, R, NOLOCK) -ITM_MEM_DEF(RtWt, R, W) -ITM_MEM_DEF(RtWtaR, R, WaR) -ITM_MEM_DEF(RtWtaW, R, WaW) - -ITM_MEM_DEF(RtaRWn, RaR, NOLOCK) -ITM_MEM_DEF(RtaRWt, RaR, W) -ITM_MEM_DEF(RtaRWtaR, RaR, WaR) -ITM_MEM_DEF(RtaRWtaW, RaR, WaW) - -ITM_MEM_DEF(RtaWWn, RaW, NOLOCK) -ITM_MEM_DEF(RtaWWt, RaW, W) -ITM_MEM_DEF(RtaWWtaR, RaW, WaR) -ITM_MEM_DEF(RtaWWtaW, RaW, WaW) diff --git a/libitm/memset.cc b/libitm/memset.cc deleted file mode 100644 index 3a627dd..0000000 --- a/libitm/memset.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - -#include "libitm_i.h" - -using namespace GTM; - -static void -do_memset(uintptr_t idst, int c, size_t size, abi_dispatch::lock_type W) -{ - abi_dispatch *disp = abi_disp(); - uintptr_t dofs = idst & (CACHELINE_SIZE - 1); - abi_dispatch::mask_pair dpair; - gtm_cacheline *dst - = reinterpret_cast(idst & -CACHELINE_SIZE); - - if (size == 0) - return; - - if (dofs != 0) - { - size_t dleft = CACHELINE_SIZE - dofs; - size_t min = (size <= dleft ? size : dleft); - - dpair = disp->write_lock(dst, W); - *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs; - memset (&dpair.line->b[dofs], c, min); - dst++; - size -= min; - } - - while (size >= CACHELINE_SIZE) - { - dpair = disp->write_lock(dst, W); - *dpair.mask = -1; - memset (dpair.line, c, CACHELINE_SIZE); - dst++; - size -= CACHELINE_SIZE; - } - - if (size != 0) - { - dpair = disp->write_lock(dst, W); - *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1; - memset (dpair.line, c, size); - } -} - -#define ITM_MEM_DEF(WRITE) \ -void ITM_REGPARM _ITM_memset##WRITE(void *dst, int c, size_t size) \ -{ \ - do_memset ((uintptr_t)dst, c, size, abi_dispatch::WRITE); \ -} - -ITM_MEM_DEF(W) -ITM_MEM_DEF(WaR) -ITM_MEM_DEF(WaW) diff --git a/libitm/method-wbetl.cc b/libitm/method-wbetl.cc deleted file mode 100644 index 093d1c7..0000000 --- a/libitm/method-wbetl.cc +++ /dev/null @@ -1,628 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson . - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "libitm_i.h" - -namespace { - -using namespace GTM; - -class wbetl_dispatch : public abi_dispatch -{ - private: - static const size_t RW_SET_SIZE = 4096; - - struct r_entry - { - gtm_version version; - gtm_stmlock *lock; - }; - - r_entry *m_rset_entries; - size_t m_rset_nb_entries; - size_t m_rset_size; - - struct w_entry - { - /* There's a hashtable where the locks are held, so multiple - cachelines can hash to a given bucket. This link points to the - possible next cacheline that also hashes to this bucket. */ - struct w_entry *next; - - /* Every entry in this bucket (accessed by NEXT) has the same LOCK - address below. 
*/ - gtm_stmlock *lock; - - gtm_cacheline *addr; - gtm_cacheline *value; - gtm_version version; - }; - - w_entry *m_wset_entries; - size_t m_wset_nb_entries; - size_t m_wset_size; - bool m_wset_reallocate; - - gtm_version m_start; - gtm_version m_end; - - gtm_cacheline_page *m_cache_page; - unsigned m_n_cache_page; - - private: - bool local_w_entry_p (w_entry *w); - bool has_read (gtm_stmlock *lock); - bool validate(); - bool extend(); - - gtm_cacheline *do_write_lock(gtm_cacheline *); - gtm_cacheline *do_after_write_lock(gtm_cacheline *); - const gtm_cacheline *do_read_lock(const gtm_cacheline *, bool); - - public: - wbetl_dispatch(); - - virtual const gtm_cacheline *read_lock(const gtm_cacheline *, ls_modifier); - virtual mask_pair write_lock(gtm_cacheline *, ls_modifier); - - virtual bool trycommit(); - virtual void rollback(); - virtual void reinit(); - virtual void fini(); - virtual bool trydropreference (void *, size_t); -}; - -/* Check if W is one of our write locks. */ - -inline bool -wbetl_dispatch::local_w_entry_p (w_entry *w) -{ - return (m_wset_entries <= w && w < m_wset_entries + m_wset_nb_entries); -} - -/* Check if stripe has been read previously. */ - -inline bool -wbetl_dispatch::has_read (gtm_stmlock *lock) -{ - // ??? Consider using an AA tree to lookup the r_set entries. - size_t n = m_rset_nb_entries; - for (size_t i = 0; i < n; ++i) - if (m_rset_entries[i].lock == lock) - return true; - - return false; -} - -/* Validate read set, i.e. check if all read addresses are still valid now. */ - -bool -wbetl_dispatch::validate () -{ - __sync_synchronize (); - - size_t n = m_rset_nb_entries; - for (size_t i = 0; i < n; ++i) - { - r_entry *r = &m_rset_entries[i]; - gtm_stmlock l = *r->lock; - - if (gtm_stmlock_owned_p (l)) - { - w_entry *w = (w_entry *) gtm_stmlock_get_addr (l); - - // If someone has locked us, it better be by someone in the - // current thread. - if (!local_w_entry_p (w)) - return false; - } - else if (gtm_stmlock_get_version (l) != r->version) - return false; - } - - return true; -} - -/* Extend the snapshot range. */ - -bool -wbetl_dispatch::extend () -{ - gtm_version now = gtm_get_clock (); - - if (validate ()) - { - m_end = now; - return true; - } - return false; -} - -/* Acquire a write lock on ADDR. */ - -gtm_cacheline * -wbetl_dispatch::do_write_lock(gtm_cacheline *addr) -{ - gtm_stmlock *lock; - gtm_stmlock l, l2; - gtm_version version; - w_entry *w, *prev = NULL; - - lock = gtm_get_stmlock (addr); - l = *lock; - - restart_no_load: - if (gtm_stmlock_owned_p (l)) - { - w = (w_entry *) gtm_stmlock_get_addr (l); - - /* Did we previously write the same address? */ - if (local_w_entry_p (w)) - { - prev = w; - while (1) - { - if (addr == prev->addr) - return prev->value; - if (prev->next == NULL) - break; - prev = prev->next; - } - - /* Get version from previous entry write set. */ - version = prev->version; - - /* If there's not enough entries, we must reallocate the array, - which invalidates all pointers to write set entries, which - means we have to restart the transaction. */ - if (m_wset_nb_entries == m_wset_size) - { - m_wset_size *= 2; - m_wset_reallocate = true; - gtm_tx()->restart (RESTART_REALLOCATE); - } - - w = &m_wset_entries[m_wset_nb_entries]; - goto do_write; - } - - gtm_tx()->restart (RESTART_LOCKED_WRITE); - } - else - { - version = gtm_stmlock_get_version (l); - - /* We might have read an older version previously. 
*/ - if (version > m_end) - { - if (has_read (lock)) - gtm_tx()->restart (RESTART_VALIDATE_WRITE); - } - - /* Extend write set, aborting to reallocate write set entries. */ - if (m_wset_nb_entries == m_wset_size) - { - m_wset_size *= 2; - m_wset_reallocate = true; - gtm_tx()->restart (RESTART_REALLOCATE); - } - - /* Acquire the lock. */ - w = &m_wset_entries[m_wset_nb_entries]; - l2 = gtm_stmlock_set_owned (w); - l = __sync_val_compare_and_swap (lock, l, l2); - if (l != l2) - goto restart_no_load; - } - - do_write: - m_wset_nb_entries++; - if (prev != NULL) - prev->next = w; - w->next = 0; - w->lock = lock; - w->addr = addr; - w->version = version; - - gtm_cacheline_page *page = m_cache_page; - unsigned index = m_n_cache_page; - - if (page == NULL || index == gtm_cacheline_page::LINES) - { - gtm_cacheline_page *npage = new gtm_cacheline_page; - npage->prev = page; - m_cache_page = page = npage; - m_n_cache_page = 1; - index = 0; - } - else - m_n_cache_page = index + 1; - - gtm_cacheline *line = &page->lines[index]; - w->value = line; - page->masks[index] = 0; - *line = *addr; - - return line; -} - -gtm_cacheline * -wbetl_dispatch::do_after_write_lock (gtm_cacheline *addr) -{ - gtm_stmlock *lock; - gtm_stmlock l; - w_entry *w; - - lock = gtm_get_stmlock (addr); - l = *lock; - assert (gtm_stmlock_owned_p (l)); - - w = (w_entry *) gtm_stmlock_get_addr (l); - assert (local_w_entry_p (w)); - - while (1) - { - if (addr == w->addr) - return w->value; - w = w->next; - } -} - -/* Acquire a read lock on ADDR. */ - -const gtm_cacheline * -wbetl_dispatch::do_read_lock (const gtm_cacheline *addr, bool after_read) -{ - gtm_stmlock *lock; - gtm_stmlock l, l2; - gtm_version version; - w_entry *w; - - lock = gtm_get_stmlock (addr); - l = *lock; - - restart_no_load: - if (gtm_stmlock_owned_p (l)) - { - w = (w_entry *) gtm_stmlock_get_addr (l); - - /* Did we previously write the same address? */ - if (local_w_entry_p (w)) - { - while (1) - { - if (addr == w->addr) - return w->value; - if (w->next == NULL) - return addr; - w = w->next; - } - } - - gtm_tx()->restart (RESTART_LOCKED_READ); - } - - version = gtm_stmlock_get_version (l); - - /* If version is no longer valid, re-validate the read set. */ - if (version > m_end) - { - if (!extend ()) - gtm_tx()->restart (RESTART_VALIDATE_READ); - - if (!after_read) - { - // Verify that the version has not yet been overwritten. The read - // value has not yet been added to read set and may not have been - // checked during the extend. - // - // ??? This only makes sense if we're actually reading the value - // and returning it now -- which I believe the original TinySTM - // did. This doesn't make a whole lot of sense when we're - // manipulating cachelines as we are now. Do we need some other - // form of lock verification here, or is the validate call in - // trycommit sufficient? - - __sync_synchronize (); - l2 = *lock; - if (l != l2) - { - l = l2; - goto restart_no_load; - } - } - } - - if (!after_read) - { - r_entry *r; - - /* Add the address and version to the read set. 
*/ - if (m_rset_nb_entries == m_rset_size) - { - m_rset_size *= 2; - - m_rset_entries = (r_entry *) - xrealloc (m_rset_entries, m_rset_size * sizeof(r_entry)); - } - r = &m_rset_entries[m_rset_nb_entries++]; - r->version = version; - r->lock = lock; - } - - return addr; -} - -const gtm_cacheline * -wbetl_dispatch::read_lock (const gtm_cacheline *addr, ls_modifier ltype) -{ - switch (ltype) - { - case NONTXNAL: - return addr; - case R: - return do_read_lock (addr, false); - case RaR: - return do_read_lock (addr, true); - case RaW: - return do_after_write_lock (const_cast(addr)); - case RfW: - return do_write_lock (const_cast(addr)); - default: - abort (); - } -} - -abi_dispatch::mask_pair -wbetl_dispatch::write_lock (gtm_cacheline *addr, ls_modifier ltype) -{ - gtm_cacheline *line; - - switch (ltype) - { - case NONTXNAL: - return mask_pair (addr, &mask_sink); - case W: - case WaR: - line = do_write_lock (addr); - break; - case WaW: - line = do_after_write_lock (addr); - break; - default: - abort (); - } - - return mask_pair (line, gtm_cacheline_page::mask_for_page_line (line)); -} - -/* Commit the transaction. */ - -bool -wbetl_dispatch::trycommit () -{ - const size_t n = m_wset_nb_entries; - if (n != 0) - { - /* Get commit timestamp. */ - gtm_version t = gtm_inc_clock (); - - /* Validate only if a concurrent transaction has started since. */ - if (m_start != t - 1 && !validate ()) - return false; - - /* Install new versions. */ - for (size_t i = 0; i < n; ++i) - { - w_entry *w = &m_wset_entries[i]; - gtm_cacheline_mask mask - = *gtm_cacheline_page::mask_for_page_line (w->value); - - /* Filter out any updates that overlap the libitm stack. */ - mask = gtm_mask_stack (w->addr, mask); - - gtm_cacheline::copy_mask (w->addr, w->value, mask); - } - - /* Only emit barrier after all cachelines are copied. */ - gtm_cacheline::copy_mask_wb (); - - /* Drop locks. */ - for (size_t i = 0; i < n; ++i) - { - w_entry *w = &m_wset_entries[i]; - - /* Every link along the chain has the same lock, but only - bother dropping the lock once per bucket (at the end). */ - if (w->next == NULL) - *w->lock = gtm_stmlock_set_version (t); - } - } - - __sync_synchronize (); - return true; -} - -void -wbetl_dispatch::rollback () -{ - /* Drop locks. */ - const size_t n = m_wset_nb_entries; - for (size_t i = 0; i < n; ++i) - { - w_entry *w = &m_wset_entries[i]; - - /* Every link along the chain has the same lock, but only - bother dropping the lock once per bucket (at the end). */ - if (w->next == NULL) - *w->lock = gtm_stmlock_set_version (w->version); - } - - __sync_synchronize (); -} - -void -wbetl_dispatch::reinit () -{ - gtm_cacheline_page *page; - - m_rset_nb_entries = 0; - m_wset_nb_entries = 0; - - if (m_wset_reallocate) - { - m_wset_reallocate = 0; - m_wset_entries = (w_entry *) - xrealloc (m_wset_entries, m_wset_size * sizeof(w_entry)); - } - - page = m_cache_page; - if (page) - { - /* Release all but one of the pages of cachelines. */ - gtm_cacheline_page *prev = page->prev; - if (prev) - { - page->prev = 0; - delete prev; - } - - /* Start the next cacheline allocation from the beginning. */ - m_n_cache_page = 0; - } - - m_start = m_end = gtm_get_clock (); -} - -void -wbetl_dispatch::fini () -{ - delete m_cache_page; - free (m_rset_entries); - free (m_wset_entries); - delete this; -} - -/* Attempt to drop any internal references to PTR. Return TRUE if successful. - - This is an adaptation of the transactional memcpy function. 
- - What we do here is flush out the current transactional content of - PTR to real memory, and remove the write mask bits associated with - it so future commits will ignore this piece of memory. */ - -bool -wbetl_dispatch::trydropreference (void *ptr, size_t size) -{ - if (size == 0) - return true; - - if (!validate ()) - return false; - - uintptr_t isrc = (uintptr_t)ptr; - // The position in the source cacheline where *PTR starts. - uintptr_t sofs = isrc & (CACHELINE_SIZE - 1); - gtm_cacheline *src - = reinterpret_cast(isrc & -CACHELINE_SIZE); - unsigned char *dst = (unsigned char *)ptr; - abi_dispatch::mask_pair pair; - - // If we're trying to drop a reference, we should already have a - // write lock on it. If we don't have one, there's no work to do. - if (!gtm_stmlock_owned_p (*gtm_get_stmlock (src))) - return true; - - // We copy the data in three stages: - - // (a) Copy stray bytes at the beginning that are smaller than a - // cacheline. - if (sofs != 0) - { - size_t sleft = CACHELINE_SIZE - sofs; - size_t min = (size <= sleft ? size : sleft); - - // WaW will give us the current locked entry. - pair = this->write_lock (src, WaW); - - // *jedi mind wave*...these aren't the droids you're looking for. - *pair.mask &= ~((((gtm_cacheline_mask)1 << min) - 1) << sofs); - - memcpy (dst, &pair.line->b[sofs], min); - dst += min; - src++; - size -= min; - } - - // (b) Copy subsequent cacheline sized chunks. - while (size >= CACHELINE_SIZE) - { - pair = this->write_lock(src, WaW); - *pair.mask = 0; - memcpy (dst, pair.line, CACHELINE_SIZE); - dst += CACHELINE_SIZE; - src++; - size -= CACHELINE_SIZE; - } - - // (c) Copy anything left over. - if (size != 0) - { - pair = this->write_lock(src, WaW); - *pair.mask &= ~(((gtm_cacheline_mask)1 << size) - 1); - memcpy (dst, pair.line, size); - } - - // No need to drop locks, since we're going to abort the transaction - // anyhow. - - return true; -} - - -wbetl_dispatch::wbetl_dispatch () - : abi_dispatch (false, false) -{ - m_rset_entries = (r_entry *) xmalloc (RW_SET_SIZE * sizeof(r_entry)); - m_rset_nb_entries = 0; - m_rset_size = RW_SET_SIZE; - - m_wset_entries = (w_entry *) xmalloc (RW_SET_SIZE * sizeof(w_entry)); - m_wset_nb_entries = 0; - m_wset_size = RW_SET_SIZE; - m_wset_reallocate = false; - - m_start = m_end = gtm_get_clock (); - - m_cache_page = 0; - m_n_cache_page = 0; -} - -} // anon namespace - -abi_dispatch * -GTM::dispatch_wbetl () -{ - return new wbetl_dispatch (); -} diff --git a/libitm/testsuite/Makefile.in b/libitm/testsuite/Makefile.in index ed1f314..6990cfe 100644 --- a/libitm/testsuite/Makefile.in +++ b/libitm/testsuite/Makefile.in @@ -38,6 +38,7 @@ subdir = testsuite DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ + $(top_srcdir)/../config/asmcfi.m4 \ $(top_srcdir)/../config/depstand.m4 \ $(top_srcdir)/../config/enable.m4 \ $(top_srcdir)/../config/futex.m4 \ @@ -90,8 +91,6 @@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ -FC = @FC@ -FCFLAGS = @FCFLAGS@ FGREP = @FGREP@ GREP = @GREP@ INSTALL = @INSTALL@ @@ -142,7 +141,6 @@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ -- cgit v1.1
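
Note on the removed memory barriers: the deleted memcpy.cc and memset.cc both reduce to one pattern — align the target address down to a cacheline, take a write lock that yields a shadow cacheline plus a per-byte dirty mask, set the mask bits for the bytes actually touched, and write into the shadow line in three stages (leading partial line, whole lines, trailing partial line); commit later copies back only the masked bytes. The sketch below restates that pattern in isolation. Cacheline, Region and masked_memset are illustrative stand-ins, not the libitm types this patch deletes.

    // Minimal sketch of the cacheline-masked write pattern; a shadow
    // Region models the transactional copies, not real locked memory.
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    constexpr std::size_t LINE = 64;            // assumed cacheline size

    struct Cacheline { unsigned char b[LINE]; };

    struct Region {
      std::vector<Cacheline> lines;             // shadow copies of the target
      std::vector<std::uint64_t> masks;         // one "written" bit per byte
    };

    // Masked memset over the shadow region; DST is a byte offset into it.
    // Commit-time code would copy back only bytes whose mask bit is set.
    void masked_memset(Region &r, std::size_t dst, int c, std::size_t size)
    {
      if (size == 0)
        return;

      std::size_t idx = dst / LINE;             // first cacheline touched
      std::size_t ofs = dst % LINE;             // offset within that line

      if (ofs != 0)                             // (a) leading partial line
        {
          std::size_t n = size < LINE - ofs ? size : LINE - ofs;
          r.masks[idx] |= ((std::uint64_t(1) << n) - 1) << ofs;
          std::memset(&r.lines[idx].b[ofs], c, n);
          ++idx;
          size -= n;
        }

      while (size >= LINE)                      // (b) whole cachelines
        {
          r.masks[idx] = ~std::uint64_t(0);
          std::memset(r.lines[idx].b, c, LINE);
          ++idx;
          size -= LINE;
        }

      if (size != 0)                            // (c) trailing partial line
        {
          r.masks[idx] |= (std::uint64_t(1) << size) - 1;
          std::memset(r.lines[idx].b, c, size);
        }
    }

The deleted do_memcpy and do_memmove follow the same three-stage split, with extra stitching through a temporary cacheline when source and destination are not co-aligned within their lines, and a backward variant for overlapping moves.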
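
Note on the removed method-wbetl.cc: its core is version-clock validation — every read records the versioned lock word it observed, and validate() later rechecks that each location is still unowned (or owned by the current transaction) and unchanged, while extend() moves the snapshot forward when validation succeeds. The sketch below shows only that validation idea; VersionedLock, ReadEntry and Tx are illustrative stand-ins with a simplified lock encoding, not the gtm_stmlock machinery removed here, and it omits the real code's acceptance of locks held by the current transaction.

    // Sketch of read-set validation against a global version clock.
    #include <atomic>
    #include <cstdint>
    #include <vector>

    using Version = std::uint64_t;

    std::atomic<Version> global_clock{0};       // shared commit clock

    struct VersionedLock {
      std::atomic<Version> word{0};             // low bit: owned, rest: version
      static bool owned(Version v)   { return v & 1; }
      static Version version(Version v) { return v >> 1; }
    };

    struct ReadEntry { VersionedLock *lock; Version seen; };

    struct Tx {
      Version snapshot = 0;                     // plays the role of m_end
      std::vector<ReadEntry> reads;

      // Recheck every read location: still unowned and at the version we saw?
      bool validate() const {
        for (const ReadEntry &r : reads) {
          Version v = r.lock->word.load(std::memory_order_acquire);
          if (VersionedLock::owned(v) || VersionedLock::version(v) != r.seen)
            return false;
        }
        return true;
      }

      // Try to advance the snapshot to the current clock value.
      bool extend() {
        Version now = global_clock.load(std::memory_order_acquire);
        if (!validate())
          return false;
        snapshot = now;
        return true;
      }
    };

At commit, the deleted code took a fresh timestamp from the clock, revalidated the read set unless it had started at the immediately preceding value, wrote the masked shadow cachelines back, and released each lock with the new version.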