/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "libitm_i.h"

using namespace GTM;

/* Copy SIZE bytes from address ISRC to address IDST, one cacheline at a
   time, walking from the lowest address upwards.

   Every destination cacheline is obtained through the dispatch's
   write_lock using lock type W, and every source cacheline through
   read_lock using lock type R.  For each destination line, the bytes
   that get stored are also flagged in the mask that write_lock hands
   back (bit N of the mask is set for byte N of the line).

   A SIZE of zero returns without acquiring anything.  */

static void
do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
           abi_dispatch::lock_type W, abi_dispatch::lock_type R)
{
  abi_dispatch *disp = abi_disp();
  // The position in the destination cacheline where *IDST starts.
  uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
  // The position in the source cacheline where *ISRC starts.
  uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
  const gtm_cacheline *src
    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
  gtm_cacheline *dst
    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
  const gtm_cacheline *sline;
  abi_dispatch::mask_pair dpair;

  if (size == 0)
    return;

  // If both SRC and DST data start at the same position in the cachelines,
  // we can easily copy the data in tandem, cacheline by cacheline...
  if (dofs == sofs)
    {
      // We copy the data in three stages:

      // (a) Copy stray bytes at the beginning that are smaller than a
      // cacheline.
      if (sofs != 0)
        {
          size_t sleft = CACHELINE_SIZE - sofs;
          size_t min = (size <= sleft ? size : sleft);

          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          // MIN < CACHELINE_SIZE here because SOFS != 0, so the shift
          // count stays below the number of bytes in a line.
          *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
          memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
          dst++;
          src++;
          size -= min;
        }

      // (b) Copy subsequent cacheline sized chunks.
      while (size >= CACHELINE_SIZE)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask = -1;
          *dpair.line = *sline;
          dst++;
          src++;
          size -= CACHELINE_SIZE;
        }

      // (c) Copy anything left over.
      if (size != 0)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
          memcpy (dpair.line, sline, size);
        }
    }
  // ... otherwise, we must copy the data in disparate hunks using
  // temporary storage.
  else
    {
      gtm_cacheline c;
      // Invariant for the rest of this branch: SOFS is the offset of the
      // next uncopied byte in *SLINE and SLEFT is the number of bytes
      // from there to the end of that line.
      size_t sleft = CACHELINE_SIZE - sofs;

      sline = disp->read_lock(src, R);

      // As above, we copy the data in three stages:

      // (a) Copy stray bytes at the beginning that are smaller than a
      // cacheline.
      if (dofs != 0)
        {
          size_t dleft = CACHELINE_SIZE - dofs;
          size_t min = (size <= dleft ? size : dleft);

          dpair = disp->write_lock(dst, W);
          *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;

          // If what's left in the source cacheline will fit in the
          // rest of the destination cacheline, straight up copy it.
          if (min <= sleft)
            {
              memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
              sofs += min;
            }
          // Otherwise, we need more bits from the source cacheline
          // than are available.  Piece together what we need from
          // contiguous (source) cachelines, into temp space, and copy
          // it over.
          else
            {
              memcpy (&c, &sline->b[sofs], sleft);
              sline = disp->read_lock(++src, R);
              sofs = min - sleft;
              memcpy (&c.b[sleft], sline, sofs);
              memcpy (&dpair.line->b[dofs], &c, min);
            }
          sleft = CACHELINE_SIZE - sofs;

          dst++;
          size -= min;
        }

      // (b) Copy subsequent cacheline sized chunks.
      while (size >= CACHELINE_SIZE)
        {
          // We have a full (destination) cacheline where to put the
          // data, but to get to the corresponding cacheline sized
          // chunk in the source, we have to piece together two
          // contiguous source cachelines.
          memcpy (&c, &sline->b[sofs], sleft);
          sline = disp->read_lock(++src, R);
          memcpy (&c.b[sleft], sline, sofs);

          dpair = disp->write_lock(dst, W);
          *dpair.mask = -1;
          *dpair.line = c;

          dst++;
          size -= CACHELINE_SIZE;
        }

      // (c) Copy anything left over.
      if (size != 0)
        {
          dpair = disp->write_lock(dst, W);
          *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
          // If what's left to copy is entirely in the remaining
          // source cacheline, do it.
          if (size <= sleft)
            memcpy (dpair.line, &sline->b[sofs], size);
          // Otherwise, piece together the remaining bits, and copy.
          else
            {
              memcpy (&c, &sline->b[sofs], sleft);
              sline = disp->read_lock(++src, R);
              memcpy (&c.b[sleft], sline, size - sleft);
              memcpy (dpair.line, &c, size);
            }
        }
    }
}

/* Move SIZE bytes from address ISRC to address IDST, where the two
   regions are allowed to overlap.  W and R are the lock types passed to
   the dispatch's write_lock and read_lock respectively.

   Three situations are distinguished:
     - IDST == ISRC: no data moves; only locks are taken (see below).
     - IDST below ISRC, or the regions are disjoint: handled by do_memcpy.
     - otherwise: the copy runs backwards, from the last cacheline of the
       regions down to the first.

   A SIZE of zero returns without acquiring anything.  */

static void
do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
            abi_dispatch::lock_type W, abi_dispatch::lock_type R)
{
  abi_dispatch *disp = abi_disp();
  uintptr_t sofs, dofs;
  const gtm_cacheline *sline;
  abi_dispatch::mask_pair dpair;

  if (size == 0)
    return;

  /* The co-aligned memmove below doesn't work for DST == SRC, so filter
     that out.  It's tempting to just return here, as this is a no-op move.
     However, our caller has the right to expect the locks to be acquired
     as advertised.  */
  if (__builtin_expect (idst == isrc, 0))
    {
      /* If the write lock is already acquired, nothing to do.  */
      if (W == abi_dispatch::WaW)
        return;
      /* If the destination is protected, acquire a write lock.  */
      if (W != abi_dispatch::NOLOCK)
        R = abi_dispatch::RfW;
      /* Notice serial mode, where we don't acquire locks at all.  */
      if (R == abi_dispatch::NOLOCK)
        return;

      /* Touch every cacheline overlapped by [ISRC, ISRC + SIZE).  */
      idst = isrc + size;
      for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
        disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
      return;
    }

  /* Fall back to memcpy if the implementation above can handle it.  */
  if (idst < isrc || isrc + size <= idst)
    {
      do_memcpy (idst, isrc, size, W, R);
      return;
    }

  /* What remains requires a backward copy from the end of the blocks.
     From here on IDST and ISRC are the one-past-the-end addresses, and
     DOFS / SOFS are the offsets of those end addresses within their
     cachelines, i.e. the number of bytes of each region that live at
     the start of its final cacheline.  */
  idst += size;
  isrc += size;
  dofs = idst & (CACHELINE_SIZE - 1);
  sofs = isrc & (CACHELINE_SIZE - 1);

  gtm_cacheline *dst
    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
  const gtm_cacheline *src
    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);

  /* An end address on a cacheline boundary belongs to the following
     line; step back so DST / SRC name the last line holding data.  */
  if (dofs == 0)
    dst--;
  if (sofs == 0)
    src--;

  if (dofs == sofs)
    {
      /* Since DST and SRC are co-aligned, and we didn't use the memcpy
         optimization above, that implies that SIZE > CACHELINE_SIZE.  */

      /* (a) Copy the partial line at the end: bytes [0, SOFS).  */
      if (sofs != 0)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= ((gtm_cacheline_mask)1 << sofs) - 1;
          memcpy (dpair.line, sline, sofs);
          dst--;
          src--;
          size -= sofs;
        }

      /* (b) Copy whole cachelines, highest first.  */
      while (size >= CACHELINE_SIZE)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask = -1;
          *dpair.line = *sline;
          dst--;
          src--;
          size -= CACHELINE_SIZE;
        }

      /* (c) Copy the partial line at the beginning, which occupies the
         tail [OFS, CACHELINE_SIZE) of its cacheline.  */
      if (size != 0)
        {
          size_t ofs = CACHELINE_SIZE - size;
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
          memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
        }
    }
  else
    {
      gtm_cacheline c;

      /* Invariant for this branch: bytes [0, SOFS) of *SLINE are the
         source bytes not yet copied, and SLEFT == CACHELINE_SIZE - SOFS.
         When the source ends exactly on a cacheline boundary, SRC was
         stepped back above and that whole line is still pending.  */
      if (sofs == 0)
        sofs = CACHELINE_SIZE;
      uintptr_t sleft = CACHELINE_SIZE - sofs;

      sline = disp->read_lock(src, R);

      /* (a) Fill the partial destination line at the end.  The bytes
         written are the MIN bytes just below offset DOFS.  */
      if (dofs != 0)
        {
          size_t min = (size <= dofs ? size : dofs);

          /* Gather the last MIN pending source bytes into C[0, MIN).  */
          if (min <= sofs)
            {
              sofs -= min;
              memcpy (&c, &sline->b[sofs], min);
            }
          else
            {
              /* The current source line runs out; the first MIN_OFS
                 bytes come from the tail of the preceding line.  */
              size_t min_ofs = min - sofs;
              memcpy (&c.b[min_ofs], sline, sofs);
              sline = disp->read_lock(--src, R);
              sofs = CACHELINE_SIZE - min_ofs;
              memcpy (&c, &sline->b[sofs], min_ofs);
            }

          dofs -= min;
          dpair = disp->write_lock(dst, W);
          *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
          memcpy (&dpair.line->b[dofs], &c, min);

          sleft = CACHELINE_SIZE - sofs;
          dst--;
          size -= min;
        }

      /* (b) Copy whole destination cachelines, highest first.  Each one
         is pieced together from the pending head of the current source
         line and the tail of the preceding source line.  */
      while (size >= CACHELINE_SIZE)
        {
          memcpy (&c.b[sleft], sline, sofs);
          sline = disp->read_lock(--src, R);
          memcpy (&c, &sline->b[sofs], sleft);

          dpair = disp->write_lock(dst, W);
          *dpair.mask = -1;
          *dpair.line = c;

          dst--;
          size -= CACHELINE_SIZE;
        }

      /* (c) Copy the partial line at the beginning, which occupies the
         tail [DOFS, CACHELINE_SIZE) of its destination cacheline.  */
      if (size != 0)
        {
          dofs = CACHELINE_SIZE - size;

          memcpy (&c.b[sleft], sline, sofs);
          /* Only fetch the preceding source line when some of the
             wanted bytes, C[DOFS, SLEFT), actually come from it.  */
          if (sleft > dofs)
            {
              sline = disp->read_lock(--src, R);
              memcpy (&c, &sline->b[sofs], sleft);
            }

          dpair = disp->write_lock(dst, W);
          *dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
          memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
        }
    }
}

/* Define the exported _ITM_memcpy<NAME> and _ITM_memmove<NAME> entry
   points.  READ and WRITE name the abi_dispatch lock types forwarded as
   the R and W arguments of do_memcpy / do_memmove.  */
#define ITM_MEM_DEF(NAME, READ, WRITE) \
void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size)  \
{                                                                            \
  do_memcpy ((uintptr_t)dst, (uintptr_t)src, size,                           \
             abi_dispatch::WRITE, abi_dispatch::READ);                       \
}                                                                            \
void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
{                                                                            \
  do_memmove ((uintptr_t)dst, (uintptr_t)src, size,                          \
              abi_dispatch::WRITE, abi_dispatch::READ);                      \
}

ITM_MEM_DEF(RnWt,       NOLOCK, W)
ITM_MEM_DEF(RnWtaR,     NOLOCK, WaR)
ITM_MEM_DEF(RnWtaW,     NOLOCK, WaW)

ITM_MEM_DEF(RtWn,       R,      NOLOCK)
ITM_MEM_DEF(RtWt,       R,      W)
ITM_MEM_DEF(RtWtaR,     R,      WaR)
ITM_MEM_DEF(RtWtaW,     R,      WaW)

ITM_MEM_DEF(RtaRWn,     RaR,    NOLOCK)
ITM_MEM_DEF(RtaRWt,     RaR,    W)
ITM_MEM_DEF(RtaRWtaR,   RaR,    WaR)
ITM_MEM_DEF(RtaRWtaW,   RaR,    WaW)

ITM_MEM_DEF(RtaWWn,     RaW,    NOLOCK)
ITM_MEM_DEF(RtaWWt,     RaW,    W)
ITM_MEM_DEF(RtaWWtaR,   RaW,    WaR)
ITM_MEM_DEF(RtaWWtaW,   RaW,    WaW)