#![feature(intrinsics)]

#[lang = "sized"]
pub trait Sized {}

mod intrinsics {
    extern "rust-intrinsic" {
        pub fn offset<T>(ptr: *const T, count: isize) -> *const T;
        pub fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
        pub fn move_val_init<T>(dst: *mut T, src: T);
        pub fn uninit<T>() -> T;
    }
}

mod ptr {
    #[lang = "const_ptr"]
    impl<T> *const T {
        pub unsafe fn offset(self, count: isize) -> *const T {
            intrinsics::offset(self, count)
        }
    }

    #[lang = "mut_ptr"]
    impl<T> *mut T {
        pub unsafe fn offset(self, count: isize) -> *mut T {
            intrinsics::offset(self, count) as *mut T
        }
    }

    pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
        let x = x as *mut u8;
        let y = y as *mut u8;
        let len = mem::size_of::<T>() * count;
        swap_nonoverlapping_bytes(x, y, len)
    }

    pub unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) {
        // For types smaller than the block optimization below,
        // just swap directly to avoid pessimizing codegen.
        if mem::size_of::<T>() < 32 {
            let z = read(x);
            intrinsics::copy_nonoverlapping(y, x, 1);
            write(y, z);
        } else {
            swap_nonoverlapping(x, y, 1);
        }
    }

    pub unsafe fn write<T>(dst: *mut T, src: T) {
        intrinsics::move_val_init(&mut *dst, src)
    }

    pub unsafe fn read<T>(src: *const T) -> T {
        let mut tmp: T = mem::uninitialized();
        intrinsics::copy_nonoverlapping(src, &mut tmp, 1);
        tmp
    }

    unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
        struct Block(u64, u64, u64, u64);
        struct UnalignedBlock(u64, u64, u64, u64);

        let block_size = mem::size_of::<Block>();

        // Loop through x & y, copying them `Block` at a time
        // The optimizer should unroll the loop fully for most types
        // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
        let mut i = 0;
        while i + block_size <= len {
            // Create some uninitialized memory as scratch space
            // Declaring `t` here avoids aligning the stack when this loop is unused
            let mut t: Block = mem::uninitialized();
            let t = &mut t as *mut _ as *mut u8;
            let x = x.offset(i as isize);
            let y = y.offset(i as isize);

            // Swap a block of bytes of x & y, using t as a temporary buffer
            // This should be optimized into efficient SIMD operations where available
            intrinsics::copy_nonoverlapping(x, t, block_size);
            intrinsics::copy_nonoverlapping(y, x, block_size);
            intrinsics::copy_nonoverlapping(t, y, block_size);
            i += block_size;
        }

        if i < len {
            // Swap any remaining bytes
            let mut t: UnalignedBlock = mem::uninitialized();
            let rem = len - i;

            let t = &mut t as *mut _ as *mut u8;
            let x = x.offset(i as isize);
            let y = y.offset(i as isize);

            intrinsics::copy_nonoverlapping(x, t, rem);
            intrinsics::copy_nonoverlapping(y, x, rem);
            intrinsics::copy_nonoverlapping(t, y, rem);
        }
    }
}

mod mem {
    extern "rust-intrinsic" {
        pub fn transmute<T, U>(_: T) -> U;
        pub fn size_of<T>() -> usize;
    }

    pub fn swap<T>(x: &mut T, y: &mut T) {
        unsafe {
            ptr::swap_nonoverlapping_one(x, y);
        }
    }

    pub fn replace<T>(dest: &mut T, mut src: T) -> T {
        swap(dest, &mut src);
        src
    }

    pub unsafe fn uninitialized<T>() -> T {
        intrinsics::uninit()
    }
}

trait Step {
    fn replace_zero(&mut self) -> Self;
}

impl Step for i32 {
    fn replace_zero(&mut self) -> Self {
        mem::replace(self, 0)
    }
}

fn main() -> i32 {
    // `a` must be mutable: `replace_zero` takes `&mut self` and leaves 0 behind,
    // so `main` returns 0.
    let mut a = 123;
    a.replace_zero();
    a
}
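
// --- Illustrative sketch (an addition, not part of the original test) ---
// This shows which of the two paths in `swap_nonoverlapping_one` a call to
// `mem::swap` takes: `Small` (8 bytes) is under the 32-byte threshold and
// uses the direct read/copy/write fast path, while `Big` (40 bytes) falls
// through to the block-wise `swap_nonoverlapping_bytes` loop (one 32-byte
// `Block` iteration plus an 8-byte `UnalignedBlock` remainder). The names
// `Small`, `Big`, and `swap_paths_demo` are hypothetical, introduced only
// for this example; the function is not called from `main`.
fn swap_paths_demo() -> i32 {
    struct Small(u64); // 8 bytes: direct-swap path
    struct Big(u64, u64, u64, u64, u64); // 40 bytes: block-swap path

    let mut a = Small(1);
    let mut b = Small(2);
    mem::swap(&mut a, &mut b); // now a.0 == 2, b.0 == 1

    let mut c = Big(1, 2, 3, 4, 5);
    let mut d = Big(6, 7, 8, 9, 10);
    mem::swap(&mut c, &mut d); // now c.0 == 6, d.0 == 1

    // Return 0 if both swaps behaved as expected.
    if a.0 == 2 && b.0 == 1 && c.0 == 6 && d.0 == 1 {
        0
    } else {
        1
    }
}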