From 8f98348f31afe8e005eed9cbf776e286869816fc Mon Sep 17 00:00:00 2001 From: Philip Herron Date: Thu, 16 Mar 2023 21:03:34 +0000 Subject: gccrs: Add move_val_init intrinsic This implements it as a builtin memcpy using the generic param T for the size hint. Fixes #1902 gcc/rust/ChangeLog: * backend/rust-compile-intrinsic.cc (move_val_init_handler): new intrinsic (uninit_handler): use a builtin memcpy gcc/testsuite/ChangeLog: * rust/compile/issue-1981.rs: New test. Signed-off-by: Philip Herron --- gcc/rust/backend/rust-compile-intrinsic.cc | 56 +++++++++++- gcc/testsuite/rust/compile/issue-1981.rs | 132 +++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/rust/compile/issue-1981.rs (limited to 'gcc') diff --git a/gcc/rust/backend/rust-compile-intrinsic.cc b/gcc/rust/backend/rust-compile-intrinsic.cc index 85f6e1c..b9951a0 100644 --- a/gcc/rust/backend/rust-compile-intrinsic.cc +++ b/gcc/rust/backend/rust-compile-intrinsic.cc @@ -83,6 +83,8 @@ static tree op_with_overflow_inner (Context *ctx, TyTy::FnType *fntype, tree_code op); static tree uninit_handler (Context *ctx, TyTy::FnType *fntype); +static tree +move_val_init_handler (Context *ctx, TyTy::FnType *fntype); enum class Prefetch { @@ -205,6 +207,7 @@ static const std::map(); + // get the template parameter type tree fn uninit<T>(); rust_assert (fntype->get_num_substitutions () == 1); auto &param_mapping = fntype->get_substs ().at (0); const TyTy::ParamType *param_tyty = param_mapping.get_param_ty (); @@ -1042,5 +1045,56 @@ uninit_handler (Context *ctx, TyTy::FnType *fntype) return fndecl; } +static tree +move_val_init_handler (Context *ctx, TyTy::FnType *fntype) +{ + rust_assert (fntype->get_params ().size () == 2); + + tree lookup = NULL_TREE; + if (check_for_cached_intrinsic (ctx, fntype, &lookup)) + return lookup; + + auto fndecl = compile_intrinsic_function (ctx, fntype); + + // get the template parameter type tree fn move_val_init<T>(); + rust_assert 
(fntype->get_num_substitutions () == 1); + auto &param_mapping = fntype->get_substs ().at (0); + const TyTy::ParamType *param_tyty = param_mapping.get_param_ty (); + TyTy::BaseType *resolved_tyty = param_tyty->resolve (); + tree template_parameter_type + = TyTyResolveCompile::compile (ctx, resolved_tyty); + + std::vector<Bvariable *> param_vars; + compile_fn_params (ctx, fntype, fndecl, &param_vars); + + if (!ctx->get_backend ()->function_set_parameters (fndecl, param_vars)) + return error_mark_node; + + enter_intrinsic_block (ctx, fndecl); + + // BUILTIN move_val_init FN BODY BEGIN + + tree dst = ctx->get_backend ()->var_expression (param_vars[0], Location ()); + tree src = ctx->get_backend ()->var_expression (param_vars[1], Location ()); + tree size = TYPE_SIZE_UNIT (template_parameter_type); + + tree memcpy_builtin = error_mark_node; + BuiltinsContext::get ().lookup_simple_builtin ("memcpy", &memcpy_builtin); + rust_assert (memcpy_builtin != error_mark_node); + + src = build_fold_addr_expr_loc (BUILTINS_LOCATION, src); + tree memset_call = build_call_expr_loc (BUILTINS_LOCATION, memcpy_builtin, 3, + dst, src, size); + TREE_READONLY (memset_call) = 0; + TREE_SIDE_EFFECTS (memset_call) = 1; + + ctx->add_statement (memset_call); + // BUILTIN move_val_init FN BODY END + + finalize_intrinsic_block (ctx, fndecl); + + return fndecl; +} + } // namespace Compile } // namespace Rust diff --git a/gcc/testsuite/rust/compile/issue-1981.rs b/gcc/testsuite/rust/compile/issue-1981.rs new file mode 100644 index 0000000..e3f1723 --- /dev/null +++ b/gcc/testsuite/rust/compile/issue-1981.rs @@ -0,0 +1,132 @@ +mod intrinsics { + extern "rust-intrinsic" { + pub fn offset<T>(ptr: *const T, count: isize) -> *const T; + pub fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize); + pub fn move_val_init<T>(dst: *mut T, src: T); + pub fn uninit<T>() -> T; + } +} + +mod ptr { + #[lang = "const_ptr"] + impl<T> *const T { + pub unsafe fn offset(self, count: isize) -> *const T { + intrinsics::offset(self, count) + } + } + 
#[lang = "mut_ptr"] + impl<T> *mut T { + pub unsafe fn offset(self, count: isize) -> *mut T { + intrinsics::offset(self, count) as *mut T + } + } + + pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) { + let x = x as *mut T; + let y = y as *mut T; + let len = mem::size_of::<T>() * count; + swap_nonoverlapping_bytes(x, y, len) + } + + pub unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) { + // For types smaller than the block optimization below, + // just swap directly to avoid pessimizing codegen. + if mem::size_of::<T>() < 32 { + let z = read(x); + intrinsics::copy_nonoverlapping(y, x, 1); + write(y, z); + } else { + swap_nonoverlapping(x, y, 1); + } + } + + pub unsafe fn write<T>(dst: *mut T, src: T) { + intrinsics::move_val_init(&mut *dst, src) + } + + pub unsafe fn read<T>(src: *const T) -> T { + let mut tmp: T = mem::uninitialized(); + intrinsics::copy_nonoverlapping(src, &mut tmp, 1); + tmp + } + + unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) { + struct Block(u64, u64, u64, u64); + struct UnalignedBlock(u64, u64, u64, u64); + + let block_size = mem::size_of::<Block>(); + + // Loop through x & y, copying them `Block` at a time + // The optimizer should unroll the loop fully for most types + // N.B. 
We can't use a for loop as the `range` impl calls `mem::swap` recursively + let mut i = 0; + while i + block_size <= len { + // Create some uninitialized memory as scratch space + // Declaring `t` here avoids aligning the stack when this loop is unused + let mut t: Block = mem::uninitialized(); + let t = &mut t as *mut _ as *mut u8; + let x = x.offset(i as isize); + let y = y.offset(i as isize); + + // Swap a block of bytes of x & y, using t as a temporary buffer + // This should be optimized into efficient SIMD operations where available + intrinsics::copy_nonoverlapping(x, t, block_size); + intrinsics::copy_nonoverlapping(y, x, block_size); + intrinsics::copy_nonoverlapping(t, y, block_size); + i += block_size; + } + + if i < len { + // Swap any remaining bytes + let mut t: UnalignedBlock = mem::uninitialized(); + let rem = len - i; + + let t = &mut t as *mut _ as *mut u8; + let x = x.offset(i as isize); + let y = y.offset(i as isize); + + intrinsics::copy_nonoverlapping(x, t, rem); + intrinsics::copy_nonoverlapping(y, x, rem); + intrinsics::copy_nonoverlapping(t, y, rem); + } + } +} + +mod mem { + extern "rust-intrinsic" { + pub fn transmute<T, U>(_: T) -> U; + pub fn size_of<T>() -> usize; + } + + pub fn swap<T>(x: &mut T, y: &mut T) { + unsafe { + ptr::swap_nonoverlapping_one(x, y); + } + } + + pub fn replace<T>(dest: &mut T, mut src: T) -> T { + swap(dest, &mut src); + src + } + + pub unsafe fn uninitialized<T>() -> T { + intrinsics::uninit() + } +} + +trait Step { + fn replace_zero(&mut self) -> Self; +} + +impl Step for i32 { + fn replace_zero(&mut self) -> Self { + mem::replace(self, 0) + } +} + +fn main() -> i32 { + let a = 123; + a.replace_zero(); + a +} -- cgit v1.1