; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; 9-byte array type used for the temporary allocas in the functions below.
%buf = type [9 x i8]

; We can forward `memcpy` because the copy locations are the same.
; The first copy writes 7 bytes from %src into the temporary; the second copy
; reads 6 bytes starting at offset 1 of the temporary, which stays inside those
; 7 bytes. The expected output therefore reads from %src + 1 directly.
; The forwarded copy is expected to be a memmove (presumably because %dest is
; not known to be disjoint from %src -- confirm against the pass).
define void @forward_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[SRC_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; Read bytes [1, 7) of the temporary.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We need to update the align value of the source of `memcpy` when forwarding.
; Here the first copy's source is `align 4` and the second copy starts at
; offset 3 of the temporary; the expected forwarded source (%src + 3) carries
; only `align 1`.
define void @forward_offset_align(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_align(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill all 9 bytes of the temporary from the 4-byte-aligned %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 3
  ; Read bytes [3, 8) of the temporary.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; We can change the align value to 2 when forwarding.
; The first copy's source is `align 4` and the second copy starts at offset 2,
; so the expected forwarded source (%src + 2) is `align 2`.
define void @forward_offset_align_2(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_align_2(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 2 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 2
  ; Read bytes [2, 8) of the temporary.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; If the copy destination can be used as the copy source, we don't need to create a GEP instruction.
; %dest is %src + 1, which is the same address the forwarded source would be
; (offset 1 into the bytes copied from %src). The expected output contains no
; second copy and no additional GEP beyond the ones already in the input.
define void @forward_offset_without_gep(ptr %src) {
; CHECK-LABEL: define void @forward_offset_without_gep(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; Destination sits at the same offset (1) from %src as the bytes being read.
  %dest = getelementptr inbounds i8, ptr %src, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We need to create a GEP instruction when forwarding.
; %dest is %src + 2 while the bytes being read correspond to %src + 1, so the
; expected output has a new GEP for %src + 1 feeding a memmove into %dest.
define void @forward_offset_with_gep(ptr %src) {
; CHECK-LABEL: define void @forward_offset_with_gep(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; Destination is one byte further into %src than the bytes being read.
  %dest = getelementptr inbounds i8, ptr %src, i64 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; Make sure we pass the right parameters when calling `memcpy`.
; Same shape as a plain offset forward (7 bytes in, 6 bytes read from offset 1),
; but %dest is passed to @use afterwards. The expected output reads from
; %src + 1 with the original destination, alignments and length.
define void @forward_offset_memcpy(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_memcpy(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    call void @use(ptr [[DEST]])
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ; Pass the destination to an external function after the copy.
  call void @use(ptr %dest)
  ret void
}

; Make sure we pass the right parameters when calling `memcpy.inline`.
; Same input shape as the plain `memcpy` case, but the second copy is
; `memcpy.inline`. In the expected output the `memcpy.inline` still reads from
; offset 1 of the temporary, i.e. it is left unforwarded.
; NOTE(review): presumably this is because the forwarded copy would have to
; become a memmove, which has no inline form -- confirm against the pass.
define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_memcpy_inline(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    call void @use(ptr [[DEST]])
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  call void @use(ptr %dest)
  ret void
}

; We can forward `memcpy` by shrinking it to the size of the `memcpy` it depends on.
; The first copy writes only bytes [0, 6) of the temporary, while the second
; copy asks for 6 bytes starting at offset 1, i.e. bytes [1, 7) -- one byte past
; what was written. The expected output copies just the 5 overlapping bytes
; from %src + 1.
define void @forward_oversize_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_oversize_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
; CHECK-NEXT:    [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Only 6 bytes of the temporary are initialized here.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 6, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; Requests 6 bytes from offset 1, extending one byte beyond the first copy.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We can forward `memcpy` because the write operation does not corrupt the location to be copied.
; Between the two copies, byte 0 and byte 6 of %src are overwritten. The second
; copy reads 5 bytes from offset 1 of the temporary (bytes [1, 6)), so neither
; store touches the range that the forwarded copy reads from %src.
define void @forward_offset_and_store(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_and_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[SRC]], align 1
; CHECK-NEXT:    [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
; CHECK-NEXT:    store i8 1, ptr [[DEP_SRC_END]], align 1
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Overwrite the first copied byte of %src (offset 0).
  store i8 1, ptr %src, align 1
  ; Overwrite the last copied byte of %src (offset 6).
  %src_end = getelementptr inbounds i8, ptr %src, i64 6
  store i8 1, ptr %src_end, align 1
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; Read bytes [1, 6) of the temporary, which lie between the two stores.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; We cannot forward `memcpy` because the write operation alters the location to be copied.
; Also, make sure we have removed the GEP instruction that was created temporarily.
; Byte 1 of %src is overwritten between the two copies, and the second copy
; reads bytes [1, 6) of the temporary, so the store overlaps the range a
; forwarded copy would read. The expected output keeps the second `memcpy`
; reading from the temporary, and the only GEP on %src is the one feeding the
; store.
define void @do_not_forward_offset_and_store(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @do_not_forward_offset_and_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[DEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    store i8 1, ptr [[DEP]], align 1
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Overwrite byte 1 of %src, inside the range the second copy depends on.
  %src_offset = getelementptr inbounds i8, ptr %src, i64 1
  store i8 1, ptr %src_offset, align 1
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; Make sure we don't crash when the copy source is a constant.
; External 32-byte global used as the constant copy source below.
@buf = external global [32 x i8]

; The first copy reads 26 bytes from the global @buf into %p1; the second reads
; one byte from %p1 + 10. In the expected output the second copy reads from a
; constant-expression GEP on @buf (offset 10) instead of from %p1.
define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
; CHECK-LABEL: define void @pr98675(
; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) {
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P1]], ptr @buf, i64 26, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 10
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P2]], ptr getelementptr inbounds (i8, ptr @buf, i64 10), i64 1, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.p0.p0.i64(ptr %p1, ptr @buf, i64 26, i1 false)
  %gep = getelementptr i8, ptr %p1, i64 10
  ; The input uses memmove here; the expected output has memcpy.
  call void @llvm.memmove.p0.p0.i64(ptr %p2, ptr %gep, i64 1, i1 false)
  ret void
}

; The first copy writes only byte 0 of the 2-byte temporary, while the second
; copy reads byte 1, which lies entirely outside the written range. In the
; expected output the second copy is gone and the first one is kept.
define void @over_offset_cpy(ptr %src) {
; CHECK-LABEL: define void @over_offset_cpy(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[DST:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT:    ret void
;
  %tmp = alloca [2 x i8]
  %dst = alloca i8
  ; Initialize byte 0 of %tmp only.
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
  %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
  ; Read byte 1 of %tmp, which the first copy never wrote.
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)
  ret void
}

; External function that takes a pointer; called with copy destinations in this file.
declare void @use(ptr)

; Memory intrinsics called by the input IR in this file. memmove is declared
; explicitly as well, since @pr98675 calls it directly.
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)