; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

;; Test how memcpy is optimized when the ual (unaligned memory access) feature is
;; turned off. This is similar to AArch64/arm64-misaligned-memcpy-inline.ll.

; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64

;; Small (16 bytes here) unaligned memcpy() should be a function call if
;; ual is turned off.
define void @t0(ptr %out, ptr %in) {
; LA32-LABEL: t0:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    ori $a2, $zero, 16
; LA32-NEXT:    bl memcpy
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: t0:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    ori $a2, $zero, 16
; LA64-NEXT:    pcaddu18i $ra, %call36(memcpy)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
  ret void
}

;; Small (16 bytes here) aligned memcpy() should be inlined even if
;; ual is turned off.
define void @t1(ptr align 8 %out, ptr align 8 %in) {
; LA32-LABEL: t1:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    ld.w $a2, $a1, 12
; LA32-NEXT:    st.w $a2, $a0, 12
; LA32-NEXT:    ld.w $a2, $a1, 8
; LA32-NEXT:    st.w $a2, $a0, 8
; LA32-NEXT:    ld.w $a2, $a1, 4
; LA32-NEXT:    st.w $a2, $a0, 4
; LA32-NEXT:    ld.w $a1, $a1, 0
; LA32-NEXT:    st.w $a1, $a0, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: t1:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    ld.d $a2, $a1, 8
; LA64-NEXT:    st.d $a2, $a0, 8
; LA64-NEXT:    ld.d $a1, $a1, 0
; LA64-NEXT:    st.d $a1, $a0, 0
; LA64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
  ret void
}

;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte-sized
;; loads and stores if ual is turned off.
define void @t2(ptr %out, ptr %in) {
; LA32-LABEL: t2:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    ld.b $a2, $a1, 3
; LA32-NEXT:    st.b $a2, $a0, 3
; LA32-NEXT:    ld.b $a2, $a1, 2
; LA32-NEXT:    st.b $a2, $a0, 2
; LA32-NEXT:    ld.b $a2, $a1, 1
; LA32-NEXT:    st.b $a2, $a0, 1
; LA32-NEXT:    ld.b $a1, $a1, 0
; LA32-NEXT:    st.b $a1, $a0, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: t2:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    ld.b $a2, $a1, 3
; LA64-NEXT:    st.b $a2, $a0, 3
; LA64-NEXT:    ld.b $a2, $a1, 2
; LA64-NEXT:    st.b $a2, $a0, 2
; LA64-NEXT:    ld.b $a2, $a1, 1
; LA64-NEXT:    st.b $a2, $a0, 1
; LA64-NEXT:    ld.b $a1, $a1, 0
; LA64-NEXT:    st.b $a1, $a0, 0
; LA64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
  ret void
}

@.str = private constant [22 x i8] c"preemption imbalance \00", align 1

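;; Check lowering of a memcpy() from a 22-byte constant string into a stack buffer.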
define void @t3() {
; LA32-LABEL: t3:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    addi.w $sp, $sp, -64
; LA32-NEXT:    .cfi_def_cfa_offset 64
; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.L.str)
; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.L.str)
; LA32-NEXT:    ld.h $a1, $a0, 20
; LA32-NEXT:    ld.w $a2, $a0, 16
; LA32-NEXT:    st.h $a1, $sp, 20
; LA32-NEXT:    st.w $a2, $sp, 16
; LA32-NEXT:    ld.w $a1, $a0, 12
; LA32-NEXT:    ld.w $a2, $a0, 8
; LA32-NEXT:    ld.w $a3, $a0, 4
; LA32-NEXT:    ld.w $a0, $a0, 0
; LA32-NEXT:    st.w $a1, $sp, 12
; LA32-NEXT:    st.w $a2, $sp, 8
; LA32-NEXT:    st.w $a3, $sp, 4
; LA32-NEXT:    st.w $a0, $sp, 0
; LA32-NEXT:    addi.w $sp, $sp, 64
; LA32-NEXT:    ret
;
; LA64-LABEL: t3:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    addi.d $sp, $sp, -80
; LA64-NEXT:    .cfi_def_cfa_offset 80
; LA64-NEXT:    pcalau12i $a0, %pc_hi20(.L.str)
; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(.L.str)
; LA64-NEXT:    ld.h $a1, $a0, 20
; LA64-NEXT:    ld.w $a2, $a0, 16
; LA64-NEXT:    ld.d $a3, $a0, 8
; LA64-NEXT:    ld.d $a0, $a0, 0
; LA64-NEXT:    st.h $a1, $sp, 36
; LA64-NEXT:    st.w $a2, $sp, 32
; LA64-NEXT:    st.d $a3, $sp, 24
; LA64-NEXT:    st.d $a0, $sp, 16
; LA64-NEXT:    addi.d $sp, $sp, 80
; LA64-NEXT:    ret
entry:
  %msgbuf = alloca [64 x i8], align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %msgbuf, ptr align 1 @.str, i64 22, i1 false)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)