1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
type mOS struct {
unused byte
}
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32 {
return int32(syscall(_SYS_futex, uintptr(addr), uintptr(op), uintptr(val), uintptr(ts), uintptr(addr2), uintptr(val3)))
}
// For sched_getaffinity use the system call rather than the libc call,
// because the system call returns the number of entries set by the kernel.
func sched_getaffinity(pid _pid_t, cpusetsize uintptr, mask *byte) int32 {
return int32(syscall(_SYS_sched_getaffinity, uintptr(pid), cpusetsize, uintptr(unsafe.Pointer(mask)), 0, 0, 0))
}
// Linux futex.
//
// futexsleep(uint32 *addr, uint32 val)
// futexwakeup(uint32 *addr)
//
// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
// Futexwakeup wakes up threads sleeping on addr.
// Futexsleep is allowed to wake up spuriously.
const (
_FUTEX_PRIVATE_FLAG = 128
_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)
// Atomically,
// if(*addr == val) sleep
// Might be woken up spuriously; that's allowed.
// Don't sleep longer than ns; ns < 0 means forever.
//go:nosplit
func futexsleep(addr *uint32, val uint32, ns int64) {
// Some Linux kernels have a bug where futex of
// FUTEX_WAIT returns an internal error code
// as an errno. Libpthread ignores the return value
// here, and so can we: as it says a few lines up,
// spurious wakeups are allowed.
if ns < 0 {
futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
return
}
var ts timespec
ts.setNsec(ns)
futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
}
// If any procs are sleeping on addr, wake up at most cnt.
//go:nosplit
func futexwakeup(addr *uint32, cnt uint32) {
ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
if ret >= 0 {
return
}
// I don't know that futex wakeup can return
// EAGAIN or EINTR, but if it does, it would be
// safe to loop and call futex again.
systemstack(func() {
print("futexwakeup addr=", addr, " returned ", ret, "\n")
})
*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
}
func getproccount() int32 {
// This buffer is huge (8 kB) but we are on the system stack
// and there should be plenty of space (64 kB).
// Also this is a leaf, so we're not holding up the memory for long.
// See golang.org/issue/11823.
// The suggested behavior here is to keep trying with ever-larger
// buffers, but we don't have a dynamic memory allocator at the
// moment, so that's a bit tricky and seems like overkill.
const maxCPUs = 64 * 1024
var buf [maxCPUs / 8]byte
r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
if r < 0 {
return 1
}
n := int32(0)
for _, v := range buf[:r] {
for v != 0 {
n += int32(v & 1)
v >>= 1
}
}
if n == 0 {
n = 1
}
return n
}
const (
_AT_NULL = 0 // End of vector
_AT_PAGESZ = 6 // System physical page size
_AT_HWCAP = 16 // hardware capability bit vector
_AT_RANDOM = 25 // introduced in 2.6.29
_AT_HWCAP2 = 26 // hardware capability bit vector 2
)
var procAuxv = []byte("/proc/self/auxv\x00")
var addrspace_vec [1]byte
//extern mincore
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
func sysargs(argc int32, argv **byte) {
n := argc + 1
// skip over argv, envp to get to auxv
for argv_index(argv, n) != nil {
n++
}
// skip NULL separator
n++
// now argv+n is auxv
auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
if sysauxv(auxv[:]) != 0 {
return
}
// In some situations we don't get a loader-provided
// auxv, such as when loaded as a library on Android.
// Fall back to /proc/self/auxv.
fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
if fd < 0 {
// On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to
// try using mincore to detect the physical page size.
// mincore should return EINVAL when address is not a multiple of system page size.
const size = 256 << 10 // size of memory region to allocate
p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
if err != 0 {
return
}
var n uintptr
for n = 4 << 10; n < size; n <<= 1 {
err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
if err == 0 {
physPageSize = n
break
}
}
if physPageSize == 0 {
physPageSize = size
}
munmap(p, size)
return
}
var buf [128]uintptr
n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
closefd(fd)
if n < 0 {
return
}
// Make sure buf is terminated, even if we didn't read
// the whole file.
buf[len(buf)-2] = _AT_NULL
sysauxv(buf[:])
}
func sysauxv(auxv []uintptr) int {
var i int
for ; auxv[i] != _AT_NULL; i += 2 {
tag, val := auxv[i], auxv[i+1]
switch tag {
case _AT_RANDOM:
// The kernel provides a pointer to 16-bytes
// worth of random data.
startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
setRandomNumber(uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24)
case _AT_PAGESZ:
physPageSize = val
}
archauxv(tag, val)
// Commented out for gccgo for now.
// vdsoauxv(tag, val)
}
return i / 2
}
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
func getHugePageSize() uintptr {
var numbuf [20]byte
fd := open(&sysTHPSizePath[0], 0 /* O_RDONLY */, 0)
if fd < 0 {
return 0
}
n := read(fd, noescape(unsafe.Pointer(&numbuf[0])), int32(len(numbuf)))
closefd(fd)
if n <= 0 {
return 0
}
l := n - 1 // remove trailing newline
v, ok := atoi(slicebytetostringtmp(numbuf[:l]))
if !ok || v < 0 {
v = 0
}
if v&(v-1) != 0 {
// v is not a power of 2
return 0
}
return uintptr(v)
}
func osinit() {
ncpu = getproccount()
physHugePageSize = getHugePageSize()
}
|