/*	$OpenBSD: sys_futex.c,v 1.9 2018/08/30 03:30:25 visa Exp $ */

/*
 * Copyright (c) 2016-2017 Martin Pieuchot
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/time.h>
#include <sys/rwlock.h>
#include <sys/futex.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm.h>

/*
 * Atomicity is only needed on MULTIPROCESSOR kernels.  Fall back on
 * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9)
 * implementation.
 */
#ifndef MULTIPROCESSOR
#define copyin32(uaddr, kaddr)	copyin((uaddr), (kaddr), sizeof(uint32_t))
#endif

/*
 * Kernel representation of a futex.
 */
struct futex {
	LIST_ENTRY(futex)	 ft_list;	/* list of all futexes */
	TAILQ_HEAD(, proc)	 ft_threads;	/* sleeping queue */
	struct uvm_object	*ft_obj;	/* UVM object */
	voff_t			 ft_off;	/* UVM offset */
	unsigned int		 ft_refcnt;	/* # of references */
};
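
/*
 * Shared futexes are kept on the global ftlist_shared list; private ones
 * on the owning process's ps_ftlist.  ft_refcnt counts one reference per
 * thread sleeping on the futex plus the transient references taken by
 * futex_get() while a futex(2) call is in progress.
 */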

/* Syscall helpers. */
int	 futex_wait(uint32_t *, uint32_t, const struct timespec *, int);
int	 futex_wake(uint32_t *, uint32_t, int);
int	 futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int);

/* Flags for futex_get(). */
#define FT_CREATE	0x1	/* Create a futex if it doesn't exist. */
#define FT_PRIVATE	0x2	/* Futex is process-private. */

struct futex	*futex_get(uint32_t *, int);
void		 futex_put(struct futex *);

/*
 * The global futex lock serializes futex(2) calls so that no wakeup
 * event is lost, and protects all futex lists and futex states.
 */
struct rwlock			ftlock = RWLOCK_INITIALIZER("futex");
static struct futex_list	ftlist_shared =
    LIST_HEAD_INITIALIZER(ftlist_shared);
struct pool			ftpool;


void
futex_init(void)
{
	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE,
	    PR_WAITOK | PR_RWLOCK, "futexpl", NULL);
}

int
sys_futex(struct proc *p, void *v, register_t *retval)
{
	struct sys_futex_args /* {
		syscallarg(uint32_t *) f;
		syscallarg(int) op;
		syscallarg(uint32_t) val;
		syscallarg(const struct timespec *) timeout;
		syscallarg(uint32_t *) g;
	} */ *uap = v;
	uint32_t *uaddr = SCARG(uap, f);
	int op = SCARG(uap, op);
	uint32_t val = SCARG(uap, val);
	const struct timespec *timeout = SCARG(uap, timeout);
	void *g = SCARG(uap, g);
	int flags = 0;

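	/*
	 * FUTEX_PRIVATE_FLAG is a promise from userland that the futex is
	 * never shared with another process, which lets futex_get() skip
	 * the VM map lookup and use the per-process list.
	 */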
	if (op & FUTEX_PRIVATE_FLAG)
		flags |= FT_PRIVATE;

	switch (op) {
	case FUTEX_WAIT:
	case FUTEX_WAIT_PRIVATE:
		KERNEL_LOCK();
		rw_enter_write(&ftlock);
		*retval = futex_wait(uaddr, val, timeout, flags);
		rw_exit_write(&ftlock);
		KERNEL_UNLOCK();
		break;
	case FUTEX_WAKE:
	case FUTEX_WAKE_PRIVATE:
		rw_enter_write(&ftlock);
		*retval = futex_wake(uaddr, val, flags);
		rw_exit_write(&ftlock);
		break;
	case FUTEX_REQUEUE:
	case FUTEX_REQUEUE_PRIVATE:
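		/*
		 * For a requeue the timeout argument slot carries the
		 * number of threads to move, hence the reinterpreting
		 * cast below.
		 */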
		rw_enter_write(&ftlock);
		*retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags);
		rw_exit_write(&ftlock);
		break;
	default:
		*retval = ENOSYS;
		break;
	}

	return 0;
}
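
/*
 * Illustrative sketch, not part of this file: a minimal userland lock
 * built on this syscall, assuming the futex(2) wrapper declared in
 * <sys/futex.h>.  A contended locker sleeps only while the word still
 * holds the value it last observed, and the owner wakes one waiter on
 * release; unlock issues a wakeup even when nobody sleeps, which a real
 * implementation would avoid.
 *
 *	volatile uint32_t lock = 0;		// 0 = free, 1 = owned
 *
 *	void
 *	lock_enter(void)
 *	{
 *		while (__sync_val_compare_and_swap(&lock, 0, 1) != 0)
 *			futex(&lock, FUTEX_WAIT, 1, NULL, NULL);
 *	}
 *
 *	void
 *	lock_leave(void)
 *	{
 *		__sync_lock_release(&lock);	// store 0 with release semantics
 *		futex(&lock, FUTEX_WAKE, 1, NULL, NULL);
 *	}
 */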

/*
 * Return an existing futex matching userspace address ``uaddr''.
 *
 * If such a futex does not exist and FT_CREATE is given, create it.
 */
struct futex *
futex_get(uint32_t *uaddr, int flags)
{
	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	struct uvm_object *obj = NULL;
	voff_t off = (vaddr_t)uaddr;
	struct futex *f;
	struct futex_list *ftlist = &p->p_p->ps_ftlist;

	rw_assert_wrlock(&ftlock);

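	/*
	 * Only mappings with MAP_INHERIT_SHARE can be seen by another
	 * process, so key shared futexes on the backing UVM object and
	 * offset: processes mapping the same object at different virtual
	 * addresses still find the same futex.  Everything else is keyed
	 * on the virtual address within this process.
	 */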
	if (!(flags & FT_PRIVATE)) {
		vm_map_lock_read(map);
		if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) &&
		    UVM_ET_ISOBJ(entry) && entry->object.uvm_obj &&
		    entry->inheritance == MAP_INHERIT_SHARE) {
			ftlist = &ftlist_shared;
			obj = entry->object.uvm_obj;
			off = entry->offset + ((vaddr_t)uaddr - entry->start);
		}
		vm_map_unlock_read(map);
	}

	LIST_FOREACH(f, ftlist, ft_list) {
		if (f->ft_obj == obj && f->ft_off == off) {
			f->ft_refcnt++;
			break;
		}
	}

	if ((f == NULL) && (flags & FT_CREATE)) {
		/*
		 * We rely on the rwlock to ensure that no other thread
		 * creates the same futex.
		 */
		f = pool_get(&ftpool, PR_WAITOK);
		TAILQ_INIT(&f->ft_threads);
		f->ft_obj = obj;
		f->ft_off = off;
		f->ft_refcnt = 1;
		LIST_INSERT_HEAD(ftlist, f, ft_list);
	}

	return f;
}

/*
 * Release a given futex.
 */
void
futex_put(struct futex *f)
{
	rw_assert_wrlock(&ftlock);

	KASSERT(f->ft_refcnt > 0);

	--f->ft_refcnt;
	if (f->ft_refcnt == 0) {
		KASSERT(TAILQ_EMPTY(&f->ft_threads));
		LIST_REMOVE(f, ft_list);
		pool_put(&ftpool, f);
	}
}

/*
 * Put the current thread on the sleep queue of the futex at address
 * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
 * indefinitely if the argument is NULL.
 */
int
futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout,
    int flags)
{
	struct proc *p = curproc;
	struct futex *f;
	uint64_t to_ticks = 0;
	uint32_t cval;
	int error;

	/*
	 * After reading the value a race is still possible but
	 * we deal with it by serializing all futex syscalls.
	 */
	rw_assert_wrlock(&ftlock);

	/*
	 * Read the userspace futex value.
	 */
	if ((error = copyin32(uaddr, &cval)))
		return error;

	/* If the value changed, stop here. */
	if (cval != val)
		return EAGAIN;

	if (timeout != NULL) {
		struct timespec ts;

		if ((error = copyin(timeout, &ts, sizeof(ts))))
			return error;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, &ts);
#endif
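		/*
		 * Convert the timespec to a tick count, rounding the
		 * nanosecond part up and adding one extra tick so we
		 * sleep at least as long as requested.  For example,
		 * with hz=100 (tick=10000us) a 5ms timeout becomes
		 * 0 + 1 + 1 = 2 ticks.
		 */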
		to_ticks = (uint64_t)hz * ts.tv_sec +
		    (ts.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

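	/*
	 * Queue ourselves on the futex and remember it in p_futex so a
	 * wakeup can detach us; the reference taken here is released by
	 * futex_requeue() if we are woken, or by ourselves below.
	 */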
	f = futex_get(uaddr, flags | FT_CREATE);
	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
	p->p_futex = f;

	error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
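	/*
	 * Map the sleep error to futex(2) semantics: a caught signal
	 * (ERESTART) becomes ECANCELED so userland re-evaluates, and a
	 * timeout (EWOULDBLOCK) is reported as success if a wakeup raced
	 * in and already detached us from the futex.
	 */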
	if (error == ERESTART)
		error = ECANCELED;
	else if (error == EWOULDBLOCK) {
		/* A race occurred between a wakeup and a timeout. */
		if (p->p_futex == NULL)
			error = 0;
		else
			error = ETIMEDOUT;
	}

	/* Remove ourselves if we haven't been woken up. */
	if ((f = p->p_futex) != NULL) {
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);
	}

	return error;
}

/*
 * Wake up at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
 * address ``uaddr2''.
 */
int
futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m,
    int flags)
{
	struct futex *f, *g;
	struct proc *p;
	uint32_t count = 0;

	rw_assert_wrlock(&ftlock);

	f = futex_get(uaddr, flags);
	if (f == NULL)
		return 0;

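	/*
	 * Dequeue at most n + m sleeping threads: the first n are woken,
	 * the rest are moved onto the futex at ``uaddr2'' without being
	 * woken.  Each dequeued thread drops its reference on ``f'';
	 * requeued threads take a new reference on the destination futex.
	 */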
	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);

		if (count < n) {
			wakeup_one(p);
		} else if (uaddr2 != NULL) {
			g = futex_get(uaddr2, FT_CREATE);
			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
			p->p_futex = g;
		}
		count++;
	}

	futex_put(f);

	return count;
}
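
/*
 * A typical use of the requeue operation is a condition-variable
 * broadcast: wake one waiter and move the rest onto the mutex's futex,
 * so they are woken one at a time as the mutex is handed over instead
 * of all stampeding at once.
 */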

/*
 * Wake up at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr''.
 */
int
futex_wake(uint32_t *uaddr, uint32_t n, int flags)
{
	return futex_requeue(uaddr, n, NULL, 0, flags);
}