Line data Source code
1 : /* $OpenBSD: uvm_mmap.c,v 1.151 2018/08/15 20:22:13 kettenis Exp $ */
2 : /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 : * Copyright (c) 1991, 1993 The Regents of the University of California.
7 : * Copyright (c) 1988 University of Utah.
8 : *
9 : * All rights reserved.
10 : *
11 : * This code is derived from software contributed to Berkeley by
12 : * the Systems Programming Group of the University of Utah Computer
13 : * Science Department.
14 : *
15 : * Redistribution and use in source and binary forms, with or without
16 : * modification, are permitted provided that the following conditions
17 : * are met:
18 : * 1. Redistributions of source code must retain the above copyright
19 : * notice, this list of conditions and the following disclaimer.
20 : * 2. Redistributions in binary form must reproduce the above copyright
21 : * notice, this list of conditions and the following disclaimer in the
22 : * documentation and/or other materials provided with the distribution.
23 : * 3. All advertising materials mentioning features or use of this software
24 : * must display the following acknowledgement:
25 : * This product includes software developed by the Charles D. Cranor,
26 : * Washington University, University of California, Berkeley and
27 : * its contributors.
28 : * 4. Neither the name of the University nor the names of its contributors
29 : * may be used to endorse or promote products derived from this software
30 : * without specific prior written permission.
31 : *
32 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 : * SUCH DAMAGE.
43 : *
44 : * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
45 : * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
46 : * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
47 : */
48 :
49 : /*
50 : * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
51 : * function.
52 : */
53 : #include <sys/param.h>
54 : #include <sys/systm.h>
55 : #include <sys/fcntl.h>
56 : #include <sys/file.h>
57 : #include <sys/filedesc.h>
58 : #include <sys/resourcevar.h>
59 : #include <sys/mman.h>
60 : #include <sys/mount.h>
61 : #include <sys/proc.h>
62 : #include <sys/malloc.h>
63 : #include <sys/vnode.h>
64 : #include <sys/conf.h>
65 : #include <sys/signalvar.h>
66 : #include <sys/syslog.h>
67 : #include <sys/stat.h>
68 : #include <sys/specdev.h>
69 : #include <sys/stdint.h>
70 : #include <sys/pledge.h>
71 : #include <sys/unistd.h> /* for KBIND* */
72 : #include <sys/user.h>
73 :
74 : #include <machine/exec.h> /* for __LDPGSZ */
75 :
76 : #include <sys/syscallargs.h>
77 :
78 : #include <uvm/uvm.h>
79 : #include <uvm/uvm_device.h>
80 : #include <uvm/uvm_vnode.h>
81 :
82 : int uvm_mmapanon(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
83 : vsize_t, struct proc *);
84 : int uvm_mmapfile(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
85 : struct vnode *, voff_t, vsize_t, struct proc *);
86 :
87 :
88 : /*
89 : * Page align addr and size, returning EINVAL on wraparound.
90 : */
91 : #define ALIGN_ADDR(addr, size, pageoff) do { \
92 : pageoff = (addr & PAGE_MASK); \
93 : if (pageoff != 0) { \
94 : if (size > SIZE_MAX - pageoff) \
95 : return (EINVAL); /* wraparound */ \
96 : addr -= pageoff; \
97 : size += pageoff; \
98 : } \
99 : if (size != 0) { \
100 : size = (vsize_t)round_page(size); \
101 : if (size == 0) \
102 : return (EINVAL); /* wraparound */ \
103 : } \
104 : } while (0)
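A worked illustration of what ALIGN_ADDR does (editorial sketch, not part of uvm_mmap.c): the same truncate/grow/round arithmetic in a standalone userland program, with the wraparound checks omitted and a 4 KiB page size assumed.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* addr = 0x1234, size = 0x2000: pageoff becomes 0x234, addr is
	 * truncated to 0x1000, size grows to 0x2234 and rounds to 0x3000. */
	uintptr_t addr = 0x1234, pagemask = 0xfff;
	size_t size = 0x2000, pageoff;

	pageoff = addr & pagemask;		/* offset into the page */
	addr -= pageoff;			/* trunc_page(addr) */
	size += pageoff;			/* keep covering the same last byte */
	size = (size + pagemask) & ~pagemask;	/* round_page(size) */
	printf("addr 0x%lx size 0x%zx\n", (unsigned long)addr, size);
	return 0;
}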
105 :
106 : /*
107 : * sys_mquery: provide mapping hints to applications that do fixed mappings
108 : *
109 : * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
110 : * don't care about PMAP_PREFER or such)
111 : * addr: hint where we'd like to place the mapping.
112 : * size: size of the mapping
113 : * fd: fd of the file we want to map
114 : * off: offset within the file
115 : */
116 : int
117 0 : sys_mquery(struct proc *p, void *v, register_t *retval)
118 : {
119 : struct sys_mquery_args /* {
120 : syscallarg(void *) addr;
121 : syscallarg(size_t) len;
122 : syscallarg(int) prot;
123 : syscallarg(int) flags;
124 : syscallarg(int) fd;
125 : syscallarg(long) pad;
126 : syscallarg(off_t) pos;
127 0 : } */ *uap = v;
128 0 : struct file *fp;
129 : voff_t uoff;
130 : int error;
131 0 : vaddr_t vaddr;
132 : int flags = 0;
133 : vsize_t size;
134 : vm_prot_t prot;
135 : int fd;
136 :
137 0 : vaddr = (vaddr_t) SCARG(uap, addr);
138 0 : prot = SCARG(uap, prot);
139 0 : size = (vsize_t) SCARG(uap, len);
140 0 : fd = SCARG(uap, fd);
141 :
142 0 : if ((prot & PROT_MASK) != prot)
143 0 : return (EINVAL);
144 :
145 0 : if (SCARG(uap, flags) & MAP_FIXED)
146 0 : flags |= UVM_FLAG_FIXED;
147 :
148 0 : if (fd >= 0) {
149 0 : if ((error = getvnode(p, fd, &fp)) != 0)
150 0 : return (error);
151 0 : uoff = SCARG(uap, pos);
152 0 : } else {
153 0 : fp = NULL;
154 : uoff = UVM_UNKNOWN_OFFSET;
155 : }
156 :
157 0 : if (vaddr == 0)
158 0 : vaddr = uvm_map_hint(p->p_vmspace, prot, VM_MIN_ADDRESS,
159 : VM_MAXUSER_ADDRESS);
160 :
161 0 : error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff,
162 : flags);
163 0 : if (error == 0)
164 0 : *retval = (register_t)(vaddr);
165 :
166 0 : if (fp != NULL)
167 0 : FRELE(fp, p);
168 0 : return (error);
169 0 : }
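A minimal userland sketch (not part of this file) of how the OpenBSD-specific mquery(2) interface above might be called to ask for a placement hint; the hint address is an arbitrary illustrative value.

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	/* Ask where a 64 KiB PROT_READ mapping could go near the hint;
	 * fd -1 means no backing file (UVM_UNKNOWN_OFFSET in the kernel). */
	void *hint = (void *)0x1000000UL;
	void *va = mquery(hint, 65536, PROT_READ, 0, -1, 0);

	if (va == MAP_FAILED) {
		perror("mquery");
		return 1;
	}
	printf("suggested address: %p\n", va);
	return 0;
}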
170 :
171 : /*
172 : * sys_mincore: determine if pages are in core or not.
173 : */
174 : /* ARGSUSED */
175 : int
176 0 : sys_mincore(struct proc *p, void *v, register_t *retval)
177 : {
178 : struct sys_mincore_args /* {
179 : syscallarg(void *) addr;
180 : syscallarg(size_t) len;
181 : syscallarg(char *) vec;
182 0 : } */ *uap = v;
183 : vm_page_t m;
184 : char *vec, *pgi, *pgs;
185 : struct uvm_object *uobj;
186 : struct vm_amap *amap;
187 : struct vm_anon *anon;
188 0 : vm_map_entry_t entry, next;
189 : vaddr_t start, end, lim;
190 : vm_map_t map;
191 : vsize_t len, npgs;
192 : int error = 0;
193 :
194 0 : map = &p->p_vmspace->vm_map;
195 :
196 0 : start = (vaddr_t)SCARG(uap, addr);
197 0 : len = SCARG(uap, len);
198 0 : vec = SCARG(uap, vec);
199 :
200 0 : if (start & PAGE_MASK)
201 0 : return (EINVAL);
202 0 : len = round_page(len);
203 0 : end = start + len;
204 0 : if (end <= start)
205 0 : return (EINVAL);
206 :
207 0 : npgs = len >> PAGE_SHIFT;
208 :
209 : /*
210 : * < art> Anyone trying to mincore more than 4GB of address space is
211 : * clearly insane.
212 : */
213 0 : if (npgs >= (0xffffffff >> PAGE_SHIFT))
214 0 : return (E2BIG);
215 0 : pgs = mallocarray(npgs, sizeof(*pgs), M_TEMP, M_WAITOK | M_CANFAIL);
216 0 : if (pgs == NULL)
217 0 : return (ENOMEM);
218 : pgi = pgs;
219 :
220 : /*
221 : * Lock down vec, so our returned status isn't outdated by
222 : * storing the status byte for a page.
223 : */
224 0 : if ((error = uvm_vslock(p, vec, npgs, PROT_WRITE)) != 0) {
225 0 : free(pgs, M_TEMP, npgs * sizeof(*pgs));
226 0 : return (error);
227 : }
228 :
229 0 : vm_map_lock_read(map);
230 :
231 0 : if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
232 : error = ENOMEM;
233 0 : goto out;
234 : }
235 :
236 0 : for (/* nothing */;
237 0 : entry != NULL && entry->start < end;
238 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
239 0 : KASSERT(!UVM_ET_ISSUBMAP(entry));
240 0 : KASSERT(start >= entry->start);
241 :
242 : /* Make sure there are no holes. */
243 0 : next = RBT_NEXT(uvm_map_addr, entry);
244 0 : if (entry->end < end &&
245 0 : (next == NULL ||
246 0 : next->start > entry->end)) {
247 : error = ENOMEM;
248 0 : goto out;
249 : }
250 :
251 0 : lim = end < entry->end ? end : entry->end;
252 :
253 : /*
254 : * Special case for objects with no "real" pages. Those
255 : * are always considered resident (mapped devices).
256 : */
257 0 : if (UVM_ET_ISOBJ(entry)) {
258 0 : KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
259 0 : if (entry->object.uvm_obj->pgops->pgo_fault != NULL) {
260 0 : for (/* nothing */; start < lim;
261 0 : start += PAGE_SIZE, pgi++)
262 0 : *pgi = 1;
263 : continue;
264 : }
265 : }
266 :
267 0 : amap = entry->aref.ar_amap; /* top layer */
268 0 : uobj = entry->object.uvm_obj; /* bottom layer */
269 :
270 0 : for (/* nothing */; start < lim; start += PAGE_SIZE, pgi++) {
271 0 : *pgi = 0;
272 0 : if (amap != NULL) {
273 : /* Check the top layer first. */
274 0 : anon = amap_lookup(&entry->aref,
275 0 : start - entry->start);
276 0 : if (anon != NULL && anon->an_page != NULL) {
277 : /*
278 : * Anon has the page for this entry
279 : * offset.
280 : */
281 0 : *pgi = 1;
282 0 : }
283 : }
284 :
285 0 : if (uobj != NULL && *pgi == 0) {
286 : /* Check the bottom layer. */
287 0 : m = uvm_pagelookup(uobj,
288 0 : entry->offset + (start - entry->start));
289 0 : if (m != NULL) {
290 : /*
291 : * Object has the page for this entry
292 : * offset.
293 : */
294 0 : *pgi = 1;
295 0 : }
296 : }
297 : }
298 : }
299 :
300 : out:
301 0 : vm_map_unlock_read(map);
302 0 : uvm_vsunlock(p, SCARG(uap, vec), npgs);
303 : /* now the map is unlocked we can copyout without fear. */
304 0 : if (error == 0)
305 0 : copyout(pgs, vec, npgs * sizeof(char));
306 0 : free(pgs, M_TEMP, npgs * sizeof(*pgs));
307 0 : return (error);
308 0 : }
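A minimal userland sketch of the mincore(2) semantics implemented above: the kernel fills one status byte per page, setting it to 1 when the page is resident in the top (amap) or bottom (object) layer.

#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	size_t len = 4 * pgsz;
	char *p, vec[4];

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	p[0] = 1;			/* fault in the first page only */
	if (mincore(p, len, vec) == -1)
		return 1;
	for (int i = 0; i < 4; i++)	/* vec[i] is 1 if page i is resident */
		printf("page %d: %d\n", i, vec[i]);
	munmap(p, len);
	return 0;
}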
309 :
310 : int uvm_wxabort;
311 :
312 : /*
313 : * W^X violations are only allowed on permitted filesystems.
314 : */
315 : static inline int
316 0 : uvm_wxcheck(struct proc *p, char *call)
317 : {
318 0 : struct process *pr = p->p_p;
319 0 : int wxallowed = (pr->ps_textvp->v_mount &&
320 0 : (pr->ps_textvp->v_mount->mnt_flag & MNT_WXALLOWED));
321 :
322 0 : if (wxallowed && (pr->ps_flags & PS_WXNEEDED))
323 0 : return (0);
324 :
325 : /* Report W^X failures, and potentially SIGABRT */
326 0 : if (pr->ps_wxcounter++ == 0)
327 0 : log(LOG_NOTICE, "%s(%d): %s W^X violation\n",
328 0 : pr->ps_comm, pr->ps_pid, call);
329 :
330 : /* Send uncatchable SIGABRT for coredump */
331 0 : if (uvm_wxabort)
332 0 : sigexit(p, SIGABRT);
333 :
334 0 : return (ENOTSUP);
335 0 : }
336 :
337 : /*
338 : * sys_mmap: mmap system call.
339 : *
340 : * => file offset and address may not be page aligned
341 : * - if MAP_FIXED, the offset and address must have the same remainder modulo PAGE_SIZE
 341 : * - if MAP_FIXED, the offset and address must have the same remainder modulo PAGE_SIZE
342 : * - if address isn't page aligned the mapping starts at trunc_page(addr)
343 : * and the return value is adjusted up by the page offset.
344 : */
345 : int
346 0 : sys_mmap(struct proc *p, void *v, register_t *retval)
347 : {
348 : struct sys_mmap_args /* {
349 : syscallarg(void *) addr;
350 : syscallarg(size_t) len;
351 : syscallarg(int) prot;
352 : syscallarg(int) flags;
353 : syscallarg(int) fd;
354 : syscallarg(long) pad;
355 : syscallarg(off_t) pos;
356 0 : } */ *uap = v;
357 0 : vaddr_t addr;
358 0 : struct vattr va;
359 : off_t pos;
360 : vsize_t size, pageoff;
361 : vm_prot_t prot, maxprot;
362 : int flags, fd;
363 : vaddr_t vm_min_address = VM_MIN_ADDRESS;
364 0 : struct filedesc *fdp = p->p_fd;
365 : struct file *fp = NULL;
366 : struct vnode *vp;
367 : int error;
368 :
369 : /* first, extract syscall args from the uap. */
370 0 : addr = (vaddr_t) SCARG(uap, addr);
371 0 : size = (vsize_t) SCARG(uap, len);
372 0 : prot = SCARG(uap, prot);
373 0 : flags = SCARG(uap, flags);
374 0 : fd = SCARG(uap, fd);
375 0 : pos = SCARG(uap, pos);
376 :
377 : /*
378 : * Validate the flags.
379 : */
380 0 : if ((prot & PROT_MASK) != prot)
381 0 : return (EINVAL);
382 0 : if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
383 0 : (error = uvm_wxcheck(p, "mmap")))
384 0 : return (error);
385 :
386 0 : if ((flags & MAP_FLAGMASK) != flags)
387 0 : return (EINVAL);
388 0 : if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
389 0 : return (EINVAL);
390 0 : if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
391 0 : return (EINVAL);
392 0 : if (flags & MAP_STACK) {
393 0 : if ((flags & (MAP_ANON|MAP_PRIVATE)) != (MAP_ANON|MAP_PRIVATE))
394 0 : return (EINVAL);
395 0 : if (flags & ~(MAP_STACK|MAP_FIXED|MAP_ANON|MAP_PRIVATE))
396 0 : return (EINVAL);
397 0 : if (pos != 0)
398 0 : return (EINVAL);
399 0 : if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
400 0 : return (EINVAL);
401 : }
402 0 : if (size == 0)
403 0 : return (EINVAL);
404 :
405 0 : error = pledge_protexec(p, prot);
406 0 : if (error)
407 0 : return (error);
408 :
409 : /* align file position and save offset. adjust size. */
410 0 : ALIGN_ADDR(pos, size, pageoff);
411 :
412 : /* now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */
413 0 : if (flags & MAP_FIXED) {
414 : /* adjust address by the same amount as we did the offset */
415 0 : addr -= pageoff;
416 0 : if (addr & PAGE_MASK)
417 0 : return (EINVAL); /* not page aligned */
418 :
419 0 : if (addr > SIZE_MAX - size)
420 0 : return (EINVAL); /* no wrapping! */
421 0 : if (VM_MAXUSER_ADDRESS > 0 &&
422 0 : (addr + size) > VM_MAXUSER_ADDRESS)
423 0 : return (EINVAL);
424 0 : if (vm_min_address > 0 && addr < vm_min_address)
425 0 : return (EINVAL);
426 :
427 : }
428 :
429 : /* check for file mappings (i.e. not anonymous) and verify file. */
430 0 : if ((flags & MAP_ANON) == 0) {
431 0 : if ((fp = fd_getfile(fdp, fd)) == NULL)
432 0 : return (EBADF);
433 :
434 0 : if (fp->f_type != DTYPE_VNODE) {
435 : error = ENODEV; /* only mmap vnodes! */
436 0 : goto out;
437 : }
438 0 : vp = (struct vnode *)fp->f_data; /* convert to vnode */
439 :
440 0 : if (vp->v_type != VREG && vp->v_type != VCHR &&
441 0 : vp->v_type != VBLK) {
442 : error = ENODEV; /* only REG/CHR/BLK support mmap */
443 0 : goto out;
444 : }
445 :
446 0 : if (vp->v_type == VREG && (pos + size) < pos) {
447 : error = EINVAL; /* no offset wrapping */
448 0 : goto out;
449 : }
450 :
451 : /* special case: catch SunOS style /dev/zero */
452 0 : if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
453 0 : flags |= MAP_ANON;
454 0 : FRELE(fp, p);
455 : fp = NULL;
456 0 : goto is_anon;
457 : }
458 :
459 : /*
460 : * Old programs may not select a specific sharing type, so
461 : * default to an appropriate one.
462 : */
463 0 : if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
464 : #if defined(DEBUG)
465 : printf("WARNING: defaulted mmap() share type to"
466 : " %s (pid %d comm %s)\n",
467 : vp->v_type == VCHR ? "MAP_SHARED" : "MAP_PRIVATE",
468 : p->p_p->ps_pid, p->p_p->ps_comm);
469 : #endif
470 0 : if (vp->v_type == VCHR)
471 0 : flags |= MAP_SHARED; /* for a device */
472 : else
473 0 : flags |= MAP_PRIVATE; /* for a file */
474 : }
475 :
476 : /*
477 : * MAP_PRIVATE device mappings don't make sense (and aren't
478 : * supported anyway). However, some programs rely on this,
479 : * so just change it to MAP_SHARED.
480 : */
481 0 : if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
482 0 : flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
483 0 : }
484 :
485 : /* now check protection */
486 : maxprot = PROT_EXEC;
487 :
488 : /* check read access */
489 0 : if (fp->f_flag & FREAD)
490 0 : maxprot |= PROT_READ;
491 0 : else if (prot & PROT_READ) {
492 : error = EACCES;
493 0 : goto out;
494 : }
495 :
496 : /* check write access, shared case first */
497 0 : if (flags & MAP_SHARED) {
498 : /*
499 : * if the file is writable, only add PROT_WRITE to
500 : * maxprot if the file is not immutable, append-only.
501 : * otherwise, if we have asked for PROT_WRITE, return
502 : * EPERM.
503 : */
504 0 : if (fp->f_flag & FWRITE) {
505 0 : KERNEL_LOCK();
506 0 : error = VOP_GETATTR(vp, &va, p->p_ucred, p);
507 0 : KERNEL_UNLOCK();
508 0 : if (error)
509 : goto out;
510 0 : if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
511 0 : maxprot |= PROT_WRITE;
512 0 : else if (prot & PROT_WRITE) {
513 : error = EPERM;
514 0 : goto out;
515 : }
516 0 : } else if (prot & PROT_WRITE) {
517 : error = EACCES;
518 0 : goto out;
519 : }
520 : } else {
521 : /* MAP_PRIVATE mappings can always write to */
522 0 : maxprot |= PROT_WRITE;
523 : }
524 0 : if ((flags & __MAP_NOFAULT) != 0 ||
525 0 : ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
526 0 : if (p->p_rlimit[RLIMIT_DATA].rlim_cur < size ||
527 0 : p->p_rlimit[RLIMIT_DATA].rlim_cur - size <
528 0 : ptoa(p->p_vmspace->vm_dused)) {
529 : error = ENOMEM;
530 0 : goto out;
531 : }
532 : }
533 0 : KERNEL_LOCK();
534 0 : error = uvm_mmapfile(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
535 0 : flags, vp, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
536 0 : KERNEL_UNLOCK();
537 0 : } else { /* MAP_ANON case */
538 0 : if (fd != -1)
539 0 : return EINVAL;
540 :
541 : is_anon: /* label for SunOS style /dev/zero */
542 :
543 : /* __MAP_NOFAULT only makes sense with a backing object */
544 0 : if ((flags & __MAP_NOFAULT) != 0)
545 0 : return EINVAL;
546 :
547 0 : if (p->p_rlimit[RLIMIT_DATA].rlim_cur < size ||
548 0 : p->p_rlimit[RLIMIT_DATA].rlim_cur - size <
549 0 : ptoa(p->p_vmspace->vm_dused)) {
550 0 : return ENOMEM;
551 : }
552 :
553 : /*
554 : * We've been treating (MAP_SHARED|MAP_PRIVATE) == 0 as
555 : * MAP_PRIVATE, so make that clear.
556 : */
557 0 : if ((flags & MAP_SHARED) == 0)
558 0 : flags |= MAP_PRIVATE;
559 :
560 : maxprot = PROT_MASK;
561 0 : error = uvm_mmapanon(&p->p_vmspace->vm_map, &addr, size, prot,
562 0 : maxprot, flags, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
563 : }
564 :
565 0 : if (error == 0)
566 : /* remember to add offset */
567 0 : *retval = (register_t)(addr + pageoff);
568 :
569 : out:
570 0 : if (fp)
571 0 : FRELE(fp, p);
572 0 : return (error);
573 0 : }
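A minimal userland sketch of the common path through sys_mmap() above: a private, read-only file mapping. The path /etc/services is only an illustrative choice of a readable file; requesting PROT_WRITE|PROT_EXEC together would instead be rejected by uvm_wxcheck() unless the filesystem and binary allow it.

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	struct stat st;
	int fd = open("/etc/services", O_RDONLY);	/* any readable file */
	char *p;

	if (fd == -1 || fstat(fd, &st) == -1)
		return 1;
	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	write(STDOUT_FILENO, p, 16);	/* first 16 bytes of the file */
	munmap(p, st.st_size);
	close(fd);
	return 0;
}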
574 :
575 : /*
576 : * sys_msync: the msync system call (a front-end for flush)
577 : */
578 :
579 : int
580 0 : sys_msync(struct proc *p, void *v, register_t *retval)
581 : {
582 : struct sys_msync_args /* {
583 : syscallarg(void *) addr;
584 : syscallarg(size_t) len;
585 : syscallarg(int) flags;
586 0 : } */ *uap = v;
587 : vaddr_t addr;
588 : vsize_t size, pageoff;
589 : vm_map_t map;
590 : int flags, uvmflags;
591 :
592 : /* extract syscall args from the uap */
593 0 : addr = (vaddr_t)SCARG(uap, addr);
594 0 : size = (vsize_t)SCARG(uap, len);
595 0 : flags = SCARG(uap, flags);
596 :
597 : /* sanity check flags */
598 0 : if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
599 0 : (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
600 0 : (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
601 0 : return (EINVAL);
602 0 : if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
603 0 : flags |= MS_SYNC;
604 :
605 : /* align the address to a page boundary, and adjust the size accordingly */
606 0 : ALIGN_ADDR(addr, size, pageoff);
607 0 : if (addr > SIZE_MAX - size)
608 0 : return (EINVAL); /* disallow wrap-around. */
609 :
610 : /* get map */
611 0 : map = &p->p_vmspace->vm_map;
612 :
613 : /* translate MS_ flags into PGO_ flags */
614 : uvmflags = PGO_CLEANIT;
615 0 : if (flags & MS_INVALIDATE)
616 0 : uvmflags |= PGO_FREE;
617 : if (flags & MS_SYNC)
618 : uvmflags |= PGO_SYNCIO;
619 : else
620 : uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */
621 :
622 0 : return (uvm_map_clean(map, addr, addr+size, uvmflags));
623 0 : }
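A minimal userland sketch of msync(2) as handled above: dirty a shared file mapping and flush it synchronously (note that the kernel currently forces PGO_SYNCIO even for MS_ASYNC). The temporary-file path is only illustrative.

#include <sys/mman.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char path[] = "/tmp/msync.XXXXXXXXXX";
	int fd = mkstemp(path);
	char *p;

	if (fd == -1 || ftruncate(fd, 4096) == -1)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	memcpy(p, "hello", 5);
	/* MS_SYNC: do not return until the dirty page reaches the file. */
	if (msync(p, 4096, MS_SYNC) == -1)
		return 1;
	munmap(p, 4096);
	close(fd);
	unlink(path);
	return 0;
}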
624 :
625 : /*
626 : * sys_munmap: unmap a user's memory
627 : */
628 : int
629 0 : sys_munmap(struct proc *p, void *v, register_t *retval)
630 : {
631 : struct sys_munmap_args /* {
632 : syscallarg(void *) addr;
633 : syscallarg(size_t) len;
634 0 : } */ *uap = v;
635 : vaddr_t addr;
636 : vsize_t size, pageoff;
637 : vm_map_t map;
638 : vaddr_t vm_min_address = VM_MIN_ADDRESS;
639 0 : struct uvm_map_deadq dead_entries;
640 :
641 : /* get syscall args... */
642 0 : addr = (vaddr_t) SCARG(uap, addr);
643 0 : size = (vsize_t) SCARG(uap, len);
644 :
645 : /* align address to a page boundary, and adjust size accordingly */
646 0 : ALIGN_ADDR(addr, size, pageoff);
647 :
648 : /*
649 : * Check for illegal addresses. Watch out for address wrap...
650 : * Note that VM_*_ADDRESS are not constants due to casts (argh).
651 : */
652 0 : if (addr > SIZE_MAX - size)
653 0 : return (EINVAL);
654 0 : if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
655 0 : return (EINVAL);
656 0 : if (vm_min_address > 0 && addr < vm_min_address)
657 0 : return (EINVAL);
658 0 : map = &p->p_vmspace->vm_map;
659 :
660 :
661 0 : vm_map_lock(map); /* lock map so we can checkprot */
662 :
663 : /*
664 : * interesting system call semantic: make sure entire range is
665 : * allocated before allowing an unmap.
666 : */
667 0 : if (!uvm_map_checkprot(map, addr, addr + size, PROT_NONE)) {
668 0 : vm_map_unlock(map);
669 0 : return (EINVAL);
670 : }
671 :
672 0 : TAILQ_INIT(&dead_entries);
673 0 : uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE);
674 0 : vm_map_unlock(map); /* and unlock */
675 :
676 0 : uvm_unmap_detach(&dead_entries, 0);
677 :
678 0 : return (0);
679 0 : }
680 :
681 : /*
682 : * sys_mprotect: the mprotect system call
683 : */
684 : int
685 0 : sys_mprotect(struct proc *p, void *v, register_t *retval)
686 : {
687 : struct sys_mprotect_args /* {
688 : syscallarg(void *) addr;
689 : syscallarg(size_t) len;
690 : syscallarg(int) prot;
691 0 : } */ *uap = v;
692 : vaddr_t addr;
693 : vsize_t size, pageoff;
694 : vm_prot_t prot;
695 : int error;
696 :
697 : /*
698 : * extract syscall args from uap
699 : */
700 :
701 0 : addr = (vaddr_t)SCARG(uap, addr);
702 0 : size = (vsize_t)SCARG(uap, len);
703 0 : prot = SCARG(uap, prot);
704 :
705 0 : if ((prot & PROT_MASK) != prot)
706 0 : return (EINVAL);
707 0 : if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
708 0 : (error = uvm_wxcheck(p, "mprotect")))
709 0 : return (error);
710 :
711 0 : error = pledge_protexec(p, prot);
712 0 : if (error)
713 0 : return (error);
714 :
715 : /*
716 : * align the address to a page boundary, and adjust the size accordingly
717 : */
718 0 : ALIGN_ADDR(addr, size, pageoff);
719 0 : if (addr > SIZE_MAX - size)
720 0 : return (EINVAL); /* disallow wrap-around. */
721 :
722 0 : return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
723 : prot, FALSE));
724 0 : }
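A minimal userland sketch of mprotect(2) as implemented above: drop and restore write permission on an anonymous page. As in sys_mmap, requesting PROT_WRITE|PROT_EXEC together would be refused by uvm_wxcheck().

#include <sys/mman.h>
#include <string.h>

int
main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "data");
	/* Drop write access; a store to p would now deliver SIGSEGV. */
	if (mprotect(p, 4096, PROT_READ) == -1)
		return 1;
	/* Restore read/write. */
	if (mprotect(p, 4096, PROT_READ | PROT_WRITE) == -1)
		return 1;
	munmap(p, 4096);
	return 0;
}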
725 :
726 : /*
727 : * sys_minherit: the minherit system call
728 : */
729 : int
730 0 : sys_minherit(struct proc *p, void *v, register_t *retval)
731 : {
732 : struct sys_minherit_args /* {
733 : syscallarg(void *) addr;
734 : syscallarg(size_t) len;
735 : syscallarg(int) inherit;
736 0 : } */ *uap = v;
737 : vaddr_t addr;
738 : vsize_t size, pageoff;
739 : vm_inherit_t inherit;
740 :
741 0 : addr = (vaddr_t)SCARG(uap, addr);
742 0 : size = (vsize_t)SCARG(uap, len);
743 0 : inherit = SCARG(uap, inherit);
744 :
745 : /*
746 : * align the address to a page boundary, and adjust the size accordingly
747 : */
748 0 : ALIGN_ADDR(addr, size, pageoff);
749 0 : if (addr > SIZE_MAX - size)
750 0 : return (EINVAL); /* disallow wrap-around. */
751 :
752 0 : return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
753 : inherit));
754 0 : }
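A minimal userland sketch of minherit(2): with MAP_INHERIT_ZERO, a forked child sees the region as fresh zero-filled memory rather than a copy of the parent's data.

#include <sys/mman.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	p[0] = 'x';
	/* The child will see this page as freshly zeroed memory. */
	if (minherit(p, 4096, MAP_INHERIT_ZERO) == -1)
		return 1;
	if (fork() == 0) {
		printf("child sees %d\n", p[0]);	/* prints 0 */
		_exit(0);
	}
	wait(NULL);
	return 0;
}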
755 :
756 : /*
757 : * sys_madvise: give advice about memory usage.
758 : */
759 : /* ARGSUSED */
760 : int
761 0 : sys_madvise(struct proc *p, void *v, register_t *retval)
762 : {
763 : struct sys_madvise_args /* {
764 : syscallarg(void *) addr;
765 : syscallarg(size_t) len;
766 : syscallarg(int) behav;
767 0 : } */ *uap = v;
768 : vaddr_t addr;
769 : vsize_t size, pageoff;
770 : int advice, error;
771 :
772 0 : addr = (vaddr_t)SCARG(uap, addr);
773 0 : size = (vsize_t)SCARG(uap, len);
774 0 : advice = SCARG(uap, behav);
775 :
776 : /*
777 : * align the address to a page boundary, and adjust the size accordingly
778 : */
779 0 : ALIGN_ADDR(addr, size, pageoff);
780 0 : if (addr > SIZE_MAX - size)
781 0 : return (EINVAL); /* disallow wrap-around. */
782 :
783 0 : switch (advice) {
784 : case MADV_NORMAL:
785 : case MADV_RANDOM:
786 : case MADV_SEQUENTIAL:
787 0 : error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
788 0 : addr + size, advice);
789 0 : break;
790 :
791 : case MADV_WILLNEED:
792 : /*
793 : * Activate all these pages, pre-faulting them in if
794 : * necessary.
795 : */
796 : /*
797 : * XXX IMPLEMENT ME.
798 : * Should invent a "weak" mode for uvm_fault()
799 : * which would only do the PGO_LOCKED pgo_get().
800 : */
801 0 : return (0);
802 :
803 : case MADV_DONTNEED:
804 : /*
805 : * Deactivate all these pages. We don't need them
806 : * any more. We don't, however, toss the data in
807 : * the pages.
808 : */
809 0 : error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
810 : PGO_DEACTIVATE);
811 0 : break;
812 :
813 : case MADV_FREE:
814 : /*
815 : * These pages contain no valid data, and may be
816 : * garbage-collected. Toss all resources, including
817 : * any swap space in use.
818 : */
819 0 : error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
820 : PGO_FREE);
821 0 : break;
822 :
823 : case MADV_SPACEAVAIL:
824 : /*
825 : * XXXMRG What is this? I think it's:
826 : *
827 : * Ensure that we have allocated backing-store
828 : * for these pages.
829 : *
830 : * This is going to require changes to the page daemon,
831 : * as it will free swap space allocated to pages in core.
832 : * There's also what to do for device/file/anonymous memory.
833 : */
834 0 : return (EINVAL);
835 :
836 : default:
837 0 : return (EINVAL);
838 : }
839 :
840 0 : return (error);
841 0 : }
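A minimal userland sketch of madvise(2) against the cases handled above: declare a sequential access pattern, then mark the contents disposable with MADV_FREE so the pager may reclaim the pages without writing them out.

#include <sys/mman.h>

int
main(void)
{
	size_t len = 16 * 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* We will walk the buffer once, front to back. */
	madvise(p, len, MADV_SEQUENTIAL);
	for (size_t i = 0; i < len; i++)
		p[i] = 1;
	/* Contents are disposable now; the pager may toss the pages. */
	madvise(p, len, MADV_FREE);
	munmap(p, len);
	return 0;
}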
842 :
843 : /*
844 : * sys_mlock: memory lock
845 : */
846 :
847 : int
848 0 : sys_mlock(struct proc *p, void *v, register_t *retval)
849 : {
850 : struct sys_mlock_args /* {
851 : syscallarg(const void *) addr;
852 : syscallarg(size_t) len;
853 0 : } */ *uap = v;
854 : vaddr_t addr;
855 : vsize_t size, pageoff;
856 : int error;
857 :
858 : /* extract syscall args from uap */
859 0 : addr = (vaddr_t)SCARG(uap, addr);
860 0 : size = (vsize_t)SCARG(uap, len);
861 :
862 : /* align address to a page boundary and adjust size accordingly */
863 0 : ALIGN_ADDR(addr, size, pageoff);
864 0 : if (addr > SIZE_MAX - size)
865 0 : return (EINVAL); /* disallow wrap-around. */
866 :
867 0 : if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
868 0 : return (EAGAIN);
869 :
870 : #ifdef pmap_wired_count
871 0 : if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
872 0 : p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
873 0 : return (EAGAIN);
874 : #else
875 : if ((error = suser(p)) != 0)
876 : return (error);
877 : #endif
878 :
879 0 : error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
880 : 0);
881 0 : return (error == 0 ? 0 : ENOMEM);
882 0 : }
883 :
884 : /*
885 : * sys_munlock: unlock wired pages
886 : */
887 :
888 : int
889 0 : sys_munlock(struct proc *p, void *v, register_t *retval)
890 : {
891 : struct sys_munlock_args /* {
892 : syscallarg(const void *) addr;
893 : syscallarg(size_t) len;
894 0 : } */ *uap = v;
895 : vaddr_t addr;
896 : vsize_t size, pageoff;
897 : int error;
898 :
899 : /* extract syscall args from uap */
900 0 : addr = (vaddr_t)SCARG(uap, addr);
901 0 : size = (vsize_t)SCARG(uap, len);
902 :
903 : /* align address to a page boundary, and adjust size accordingly */
904 0 : ALIGN_ADDR(addr, size, pageoff);
905 0 : if (addr > SIZE_MAX - size)
906 0 : return (EINVAL); /* disallow wrap-around. */
907 :
908 : #ifndef pmap_wired_count
909 : if ((error = suser(p)) != 0)
910 : return (error);
911 : #endif
912 :
913 0 : error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
914 : 0);
915 0 : return (error == 0 ? 0 : ENOMEM);
916 0 : }
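A minimal userland sketch covering both mlock(2) and munlock(2) above: wire a buffer so it cannot be paged out (e.g. for key material), then unwire it. The calls fail with EAGAIN when RLIMIT_MEMLOCK or the global wired-page limit would be exceeded, as checked in sys_mlock().

#include <sys/mman.h>
#include <string.h>

int
main(void)
{
	static char secret[4096];

	if (mlock(secret, sizeof(secret)) == -1)
		return 1;	/* over RLIMIT_MEMLOCK or wiredmax */
	memset(secret, 0xA5, sizeof(secret));
	/* ... use the buffer ... */
	explicit_bzero(secret, sizeof(secret));
	if (munlock(secret, sizeof(secret)) == -1)
		return 1;
	return 0;
}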
917 :
918 : /*
919 : * sys_mlockall: lock all pages mapped into an address space.
920 : */
921 : int
922 0 : sys_mlockall(struct proc *p, void *v, register_t *retval)
923 : {
924 : struct sys_mlockall_args /* {
925 : syscallarg(int) flags;
926 0 : } */ *uap = v;
927 : int error, flags;
928 :
929 0 : flags = SCARG(uap, flags);
930 :
931 0 : if (flags == 0 ||
932 0 : (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
933 0 : return (EINVAL);
934 :
935 : #ifndef pmap_wired_count
936 : if ((error = suser(p)) != 0)
937 : return (error);
938 : #endif
939 :
940 0 : error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
941 0 : p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
942 0 : if (error != 0 && error != ENOMEM)
943 0 : return (EAGAIN);
944 0 : return (error);
945 0 : }
946 :
947 : /*
948 : * sys_munlockall: unlock all pages mapped into an address space.
949 : */
950 : int
951 0 : sys_munlockall(struct proc *p, void *v, register_t *retval)
952 : {
953 :
954 0 : (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
955 0 : return (0);
956 : }
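A minimal userland sketch of mlockall(2)/munlockall(2): wire everything mapped now and in the future for a latency-sensitive phase, then release the restriction.

#include <sys/mman.h>

int
main(void)
{
	/* Wire everything mapped now and everything mapped later. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		return 1;	/* EAGAIN/ENOMEM when limits are exceeded */
	/* ... work that must not take major page faults ... */
	munlockall();
	return 0;
}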
957 :
958 : /*
959 : * common code for uvm_mmapanon and uvm_mmapfile to lock a mapping
960 : */
961 : int
962 0 : uvm_mmaplock(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
963 : vsize_t locklimit)
964 : {
965 : int error;
966 :
967 : /*
968 : * POSIX 1003.1b -- if our address space was configured
969 : * to lock all future mappings, wire the one we just made.
970 : */
971 0 : if (prot == PROT_NONE) {
972 : /*
973 : * No more work to do in this case.
974 : */
975 0 : return (0);
976 : }
977 :
978 0 : vm_map_lock(map);
979 0 : if (map->flags & VM_MAP_WIREFUTURE) {
980 0 : KERNEL_LOCK();
981 0 : if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
982 : #ifdef pmap_wired_count
983 0 : || (locklimit != 0 && (size +
984 0 : ptoa(pmap_wired_count(vm_map_pmap(map)))) >
985 : locklimit)
986 : #endif
987 : ) {
988 : error = ENOMEM;
989 0 : vm_map_unlock(map);
990 : /* unmap the region! */
991 0 : uvm_unmap(map, *addr, *addr + size);
992 0 : KERNEL_UNLOCK();
993 0 : return (error);
994 : }
995 : /*
996 : * uvm_map_pageable() always returns the map
997 : * unlocked.
998 : */
999 0 : error = uvm_map_pageable(map, *addr, *addr + size,
1000 : FALSE, UVM_LK_ENTER);
1001 0 : if (error != 0) {
1002 : /* unmap the region! */
1003 0 : uvm_unmap(map, *addr, *addr + size);
1004 0 : KERNEL_UNLOCK();
1005 0 : return (error);
1006 : }
1007 0 : KERNEL_UNLOCK();
1008 0 : return (0);
1009 : }
1010 0 : vm_map_unlock(map);
1011 0 : return (0);
1012 0 : }
1013 :
1014 : /*
1015 : * uvm_mmapanon: internal version of mmap for anons
1016 : *
1017 : * - used by sys_mmap
1018 : */
1019 : int
1020 0 : uvm_mmapanon(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
1021 : vm_prot_t maxprot, int flags, vsize_t locklimit, struct proc *p)
1022 : {
1023 : int error;
1024 : int advice = MADV_NORMAL;
1025 : unsigned int uvmflag = 0;
1026 : vsize_t align = 0; /* userland page size */
1027 :
1028 : /*
1029 : * for non-fixed mappings, round off the suggested address.
1030 : * for fixed mappings, check alignment and zap old mappings.
1031 : */
1032 0 : if ((flags & MAP_FIXED) == 0) {
1033 0 : *addr = round_page(*addr); /* round */
1034 0 : } else {
1035 0 : if (*addr & PAGE_MASK)
1036 0 : return(EINVAL);
1037 :
1038 : uvmflag |= UVM_FLAG_FIXED;
1039 0 : if ((flags & __MAP_NOREPLACE) == 0)
1040 0 : uvmflag |= UVM_FLAG_UNMAP;
1041 : }
1042 :
1043 0 : if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
1044 0 : align = __LDPGSZ;
1045 0 : if ((flags & MAP_SHARED) == 0)
1046 : /* XXX: defer amap create */
1047 0 : uvmflag |= UVM_FLAG_COPYONW;
1048 : else
1049 : /* shared: create amap now */
1050 0 : uvmflag |= UVM_FLAG_OVERLAY;
1051 0 : if (flags & MAP_STACK)
1052 0 : uvmflag |= UVM_FLAG_STACK;
1053 :
1054 : /* set up mapping flags */
1055 0 : uvmflag = UVM_MAPFLAG(prot, maxprot,
1056 : (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
1057 : advice, uvmflag);
1058 :
1059 0 : error = uvm_mapanon(map, addr, size, align, uvmflag);
1060 :
1061 0 : if (error == 0)
1062 0 : error = uvm_mmaplock(map, addr, size, prot, locklimit);
1063 0 : return error;
1064 0 : }
1065 :
1066 : /*
1067 : * uvm_mmapfile: internal version of mmap for non-anons
1068 : *
1069 : * - used by sys_mmap
1070 : * - caller must page-align the file offset
1071 : */
1072 : int
1073 0 : uvm_mmapfile(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
1074 : vm_prot_t maxprot, int flags, struct vnode *vp, voff_t foff,
1075 : vsize_t locklimit, struct proc *p)
1076 : {
1077 : struct uvm_object *uobj;
1078 : int error;
1079 : int advice = MADV_NORMAL;
1080 : unsigned int uvmflag = 0;
1081 : vsize_t align = 0; /* userland page size */
1082 :
1083 : /*
1084 : * for non-fixed mappings, round off the suggested address.
1085 : * for fixed mappings, check alignment and zap old mappings.
1086 : */
1087 0 : if ((flags & MAP_FIXED) == 0) {
1088 0 : *addr = round_page(*addr); /* round */
1089 0 : } else {
1090 0 : if (*addr & PAGE_MASK)
1091 0 : return(EINVAL);
1092 :
1093 : uvmflag |= UVM_FLAG_FIXED;
1094 0 : if ((flags & __MAP_NOREPLACE) == 0)
1095 0 : uvmflag |= UVM_FLAG_UNMAP;
1096 : }
1097 :
1098 : /*
1099 : * attach to underlying vm object.
1100 : */
1101 0 : if (vp->v_type != VCHR) {
1102 0 : uobj = uvn_attach(vp, (flags & MAP_SHARED) ?
1103 0 : maxprot : (maxprot & ~PROT_WRITE));
1104 :
1105 : /*
1106 : * XXXCDC: hack from old code
1107 : * don't allow vnodes which have been mapped
1108 : * shared-writeable to persist [forces them to be
1109 : * flushed out when last reference goes].
1110 : * XXXCDC: interesting side effect: avoids a bug.
1111 : * note that in WRITE [ufs_readwrite.c] that we
1112 : * allocate buffer, uncache, and then do the write.
1113 : * the problem with this is that if the uncache causes
1114 : * VM data to be flushed to the same area of the file
1115 : * we are writing to... in that case we've got the
1116 : * buffer locked and our process goes to sleep forever.
1117 : *
1118 : * XXXCDC: checking maxprot protects us from the
1119 : * "persistbug" program but this is not a long term
1120 : * solution.
1121 : *
1122 : * XXXCDC: we don't bother calling uncache with the vp
1123 : * VOP_LOCKed since we know that we are already
1124 : * holding a valid reference to the uvn (from the
1125 : * uvn_attach above), and thus it is impossible for
1126 : * the uncache to kill the uvn and trigger I/O.
1127 : */
1128 0 : if (flags & MAP_SHARED) {
1129 0 : if ((prot & PROT_WRITE) ||
1130 0 : (maxprot & PROT_WRITE)) {
1131 0 : uvm_vnp_uncache(vp);
1132 0 : }
1133 : }
1134 : } else {
1135 0 : uobj = udv_attach(vp->v_rdev,
1136 0 : (flags & MAP_SHARED) ? maxprot :
1137 0 : (maxprot & ~PROT_WRITE), foff, size);
1138 : /*
1139 : * XXX Some devices don't like to be mapped with
1140 : * XXX PROT_EXEC, but we don't really have a
1141 : * XXX better way of handling this, right now
1142 : */
1143 0 : if (uobj == NULL && (prot & PROT_EXEC) == 0) {
1144 0 : maxprot &= ~PROT_EXEC;
1145 0 : uobj = udv_attach(vp->v_rdev,
1146 0 : (flags & MAP_SHARED) ? maxprot :
1147 0 : (maxprot & ~PROT_WRITE), foff, size);
1148 0 : }
1149 : advice = MADV_RANDOM;
1150 : }
1151 :
1152 0 : if (uobj == NULL)
1153 0 : return((vp->v_type == VREG) ? ENOMEM : EINVAL);
1154 :
1155 0 : if ((flags & MAP_SHARED) == 0)
1156 0 : uvmflag |= UVM_FLAG_COPYONW;
1157 0 : if (flags & __MAP_NOFAULT)
1158 0 : uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY);
1159 0 : if (flags & MAP_STACK)
1160 0 : uvmflag |= UVM_FLAG_STACK;
1161 :
1162 : /* set up mapping flags */
1163 0 : uvmflag = UVM_MAPFLAG(prot, maxprot,
1164 : (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
1165 : advice, uvmflag);
1166 :
1167 0 : error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
1168 :
1169 0 : if (error == 0)
1170 0 : return uvm_mmaplock(map, addr, size, prot, locklimit);
1171 :
1172 : /* errors: first detach from the uobj, if any. */
1173 0 : if (uobj)
1174 0 : uobj->pgops->pgo_detach(uobj);
1175 :
1176 0 : return (error);
1177 0 : }
1178 :
1179 : /* an address that can't be in userspace */
1180 : #define BOGO_PC (KERNBASE + 1)
1181 : int
1182 0 : sys_kbind(struct proc *p, void *v, register_t *retval)
1183 : {
1184 : struct sys_kbind_args /* {
1185 : syscallarg(const struct __kbind *) param;
1186 : syscallarg(size_t) psize;
1187 : syscallarg(uint64_t) proc_cookie;
1188 0 : } */ *uap = v;
1189 : const struct __kbind *paramp;
1190 0 : union {
1191 : struct __kbind uk[KBIND_BLOCK_MAX];
1192 : char upad[KBIND_BLOCK_MAX * sizeof(*paramp) + KBIND_DATA_MAX];
1193 : } param;
1194 0 : struct uvm_map_deadq dead_entries;
1195 60 : struct process *pr = p->p_p;
1196 : const char *data;
1197 0 : vaddr_t baseva, last_baseva, endva, pageoffset, kva;
1198 : size_t psize, s;
1199 : u_long pc;
1200 : int count, i;
1201 : int error;
1202 :
1203 : /*
1204 : * extract syscall args from uap
1205 : */
1206 0 : paramp = SCARG(uap, param);
1207 0 : psize = SCARG(uap, psize);
1208 :
1209 : /* a NULL paramp disables the syscall for the process */
1210 0 : if (paramp == NULL) {
1211 0 : pr->ps_kbind_addr = BOGO_PC;
1212 0 : return (0);
1213 : }
1214 :
1215 : /* security checks */
1216 0 : pc = PROC_PC(p);
1217 0 : if (pr->ps_kbind_addr == 0) {
1218 0 : pr->ps_kbind_addr = pc;
1219 0 : pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
1220 0 : } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC)
1221 0 : sigexit(p, SIGILL);
1222 60 : else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie))
1223 0 : sigexit(p, SIGILL);
1224 0 : if (psize < sizeof(struct __kbind) || psize > sizeof(param))
1225 0 : return (EINVAL);
1226 0 : if ((error = copyin(paramp, &param, psize)))
1227 0 : return (error);
1228 :
1229 : /*
1230 : * The param argument points to an array of __kbind structures
1231 : * followed by the corresponding new data areas for them. Verify
1232 : * that the sizes in the __kbind structures add up to the total
1233 : * size and find the start of the new area.
1234 : */
1235 0 : paramp = &param.uk[0];
1236 : s = psize;
1237 60 : for (count = 0; s > 0 && count < KBIND_BLOCK_MAX; count++) {
1238 0 : if (s < sizeof(*paramp))
1239 0 : return (EINVAL);
1240 0 : s -= sizeof(*paramp);
1241 :
1242 0 : baseva = (vaddr_t)paramp[count].kb_addr;
1243 0 : endva = baseva + paramp[count].kb_size - 1;
1244 0 : if (paramp[count].kb_addr == NULL ||
1245 0 : paramp[count].kb_size == 0 ||
1246 0 : paramp[count].kb_size > KBIND_DATA_MAX ||
1247 0 : baseva >= VM_MAXUSER_ADDRESS ||
1248 0 : endva >= VM_MAXUSER_ADDRESS ||
1249 0 : trunc_page(baseva) != trunc_page(endva) ||
1250 0 : s < paramp[count].kb_size)
1251 0 : return (EINVAL);
1252 :
1253 0 : s -= paramp[count].kb_size;
1254 : }
1255 0 : if (s > 0)
1256 0 : return (EINVAL);
1257 0 : data = (const char *)&paramp[count];
1258 :
1259 : /* all looks good, so do the bindings */
1260 : last_baseva = VM_MAXUSER_ADDRESS;
1261 0 : kva = 0;
1262 0 : TAILQ_INIT(&dead_entries);
1263 60 : for (i = 0; i < count; i++) {
1264 0 : baseva = (vaddr_t)paramp[i].kb_addr;
1265 0 : pageoffset = baseva & PAGE_MASK;
1266 0 : baseva = trunc_page(baseva);
1267 :
1268 : /* make sure the desired page is mapped into kernel_map */
1269 0 : if (baseva != last_baseva) {
1270 0 : if (kva != 0) {
1271 0 : vm_map_lock(kernel_map);
1272 0 : uvm_unmap_remove(kernel_map, kva,
1273 0 : kva+PAGE_SIZE, &dead_entries, FALSE, TRUE);
1274 0 : vm_map_unlock(kernel_map);
1275 0 : kva = 0;
1276 0 : }
1277 120 : if ((error = uvm_map_extract(&p->p_vmspace->vm_map,
1278 : baseva, PAGE_SIZE, &kva, UVM_EXTRACT_FIXPROT)))
1279 : break;
1280 : last_baseva = baseva;
1281 0 : }
1282 :
1283 : /* do the update */
1284 0 : if ((error = kcopy(data, (char *)kva + pageoffset,
1285 0 : paramp[i].kb_size)))
1286 : break;
1287 0 : data += paramp[i].kb_size;
1288 : }
1289 :
1290 0 : if (kva != 0) {
1291 60 : vm_map_lock(kernel_map);
1292 0 : uvm_unmap_remove(kernel_map, kva, kva+PAGE_SIZE,
1293 : &dead_entries, FALSE, TRUE);
1294 0 : vm_map_unlock(kernel_map);
1295 0 : }
1296 0 : uvm_unmap_detach(&dead_entries, AMAP_REFALL);
1297 :
1298 0 : return (error);
1299 0 : }
|