LCOV - code coverage report
Current view: top level - uvm - uvm_mmap.c (source / functions)
Test: 6.4
Date: 2018-10-19 03:25:38
                 Hit    Total    Coverage
Lines:             6      506       1.2 %
Functions:         0       17       0.0 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*      $OpenBSD: uvm_mmap.c,v 1.151 2018/08/15 20:22:13 kettenis Exp $ */
       2             : /*      $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $        */
       3             : 
       4             : /*
       5             :  * Copyright (c) 1997 Charles D. Cranor and Washington University.
       6             :  * Copyright (c) 1991, 1993 The Regents of the University of California.  
       7             :  * Copyright (c) 1988 University of Utah.
       8             :  * 
       9             :  * All rights reserved.
      10             :  *
      11             :  * This code is derived from software contributed to Berkeley by
      12             :  * the Systems Programming Group of the University of Utah Computer
      13             :  * Science Department.
      14             :  *
      15             :  * Redistribution and use in source and binary forms, with or without
      16             :  * modification, are permitted provided that the following conditions
      17             :  * are met:
      18             :  * 1. Redistributions of source code must retain the above copyright
      19             :  *    notice, this list of conditions and the following disclaimer.
      20             :  * 2. Redistributions in binary form must reproduce the above copyright
      21             :  *    notice, this list of conditions and the following disclaimer in the
      22             :  *    documentation and/or other materials provided with the distribution.
      23             :  * 3. All advertising materials mentioning features or use of this software
      24             :  *    must display the following acknowledgement:
      25             :  *      This product includes software developed by Charles D. Cranor,
      26             :  *      Washington University, University of California, Berkeley and 
      27             :  *      its contributors.
      28             :  * 4. Neither the name of the University nor the names of its contributors
      29             :  *    may be used to endorse or promote products derived from this software
      30             :  *    without specific prior written permission.
      31             :  *
      32             :  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
      33             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      34             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      35             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
      36             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      37             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      38             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      39             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      40             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      41             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      42             :  * SUCH DAMAGE.
      43             :  *
      44             :  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
      45             :  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
      46             :  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
      47             :  */
      48             : 
      49             : /*
      50             :  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
      51             :  * function.
      52             :  */
      53             : #include <sys/param.h>
      54             : #include <sys/systm.h>
      55             : #include <sys/fcntl.h>
      56             : #include <sys/file.h>
      57             : #include <sys/filedesc.h>
      58             : #include <sys/resourcevar.h>
      59             : #include <sys/mman.h>
      60             : #include <sys/mount.h>
      61             : #include <sys/proc.h>
      62             : #include <sys/malloc.h>
      63             : #include <sys/vnode.h>
      64             : #include <sys/conf.h>
      65             : #include <sys/signalvar.h>
      66             : #include <sys/syslog.h>
      67             : #include <sys/stat.h>
      68             : #include <sys/specdev.h>
      69             : #include <sys/stdint.h>
      70             : #include <sys/pledge.h>
      71             : #include <sys/unistd.h>           /* for KBIND* */
      72             : #include <sys/user.h>
      73             : 
      74             : #include <machine/exec.h> /* for __LDPGSZ */
      75             : 
      76             : #include <sys/syscallargs.h>
      77             : 
      78             : #include <uvm/uvm.h>
      79             : #include <uvm/uvm_device.h>
      80             : #include <uvm/uvm_vnode.h>
      81             : 
      82             : int uvm_mmapanon(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
      83             :     vsize_t, struct proc *);
      84             : int uvm_mmapfile(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
      85             :     struct vnode *, voff_t, vsize_t, struct proc *);
      86             : 
      87             : 
      88             : /*
      89             :  * Page align addr and size, returning EINVAL on wraparound.
      90             :  */
      91             : #define ALIGN_ADDR(addr, size, pageoff) do {                            \
      92             :         pageoff = (addr & PAGE_MASK);                                       \
      93             :         if (pageoff != 0) {                                             \
      94             :                 if (size > SIZE_MAX - pageoff)                               \
      95             :                         return (EINVAL);        /* wraparound */        \
      96             :                 addr -= pageoff;                                        \
      97             :                 size += pageoff;                                        \
      98             :         }                                                               \
      99             :         if (size != 0) {                                                \
     100             :                 size = (vsize_t)round_page(size);                       \
     101             :                 if (size == 0)                                          \
     102             :                         return (EINVAL);        /* wraparound */        \
     103             :         }                                                               \
     104             : } while (0)
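/*
 * Worked example (illustrative, assuming PAGE_SIZE == 4096): for
 * addr = 0x1234 and size = 0x100, pageoff is 0x234, so addr becomes
 * 0x1000 and size becomes 0x334, which round_page() rounds up to
 * 0x1000.  A size large enough that either adjustment wraps past
 * SIZE_MAX fails with EINVAL instead.
 */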
     105             : 
     106             : /*
     107             :  * sys_mquery: provide mapping hints to applications that do fixed mappings
     108             :  *
      109             :  * flags: 0 or MAP_FIXED (MAP_FIXED means we insist on this addr and
     110             :  *      don't care about PMAP_PREFER or such)
     111             :  * addr: hint where we'd like to place the mapping.
     112             :  * size: size of the mapping
     113             :  * fd: fd of the file we want to map
     114             :  * off: offset within the file
     115             :  */
     116             : int
     117           0 : sys_mquery(struct proc *p, void *v, register_t *retval)
     118             : {
     119             :         struct sys_mquery_args /* {
     120             :                 syscallarg(void *) addr;
     121             :                 syscallarg(size_t) len;
     122             :                 syscallarg(int) prot;
     123             :                 syscallarg(int) flags;
     124             :                 syscallarg(int) fd;
     125             :                 syscallarg(long) pad;
     126             :                 syscallarg(off_t) pos;
     127           0 :         } */ *uap = v;
     128           0 :         struct file *fp;
     129             :         voff_t uoff;
     130             :         int error;
     131           0 :         vaddr_t vaddr;
     132             :         int flags = 0;
     133             :         vsize_t size;
     134             :         vm_prot_t prot;
     135             :         int fd;
     136             : 
     137           0 :         vaddr = (vaddr_t) SCARG(uap, addr);
     138           0 :         prot = SCARG(uap, prot);
     139           0 :         size = (vsize_t) SCARG(uap, len);
     140           0 :         fd = SCARG(uap, fd);
     141             : 
     142           0 :         if ((prot & PROT_MASK) != prot)
     143           0 :                 return (EINVAL);
     144             : 
     145           0 :         if (SCARG(uap, flags) & MAP_FIXED)
     146           0 :                 flags |= UVM_FLAG_FIXED;
     147             : 
     148           0 :         if (fd >= 0) {
     149           0 :                 if ((error = getvnode(p, fd, &fp)) != 0)
     150           0 :                         return (error);
     151           0 :                 uoff = SCARG(uap, pos);
     152           0 :         } else {
     153           0 :                 fp = NULL;
     154             :                 uoff = UVM_UNKNOWN_OFFSET;
     155             :         }
     156             : 
     157           0 :         if (vaddr == 0)
     158           0 :                 vaddr = uvm_map_hint(p->p_vmspace, prot, VM_MIN_ADDRESS,
     159             :                     VM_MAXUSER_ADDRESS);
     160             : 
     161           0 :         error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff,
     162             :             flags);
     163           0 :         if (error == 0)
     164           0 :                 *retval = (register_t)(vaddr);
     165             : 
     166           0 :         if (fp != NULL)
     167           0 :                 FRELE(fp, p);
     168           0 :         return (error);
     169           0 : }
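/*
 * Userland sketch (hedged; "fd" and "len" are assumptions): mquery(2)
 * returns a placement hint without creating a mapping, which a fixed
 * mapping can then reuse:
 *
 *	void *hint = mquery(NULL, len, PROT_READ, 0, fd, 0);
 *	if (hint != MAP_FAILED)
 *		hint = mmap(hint, len, PROT_READ, MAP_FIXED, fd, 0);
 */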
     170             : 
     171             : /*
     172             :  * sys_mincore: determine if pages are in core or not.
     173             :  */
     174             : /* ARGSUSED */
     175             : int
     176           0 : sys_mincore(struct proc *p, void *v, register_t *retval)
     177             : {
     178             :         struct sys_mincore_args /* {
     179             :                 syscallarg(void *) addr;
     180             :                 syscallarg(size_t) len;
     181             :                 syscallarg(char *) vec;
     182           0 :         } */ *uap = v;
     183             :         vm_page_t m;
     184             :         char *vec, *pgi, *pgs;
     185             :         struct uvm_object *uobj;
     186             :         struct vm_amap *amap;
     187             :         struct vm_anon *anon;
     188           0 :         vm_map_entry_t entry, next;
     189             :         vaddr_t start, end, lim;
     190             :         vm_map_t map;
     191             :         vsize_t len, npgs;
     192             :         int error = 0; 
     193             : 
     194           0 :         map = &p->p_vmspace->vm_map;
     195             : 
     196           0 :         start = (vaddr_t)SCARG(uap, addr);
     197           0 :         len = SCARG(uap, len);
     198           0 :         vec = SCARG(uap, vec);
     199             : 
     200           0 :         if (start & PAGE_MASK)
     201           0 :                 return (EINVAL);
     202           0 :         len = round_page(len);
     203           0 :         end = start + len;
     204           0 :         if (end <= start)
     205           0 :                 return (EINVAL);
     206             : 
     207           0 :         npgs = len >> PAGE_SHIFT;
     208             : 
     209             :         /*
      210             :  * <art> Anyone trying to mincore more than 4GB of address space is
     211             :          *      clearly insane.
     212             :          */
     213           0 :         if (npgs >= (0xffffffff >> PAGE_SHIFT))
     214           0 :                 return (E2BIG);
     215           0 :         pgs = mallocarray(npgs, sizeof(*pgs), M_TEMP, M_WAITOK | M_CANFAIL);
     216           0 :         if (pgs == NULL)
     217           0 :                 return (ENOMEM);
     218             :         pgi = pgs;
     219             : 
     220             :         /*
     221             :          * Lock down vec, so our returned status isn't outdated by
     222             :          * storing the status byte for a page.
     223             :          */
     224           0 :         if ((error = uvm_vslock(p, vec, npgs, PROT_WRITE)) != 0) {
     225           0 :                 free(pgs, M_TEMP, npgs * sizeof(*pgs));
     226           0 :                 return (error);
     227             :         }
     228             : 
     229           0 :         vm_map_lock_read(map);
     230             : 
     231           0 :         if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
     232             :                 error = ENOMEM;
     233           0 :                 goto out;
     234             :         }
     235             : 
     236           0 :         for (/* nothing */;
     237           0 :              entry != NULL && entry->start < end;
     238           0 :              entry = RBT_NEXT(uvm_map_addr, entry)) {
     239           0 :                 KASSERT(!UVM_ET_ISSUBMAP(entry));
     240           0 :                 KASSERT(start >= entry->start);
     241             : 
     242             :                 /* Make sure there are no holes. */
     243           0 :                 next = RBT_NEXT(uvm_map_addr, entry);
     244           0 :                 if (entry->end < end &&
     245           0 :                      (next == NULL ||
     246           0 :                       next->start > entry->end)) {
     247             :                         error = ENOMEM;
     248           0 :                         goto out;
     249             :                 }
     250             : 
     251           0 :                 lim = end < entry->end ? end : entry->end;
     252             : 
     253             :                 /*
     254             :                  * Special case for objects with no "real" pages.  Those
     255             :                  * are always considered resident (mapped devices).
     256             :                  */
     257           0 :                 if (UVM_ET_ISOBJ(entry)) {
     258           0 :                         KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
     259           0 :                         if (entry->object.uvm_obj->pgops->pgo_fault != NULL) {
     260           0 :                                 for (/* nothing */; start < lim;
     261           0 :                                      start += PAGE_SIZE, pgi++)
     262           0 :                                         *pgi = 1;
     263             :                                 continue;
     264             :                         }
     265             :                 }
     266             : 
     267           0 :                 amap = entry->aref.ar_amap;  /* top layer */
     268           0 :                 uobj = entry->object.uvm_obj;        /* bottom layer */
     269             : 
     270           0 :                 for (/* nothing */; start < lim; start += PAGE_SIZE, pgi++) {
     271           0 :                         *pgi = 0;
     272           0 :                         if (amap != NULL) {
     273             :                                 /* Check the top layer first. */
     274           0 :                                 anon = amap_lookup(&entry->aref,
     275           0 :                                     start - entry->start);
     276           0 :                                 if (anon != NULL && anon->an_page != NULL) {
     277             :                                         /*
     278             :                                          * Anon has the page for this entry
     279             :                                          * offset.
     280             :                                          */
     281           0 :                                         *pgi = 1;
     282           0 :                                 }
     283             :                         }
     284             : 
     285           0 :                         if (uobj != NULL && *pgi == 0) {
     286             :                                 /* Check the bottom layer. */
     287           0 :                                 m = uvm_pagelookup(uobj,
     288           0 :                                     entry->offset + (start - entry->start));
     289           0 :                                 if (m != NULL) {
     290             :                                         /*
     291             :                                          * Object has the page for this entry
     292             :                                          * offset.
     293             :                                          */
     294           0 :                                         *pgi = 1;
     295           0 :                                 }
     296             :                         }
     297             :                 }
     298             :         }
     299             : 
     300             :  out:
     301           0 :         vm_map_unlock_read(map);
     302           0 :         uvm_vsunlock(p, SCARG(uap, vec), npgs);
      303             :         /* now that the map is unlocked we can copyout without fear. */
     304           0 :         if (error == 0)
     305           0 :                 copyout(pgs, vec, npgs * sizeof(char));
     306           0 :         free(pgs, M_TEMP, npgs * sizeof(*pgs));
     307           0 :         return (error);
     308           0 : }
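/*
 * Userland sketch (hedged): mincore(2) fills one status byte per page,
 * nonzero when the page is resident.  Assumes "buf" is a page-aligned
 * mapping of "len" bytes:
 *
 *	char vec[howmany(len, getpagesize())];
 *
 *	if (mincore(buf, len, vec) == 0 && vec[0] != 0)
 *		printf("first page is in core\n");
 */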
     309             : 
     310             : int     uvm_wxabort;
     311             : 
     312             : /*
     313             :  * W^X violations are only allowed on permitted filesystems.
     314             :  */
     315             : static inline int
     316           0 : uvm_wxcheck(struct proc *p, char *call)
     317             : {
     318           0 :         struct process *pr = p->p_p;
     319           0 :         int wxallowed = (pr->ps_textvp->v_mount &&
     320           0 :             (pr->ps_textvp->v_mount->mnt_flag & MNT_WXALLOWED));
     321             : 
     322           0 :         if (wxallowed && (pr->ps_flags & PS_WXNEEDED))
     323           0 :                 return (0);
     324             : 
     325             :         /* Report W^X failures, and potentially SIGABRT */
     326           0 :         if (pr->ps_wxcounter++ == 0)
     327           0 :                 log(LOG_NOTICE, "%s(%d): %s W^X violation\n",
     328           0 :                     pr->ps_comm, pr->ps_pid, call);
     329             : 
     330             :         /* Send uncatchable SIGABRT for coredump */
     331           0 :         if (uvm_wxabort)
     332           0 :                 sigexit(p, SIGABRT);
     333             : 
     334           0 :         return (ENOTSUP);
     335           0 : }
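/*
 * Userland effect (hedged sketch): unless the backing filesystem is
 * mounted "wxallowed" and the executable is marked wxneeded, asking for
 * writable and executable memory at once fails with ENOTSUP:
 *
 *	void *p = mmap(NULL, len, PROT_READ|PROT_WRITE|PROT_EXEC,
 *	    MAP_ANON|MAP_PRIVATE, -1, 0);
 *	if (p == MAP_FAILED)
 *		warn("mmap");
 */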
     336             : 
     337             : /*
     338             :  * sys_mmap: mmap system call.
     339             :  *
     340             :  * => file offset and address may not be page aligned
      341             :  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
     342             :  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
     343             :  *      and the return value is adjusted up by the page offset.
     344             :  */
     345             : int
     346           0 : sys_mmap(struct proc *p, void *v, register_t *retval)
     347             : {
     348             :         struct sys_mmap_args /* {
     349             :                 syscallarg(void *) addr;
     350             :                 syscallarg(size_t) len;
     351             :                 syscallarg(int) prot;
     352             :                 syscallarg(int) flags;
     353             :                 syscallarg(int) fd;
     354             :                 syscallarg(long) pad;
     355             :                 syscallarg(off_t) pos;
     356           0 :         } */ *uap = v;
     357           0 :         vaddr_t addr;
     358           0 :         struct vattr va;
     359             :         off_t pos;
     360             :         vsize_t size, pageoff;
     361             :         vm_prot_t prot, maxprot;
     362             :         int flags, fd;
     363             :         vaddr_t vm_min_address = VM_MIN_ADDRESS;
     364           0 :         struct filedesc *fdp = p->p_fd;
     365             :         struct file *fp = NULL;
     366             :         struct vnode *vp;
     367             :         int error;
     368             : 
     369             :         /* first, extract syscall args from the uap. */
     370           0 :         addr = (vaddr_t) SCARG(uap, addr);
     371           0 :         size = (vsize_t) SCARG(uap, len);
     372           0 :         prot = SCARG(uap, prot);
     373           0 :         flags = SCARG(uap, flags);
     374           0 :         fd = SCARG(uap, fd);
     375           0 :         pos = SCARG(uap, pos);
     376             : 
     377             :         /*
     378             :          * Validate the flags.
     379             :          */
     380           0 :         if ((prot & PROT_MASK) != prot)
     381           0 :                 return (EINVAL);
     382           0 :         if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
     383           0 :             (error = uvm_wxcheck(p, "mmap")))
     384           0 :                 return (error);
     385             : 
     386           0 :         if ((flags & MAP_FLAGMASK) != flags)
     387           0 :                 return (EINVAL);
     388           0 :         if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
     389           0 :                 return (EINVAL);
     390           0 :         if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
     391           0 :                 return (EINVAL);
     392           0 :         if (flags & MAP_STACK) {
     393           0 :                 if ((flags & (MAP_ANON|MAP_PRIVATE)) != (MAP_ANON|MAP_PRIVATE))
     394           0 :                         return (EINVAL);
     395           0 :                 if (flags & ~(MAP_STACK|MAP_FIXED|MAP_ANON|MAP_PRIVATE))
     396           0 :                         return (EINVAL);
     397           0 :                 if (pos != 0)
     398           0 :                         return (EINVAL);
     399           0 :                 if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
     400           0 :                         return (EINVAL);
     401             :         }
     402           0 :         if (size == 0)
     403           0 :                 return (EINVAL);
     404             : 
     405           0 :         error = pledge_protexec(p, prot);
     406           0 :         if (error)
     407           0 :                 return (error);
     408             : 
     409             :         /* align file position and save offset.  adjust size. */
     410           0 :         ALIGN_ADDR(pos, size, pageoff);
     411             : 
     412             :         /* now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */
     413           0 :         if (flags & MAP_FIXED) {
     414             :                 /* adjust address by the same amount as we did the offset */
     415           0 :                 addr -= pageoff;
     416           0 :                 if (addr & PAGE_MASK)
     417           0 :                         return (EINVAL);                /* not page aligned */
     418             : 
     419           0 :                 if (addr > SIZE_MAX - size)
     420           0 :                         return (EINVAL);                /* no wrapping! */
     421           0 :                 if (VM_MAXUSER_ADDRESS > 0 &&
     422           0 :                     (addr + size) > VM_MAXUSER_ADDRESS)
     423           0 :                         return (EINVAL);
     424           0 :                 if (vm_min_address > 0 && addr < vm_min_address)
     425           0 :                         return (EINVAL);
     426             : 
     427             :         }
     428             : 
     429             :         /* check for file mappings (i.e. not anonymous) and verify file. */
     430           0 :         if ((flags & MAP_ANON) == 0) {
     431           0 :                 if ((fp = fd_getfile(fdp, fd)) == NULL)
     432           0 :                         return (EBADF);
     433             : 
     434           0 :                 if (fp->f_type != DTYPE_VNODE) {
     435             :                         error = ENODEV;         /* only mmap vnodes! */
     436           0 :                         goto out;
     437             :                 }
     438           0 :                 vp = (struct vnode *)fp->f_data;     /* convert to vnode */
     439             : 
     440           0 :                 if (vp->v_type != VREG && vp->v_type != VCHR &&
     441           0 :                     vp->v_type != VBLK) {
     442             :                         error = ENODEV; /* only REG/CHR/BLK support mmap */
     443           0 :                         goto out;
     444             :                 }
     445             : 
     446           0 :                 if (vp->v_type == VREG && (pos + size) < pos) {
     447             :                         error = EINVAL;         /* no offset wrapping */
     448           0 :                         goto out;
     449             :                 }
     450             : 
     451             :                 /* special case: catch SunOS style /dev/zero */
     452           0 :                 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
     453           0 :                         flags |= MAP_ANON;
     454           0 :                         FRELE(fp, p);
     455             :                         fp = NULL;
     456           0 :                         goto is_anon;
     457             :                 }
     458             : 
     459             :                 /*
     460             :                  * Old programs may not select a specific sharing type, so
     461             :                  * default to an appropriate one.
     462             :                  */
     463           0 :                 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
     464             : #if defined(DEBUG)
     465             :                         printf("WARNING: defaulted mmap() share type to"
     466             :                             " %s (pid %d comm %s)\n",
     467             :                             vp->v_type == VCHR ? "MAP_SHARED" : "MAP_PRIVATE",
     468             :                             p->p_p->ps_pid, p->p_p->ps_comm);
     469             : #endif
     470           0 :                         if (vp->v_type == VCHR)
     471           0 :                                 flags |= MAP_SHARED;    /* for a device */
     472             :                         else
     473           0 :                                 flags |= MAP_PRIVATE;   /* for a file */
     474             :                 }
     475             : 
     476             :                 /* 
     477             :                  * MAP_PRIVATE device mappings don't make sense (and aren't
     478             :                  * supported anyway).  However, some programs rely on this,
     479             :                  * so just change it to MAP_SHARED.
     480             :                  */
     481           0 :                 if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
     482           0 :                         flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
     483           0 :                 }
     484             : 
     485             :                 /* now check protection */
     486             :                 maxprot = PROT_EXEC;
     487             : 
     488             :                 /* check read access */
     489           0 :                 if (fp->f_flag & FREAD)
     490           0 :                         maxprot |= PROT_READ;
     491           0 :                 else if (prot & PROT_READ) {
     492             :                         error = EACCES;
     493           0 :                         goto out;
     494             :                 }
     495             : 
     496             :                 /* check write access, shared case first */
     497           0 :                 if (flags & MAP_SHARED) {
     498             :                         /*
     499             :                          * if the file is writable, only add PROT_WRITE to
     500             :                          * maxprot if the file is not immutable, append-only.
     501             :                          * otherwise, if we have asked for PROT_WRITE, return
     502             :                          * EPERM.
     503             :                          */
     504           0 :                         if (fp->f_flag & FWRITE) {
     505           0 :                                 KERNEL_LOCK();
     506           0 :                                 error = VOP_GETATTR(vp, &va, p->p_ucred, p);
     507           0 :                                 KERNEL_UNLOCK();
     508           0 :                                 if (error)
     509             :                                         goto out;
     510           0 :                                 if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
     511           0 :                                         maxprot |= PROT_WRITE;
     512           0 :                                 else if (prot & PROT_WRITE) {
     513             :                                         error = EPERM;
     514           0 :                                         goto out;
     515             :                                 }
     516           0 :                         } else if (prot & PROT_WRITE) {
     517             :                                 error = EACCES;
     518           0 :                                 goto out;
     519             :                         }
     520             :                 } else {
      521             :                         /* MAP_PRIVATE mappings are copy-on-write, so always writable */
     522           0 :                         maxprot |= PROT_WRITE;
     523             :                 }
     524           0 :                 if ((flags & __MAP_NOFAULT) != 0 ||
     525           0 :                     ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
     526           0 :                         if (p->p_rlimit[RLIMIT_DATA].rlim_cur < size ||
     527           0 :                             p->p_rlimit[RLIMIT_DATA].rlim_cur - size <
     528           0 :                             ptoa(p->p_vmspace->vm_dused)) {
     529             :                                 error = ENOMEM;
     530           0 :                                 goto out;
     531             :                         }
     532             :                 }
     533           0 :                 KERNEL_LOCK();
     534           0 :                 error = uvm_mmapfile(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
     535           0 :                     flags, vp, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
     536           0 :                 KERNEL_UNLOCK();
     537           0 :         } else {                /* MAP_ANON case */
     538           0 :                 if (fd != -1)
     539           0 :                         return EINVAL;
     540             : 
     541             : is_anon:        /* label for SunOS style /dev/zero */
     542             : 
     543             :                 /* __MAP_NOFAULT only makes sense with a backing object */
     544           0 :                 if ((flags & __MAP_NOFAULT) != 0)
     545           0 :                         return EINVAL;
     546             : 
     547           0 :                 if (p->p_rlimit[RLIMIT_DATA].rlim_cur < size ||
     548           0 :                     p->p_rlimit[RLIMIT_DATA].rlim_cur - size <
     549           0 :                     ptoa(p->p_vmspace->vm_dused)) {
     550           0 :                         return ENOMEM;
     551             :                 }
     552             : 
     553             :                 /*
     554             :                  * We've been treating (MAP_SHARED|MAP_PRIVATE) == 0 as
     555             :                  * MAP_PRIVATE, so make that clear.
     556             :                  */
     557           0 :                 if ((flags & MAP_SHARED) == 0)
     558           0 :                         flags |= MAP_PRIVATE;
     559             : 
     560             :                 maxprot = PROT_MASK;
     561           0 :                 error = uvm_mmapanon(&p->p_vmspace->vm_map, &addr, size, prot,
     562           0 :                     maxprot, flags, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
     563             :         }
     564             : 
     565           0 :         if (error == 0)
     566             :                 /* remember to add offset */
     567           0 :                 *retval = (register_t)(addr + pageoff);
     568             : 
     569             : out:
     570           0 :         if (fp)
     571           0 :                 FRELE(fp, p);
     572           0 :         return (error);
     573           0 : }
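/*
 * Userland sketch (hedged): a minimal anonymous mapping, exercising the
 * MAP_ANON branch above:
 *
 *	void *p = mmap(NULL, len, PROT_READ|PROT_WRITE,
 *	    MAP_ANON|MAP_PRIVATE, -1, 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 */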
     574             : 
     575             : /*
     576             :  * sys_msync: the msync system call (a front-end for flush)
     577             :  */
     578             : 
     579             : int
     580           0 : sys_msync(struct proc *p, void *v, register_t *retval)
     581             : {
     582             :         struct sys_msync_args /* {
     583             :                 syscallarg(void *) addr;
     584             :                 syscallarg(size_t) len;
     585             :                 syscallarg(int) flags;
     586           0 :         } */ *uap = v;
     587             :         vaddr_t addr;
     588             :         vsize_t size, pageoff;
     589             :         vm_map_t map;
     590             :         int flags, uvmflags;
     591             : 
     592             :         /* extract syscall args from the uap */
     593           0 :         addr = (vaddr_t)SCARG(uap, addr);
     594           0 :         size = (vsize_t)SCARG(uap, len);
     595           0 :         flags = SCARG(uap, flags);
     596             : 
     597             :         /* sanity check flags */
     598           0 :         if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
     599           0 :                         (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
     600           0 :                         (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
     601           0 :                 return (EINVAL);
     602           0 :         if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
     603           0 :                 flags |= MS_SYNC;
     604             : 
     605             :         /* align the address to a page boundary, and adjust the size accordingly */
     606           0 :         ALIGN_ADDR(addr, size, pageoff);
     607           0 :         if (addr > SIZE_MAX - size)
     608           0 :                 return (EINVAL);                /* disallow wrap-around. */
     609             : 
     610             :         /* get map */
     611           0 :         map = &p->p_vmspace->vm_map;
     612             : 
     613             :         /* translate MS_ flags into PGO_ flags */
     614             :         uvmflags = PGO_CLEANIT;
     615           0 :         if (flags & MS_INVALIDATE)
     616           0 :                 uvmflags |= PGO_FREE;
     617             :         if (flags & MS_SYNC)
     618             :                 uvmflags |= PGO_SYNCIO;
     619             :         else
     620             :                 uvmflags |= PGO_SYNCIO;  /* XXXCDC: force sync for now! */
     621             : 
     622           0 :         return (uvm_map_clean(map, addr, addr+size, uvmflags));
     623           0 : }
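/*
 * Userland sketch (hedged): push modified pages of a shared file
 * mapping back to the file; MS_SYNC waits for the write to finish:
 *
 *	if (msync(p, len, MS_SYNC) == -1)
 *		err(1, "msync");
 */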
     624             : 
     625             : /*
      626             :  * sys_munmap: unmap a user's memory
     627             :  */
     628             : int
     629           0 : sys_munmap(struct proc *p, void *v, register_t *retval)
     630             : {
     631             :         struct sys_munmap_args /* {
     632             :                 syscallarg(void *) addr;
     633             :                 syscallarg(size_t) len;
     634           0 :         } */ *uap = v;
     635             :         vaddr_t addr;
     636             :         vsize_t size, pageoff;
     637             :         vm_map_t map;
     638             :         vaddr_t vm_min_address = VM_MIN_ADDRESS;
     639           0 :         struct uvm_map_deadq dead_entries;
     640             : 
     641             :         /* get syscall args... */
     642           0 :         addr = (vaddr_t) SCARG(uap, addr);
     643           0 :         size = (vsize_t) SCARG(uap, len);
     644             :         
     645             :         /* align address to a page boundary, and adjust size accordingly */
     646           0 :         ALIGN_ADDR(addr, size, pageoff);
     647             : 
     648             :         /*
     649             :          * Check for illegal addresses.  Watch out for address wrap...
     650             :          * Note that VM_*_ADDRESS are not constants due to casts (argh).
     651             :          */
     652           0 :         if (addr > SIZE_MAX - size)
     653           0 :                 return (EINVAL);
     654           0 :         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
     655           0 :                 return (EINVAL);
     656           0 :         if (vm_min_address > 0 && addr < vm_min_address)
     657           0 :                 return (EINVAL);
     658           0 :         map = &p->p_vmspace->vm_map;
     659             : 
     660             : 
     661           0 :         vm_map_lock(map);       /* lock map so we can checkprot */
     662             : 
     663             :         /*
     664             :          * interesting system call semantic: make sure entire range is 
     665             :          * allocated before allowing an unmap.
     666             :          */
     667           0 :         if (!uvm_map_checkprot(map, addr, addr + size, PROT_NONE)) {
     668           0 :                 vm_map_unlock(map);
     669           0 :                 return (EINVAL);
     670             :         }
     671             : 
     672           0 :         TAILQ_INIT(&dead_entries);
     673           0 :         uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE);
     674           0 :         vm_map_unlock(map);     /* and unlock */
     675             : 
     676           0 :         uvm_unmap_detach(&dead_entries, 0);
     677             : 
     678           0 :         return (0);
     679           0 : }
     680             : 
     681             : /*
     682             :  * sys_mprotect: the mprotect system call
     683             :  */
     684             : int
     685           0 : sys_mprotect(struct proc *p, void *v, register_t *retval)
     686             : {
     687             :         struct sys_mprotect_args /* {
     688             :                 syscallarg(void *) addr;
     689             :                 syscallarg(size_t) len;
     690             :                 syscallarg(int) prot;
     691           0 :         } */ *uap = v;
     692             :         vaddr_t addr;
     693             :         vsize_t size, pageoff;
     694             :         vm_prot_t prot;
     695             :         int error;
     696             : 
     697             :         /*
     698             :          * extract syscall args from uap
     699             :          */
     700             : 
     701           0 :         addr = (vaddr_t)SCARG(uap, addr);
     702           0 :         size = (vsize_t)SCARG(uap, len);
     703           0 :         prot = SCARG(uap, prot);
     704             :         
     705           0 :         if ((prot & PROT_MASK) != prot)
     706           0 :                 return (EINVAL);
     707           0 :         if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
     708           0 :             (error = uvm_wxcheck(p, "mprotect")))
     709           0 :                 return (error);
     710             : 
     711           0 :         error = pledge_protexec(p, prot);
     712           0 :         if (error)
     713           0 :                 return (error);
     714             : 
     715             :         /*
     716             :          * align the address to a page boundary, and adjust the size accordingly
     717             :          */
     718           0 :         ALIGN_ADDR(addr, size, pageoff);
     719           0 :         if (addr > SIZE_MAX - size)
     720           0 :                 return (EINVAL);                /* disallow wrap-around. */
     721             : 
     722           0 :         return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
     723             :             prot, FALSE));
     724           0 : }
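/*
 * Userland sketch (hedged): the W^X-friendly pattern is to fill a
 * writable mapping, then flip it to executable so both permissions are
 * never held at once:
 *
 *	void *p = mmap(NULL, len, PROT_READ|PROT_WRITE,
 *	    MAP_ANON|MAP_PRIVATE, -1, 0);
 *	... emit code into p ...
 *	if (mprotect(p, len, PROT_READ|PROT_EXEC) == -1)
 *		err(1, "mprotect");
 */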
     725             : 
     726             : /*
     727             :  * sys_minherit: the minherit system call
     728             :  */
     729             : int
     730           0 : sys_minherit(struct proc *p, void *v, register_t *retval)
     731             : {
     732             :         struct sys_minherit_args /* {
     733             :                 syscallarg(void *) addr;
     734             :                 syscallarg(size_t) len;
     735             :                 syscallarg(int) inherit;
     736           0 :         } */ *uap = v;
     737             :         vaddr_t addr;
     738             :         vsize_t size, pageoff;
     739             :         vm_inherit_t inherit;
     740             :         
     741           0 :         addr = (vaddr_t)SCARG(uap, addr);
     742           0 :         size = (vsize_t)SCARG(uap, len);
     743           0 :         inherit = SCARG(uap, inherit);
     744             : 
     745             :         /*
     746             :          * align the address to a page boundary, and adjust the size accordingly
     747             :          */
     748           0 :         ALIGN_ADDR(addr, size, pageoff);
     749           0 :         if (addr > SIZE_MAX - size)
     750           0 :                 return (EINVAL);                /* disallow wrap-around. */
     751             :         
     752           0 :         return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
     753             :             inherit));
     754           0 : }
     755             : 
     756             : /*
     757             :  * sys_madvise: give advice about memory usage.
     758             :  */
     759             : /* ARGSUSED */
     760             : int
     761           0 : sys_madvise(struct proc *p, void *v, register_t *retval)
     762             : {
     763             :         struct sys_madvise_args /* {
     764             :                 syscallarg(void *) addr;
     765             :                 syscallarg(size_t) len;
     766             :                 syscallarg(int) behav;
     767           0 :         } */ *uap = v;
     768             :         vaddr_t addr;
     769             :         vsize_t size, pageoff;
     770             :         int advice, error;
     771             :         
     772           0 :         addr = (vaddr_t)SCARG(uap, addr);
     773           0 :         size = (vsize_t)SCARG(uap, len);
     774           0 :         advice = SCARG(uap, behav);
     775             : 
     776             :         /*
     777             :          * align the address to a page boundary, and adjust the size accordingly
     778             :          */
     779           0 :         ALIGN_ADDR(addr, size, pageoff);
     780           0 :         if (addr > SIZE_MAX - size)
     781           0 :                 return (EINVAL);                /* disallow wrap-around. */
     782             : 
     783           0 :         switch (advice) {
     784             :         case MADV_NORMAL:
     785             :         case MADV_RANDOM:
     786             :         case MADV_SEQUENTIAL:
     787           0 :                 error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
     788           0 :                     addr + size, advice);
     789           0 :                 break;
     790             : 
     791             :         case MADV_WILLNEED:
     792             :                 /*
     793             :                  * Activate all these pages, pre-faulting them in if
     794             :                  * necessary.
     795             :                  */
     796             :                 /*
     797             :                  * XXX IMPLEMENT ME.
     798             :                  * Should invent a "weak" mode for uvm_fault()
     799             :                  * which would only do the PGO_LOCKED pgo_get().
     800             :                  */
     801           0 :                 return (0);
     802             : 
     803             :         case MADV_DONTNEED:
     804             :                 /*
     805             :                  * Deactivate all these pages.  We don't need them
     806             :                  * any more.  We don't, however, toss the data in
     807             :                  * the pages.
     808             :                  */
     809           0 :                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
     810             :                     PGO_DEACTIVATE);
     811           0 :                 break;
     812             : 
     813             :         case MADV_FREE:
     814             :                 /*
     815             :                  * These pages contain no valid data, and may be
     816             :                  * garbage-collected.  Toss all resources, including
     817             :                  * any swap space in use.
     818             :                  */
     819           0 :                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
     820             :                     PGO_FREE);
     821           0 :                 break;
     822             : 
     823             :         case MADV_SPACEAVAIL:
     824             :                 /*
     825             :                  * XXXMRG What is this?  I think it's:
     826             :                  *
     827             :                  *      Ensure that we have allocated backing-store
     828             :                  *      for these pages.
     829             :                  *
     830             :                  * This is going to require changes to the page daemon,
     831             :                  * as it will free swap space allocated to pages in core.
     832             :                  * There's also what to do for device/file/anonymous memory.
     833             :                  */
     834           0 :                 return (EINVAL);
     835             : 
     836             :         default:
     837           0 :                 return (EINVAL);
     838             :         }
     839             : 
     840           0 :         return (error);
     841           0 : }
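/*
 * Userland sketch (hedged): advise sequential access before a linear
 * scan, then mark the contents disposable once done; MADV_FREE lets the
 * kernel discard the pages and any swap backing them:
 *
 *	madvise(p, len, MADV_SEQUENTIAL);
 *	... scan p ...
 *	madvise(p, len, MADV_FREE);
 */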
     842             : 
     843             : /*
     844             :  * sys_mlock: memory lock
     845             :  */
     846             : 
     847             : int
     848           0 : sys_mlock(struct proc *p, void *v, register_t *retval)
     849             : {
     850             :         struct sys_mlock_args /* {
     851             :                 syscallarg(const void *) addr;
     852             :                 syscallarg(size_t) len;
     853           0 :         } */ *uap = v;
     854             :         vaddr_t addr;
     855             :         vsize_t size, pageoff;
     856             :         int error;
     857             : 
     858             :         /* extract syscall args from uap */
     859           0 :         addr = (vaddr_t)SCARG(uap, addr);
     860           0 :         size = (vsize_t)SCARG(uap, len);
     861             : 
     862             :         /* align address to a page boundary and adjust size accordingly */
     863           0 :         ALIGN_ADDR(addr, size, pageoff);
     864           0 :         if (addr > SIZE_MAX - size)
     865           0 :                 return (EINVAL);                /* disallow wrap-around. */
     866             : 
     867           0 :         if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
     868           0 :                 return (EAGAIN);
     869             : 
     870             : #ifdef pmap_wired_count
     871           0 :         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
     872           0 :                         p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
     873           0 :                 return (EAGAIN);
     874             : #else
     875             :         if ((error = suser(p)) != 0)
     876             :                 return (error);
     877             : #endif
     878             : 
     879           0 :         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
     880             :             0);
     881           0 :         return (error == 0 ? 0 : ENOMEM);
     882           0 : }
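/*
 * Userland sketch (hedged): wire sensitive data, e.g. key material, so
 * it cannot be paged out; subject to the RLIMIT_MEMLOCK check above:
 *
 *	if (mlock(key, keylen) == -1)
 *		err(1, "mlock");
 *	... use key ...
 *	munlock(key, keylen);
 */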
     883             : 
     884             : /*
     885             :  * sys_munlock: unlock wired pages
     886             :  */
     887             : 
     888             : int
     889           0 : sys_munlock(struct proc *p, void *v, register_t *retval)
     890             : {
     891             :         struct sys_munlock_args /* {
     892             :                 syscallarg(const void *) addr;
     893             :                 syscallarg(size_t) len;
     894           0 :         } */ *uap = v;
     895             :         vaddr_t addr;
     896             :         vsize_t size, pageoff;
     897             :         int error;
     898             : 
     899             :         /* extract syscall args from uap */
     900           0 :         addr = (vaddr_t)SCARG(uap, addr);
     901           0 :         size = (vsize_t)SCARG(uap, len);
     902             : 
     903             :         /* align address to a page boundary, and adjust size accordingly */
     904           0 :         ALIGN_ADDR(addr, size, pageoff);
     905           0 :         if (addr > SIZE_MAX - size)
     906           0 :                 return (EINVAL);                /* disallow wrap-around. */
     907             : 
     908             : #ifndef pmap_wired_count
     909             :         if ((error = suser(p)) != 0)
     910             :                 return (error);
     911             : #endif
     912             : 
     913           0 :         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
     914             :             0);
     915           0 :         return (error == 0 ? 0 : ENOMEM);
     916           0 : }
     917             : 
     918             : /*
     919             :  * sys_mlockall: lock all pages mapped into an address space.
     920             :  */
     921             : int
     922           0 : sys_mlockall(struct proc *p, void *v, register_t *retval)
     923             : {
     924             :         struct sys_mlockall_args /* {
     925             :                 syscallarg(int) flags;
     926           0 :         } */ *uap = v;
     927             :         int error, flags;
     928             : 
     929           0 :         flags = SCARG(uap, flags);
     930             : 
     931           0 :         if (flags == 0 ||
     932           0 :             (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
     933           0 :                 return (EINVAL);
     934             : 
     935             : #ifndef pmap_wired_count
     936             :         if ((error = suser(p)) != 0)
     937             :                 return (error);
     938             : #endif
     939             : 
     940           0 :         error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
     941           0 :             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
     942           0 :         if (error != 0 && error != ENOMEM)
     943           0 :                 return (EAGAIN);
     944           0 :         return (error);
     945           0 : }
     946             : 
     947             : /*
     948             :  * sys_munlockall: unlock all pages mapped into an address space.
     949             :  */
     950             : int
     951           0 : sys_munlockall(struct proc *p, void *v, register_t *retval)
     952             : {
     953             : 
     954           0 :         (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
     955           0 :         return (0);
     956             : }
     957             : 
     958             : /*
      959             :  * common code for uvm_mmapanon and uvm_mmapfile to lock a mapping
     960             :  */
     961             : int
     962           0 : uvm_mmaplock(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
     963             :     vsize_t locklimit)
     964             : {
     965             :         int error;
     966             : 
     967             :         /*
     968             :          * POSIX 1003.1b -- if our address space was configured
     969             :          * to lock all future mappings, wire the one we just made.
     970             :          */
     971           0 :         if (prot == PROT_NONE) {
     972             :                 /*
     973             :                  * No more work to do in this case.
     974             :                  */
     975           0 :                 return (0);
     976             :         }
     977             : 
     978           0 :         vm_map_lock(map);
     979           0 :         if (map->flags & VM_MAP_WIREFUTURE) {
     980           0 :                 KERNEL_LOCK();
     981           0 :                 if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
     982             : #ifdef pmap_wired_count
     983           0 :                     || (locklimit != 0 && (size +
     984           0 :                          ptoa(pmap_wired_count(vm_map_pmap(map)))) >
     985             :                         locklimit)
     986             : #endif
     987             :                 ) {
     988             :                         error = ENOMEM;
     989           0 :                         vm_map_unlock(map);
     990             :                         /* unmap the region! */
     991           0 :                         uvm_unmap(map, *addr, *addr + size);
     992           0 :                         KERNEL_UNLOCK();
     993           0 :                         return (error);
     994             :                 }
     995             :                 /*
     996             :                  * uvm_map_pageable() always returns the map
     997             :                  * unlocked.
     998             :                  */
     999           0 :                 error = uvm_map_pageable(map, *addr, *addr + size,
    1000             :                     FALSE, UVM_LK_ENTER);
    1001           0 :                 if (error != 0) {
    1002             :                         /* unmap the region! */
    1003           0 :                         uvm_unmap(map, *addr, *addr + size);
    1004           0 :                         KERNEL_UNLOCK();
    1005           0 :                         return (error);
    1006             :                 }
    1007           0 :                 KERNEL_UNLOCK();
    1008           0 :                 return (0);
    1009             :         }
    1010           0 :         vm_map_unlock(map);
    1011           0 :         return (0);
    1012           0 : }
    1013             : 
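The VM_MAP_WIREFUTURE branch is reached when the process earlier called mlockall(MCL_FUTURE): every later mapping is wired before mmap(2) returns, and if atop(size) pages would push the system past uvmexp.wiredmax, or the process past RLIMIT_MEMLOCK where pmap_wired_count is available, the fresh mapping is unmapped again and the caller sees ENOMEM. A hedged userland sketch of that sequence:

        #include <sys/mman.h>
        #include <string.h>
        #include <err.h>

        int
        main(void)
        {
                char *p;

                /* ask the kernel to wire all future mappings on creation */
                if (mlockall(MCL_FUTURE) == -1)
                        err(1, "mlockall");

                /*
                 * uvm_mmaplock() wires this mapping before mmap() returns;
                 * exceeding the wiring limits surfaces here as ENOMEM and
                 * the mapping is torn down.
                 */
                p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                    MAP_ANON | MAP_PRIVATE, -1, 0);
                if (p == MAP_FAILED)
                        err(1, "mmap");

                memset(p, 0, 4096);     /* touches already-resident pages */
                return (0);
        }
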
    1014             : /*
    1015             :  * uvm_mmapanon: internal version of mmap for anons
    1016             :  *
    1017             :  * - used by sys_mmap
    1018             :  */
    1019             : int
    1020           0 : uvm_mmapanon(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    1021             :     vm_prot_t maxprot, int flags, vsize_t locklimit, struct proc *p)
    1022             : {
    1023             :         int error;
    1024             :         int advice = MADV_NORMAL;
    1025             :         unsigned int uvmflag = 0;
    1026             :         vsize_t align = 0;      /* userland page size */
    1027             : 
    1028             :         /*
    1029             :          * for non-fixed mappings, round off the suggested address.
    1030             :          * for fixed mappings, check alignment and zap old mappings.
    1031             :          */
    1032           0 :         if ((flags & MAP_FIXED) == 0) {
    1033           0 :                 *addr = round_page(*addr);      /* round */
    1034           0 :         } else {
    1035           0 :                 if (*addr & PAGE_MASK)
     1036           0 :                         return (EINVAL);
    1037             : 
    1038             :                 uvmflag |= UVM_FLAG_FIXED;
    1039           0 :                 if ((flags & __MAP_NOREPLACE) == 0)
    1040           0 :                         uvmflag |= UVM_FLAG_UNMAP;
    1041             :         }
    1042             : 
    1043           0 :         if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
    1044           0 :                 align = __LDPGSZ;
    1045           0 :         if ((flags & MAP_SHARED) == 0)
    1046             :                 /* XXX: defer amap create */
    1047           0 :                 uvmflag |= UVM_FLAG_COPYONW;
    1048             :         else
    1049             :                 /* shared: create amap now */
    1050           0 :                 uvmflag |= UVM_FLAG_OVERLAY;
    1051           0 :         if (flags & MAP_STACK)
    1052           0 :                 uvmflag |= UVM_FLAG_STACK;
    1053             : 
    1054             :         /* set up mapping flags */
    1055           0 :         uvmflag = UVM_MAPFLAG(prot, maxprot,
    1056             :             (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
    1057             :             advice, uvmflag);
    1058             : 
    1059           0 :         error = uvm_mapanon(map, addr, size, align, uvmflag);
    1060             : 
    1061           0 :         if (error == 0)
    1062           0 :                 error = uvm_mmaplock(map, addr, size, prot, locklimit);
    1063           0 :         return error;
    1064           0 : }
    1065             : 
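From userland, the anon path corresponds to MAP_ANON mappings: a non-fixed hint is rounded to a page boundary, a MAP_FIXED address must already be page-aligned or the call fails with EINVAL, and large requests are aligned to __LDPGSZ. An illustrative caller exercising both branches (addresses are hypothetical, not part of this file):

        #include <sys/mman.h>
        #include <stdint.h>
        #include <err.h>

        int
        main(void)
        {
                char *p;

                /* non-fixed: the kernel may move the hint and pick a spot */
                p = mmap(NULL, 65536, PROT_READ | PROT_WRITE,
                    MAP_ANON | MAP_PRIVATE, -1, 0);
                if (p == MAP_FAILED)
                        err(1, "mmap");

                /*
                 * MAP_FIXED with an unaligned address trips the
                 * (*addr & PAGE_MASK) check above and fails with EINVAL.
                 */
                if (mmap((void *)((uintptr_t)p + 1), 4096, PROT_READ,
                    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0) != MAP_FAILED)
                        errx(1, "unaligned MAP_FIXED unexpectedly succeeded");
                return (0);
        }
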
    1066             : /*
    1067             :  * uvm_mmapfile: internal version of mmap for non-anons
    1068             :  *
    1069             :  * - used by sys_mmap
    1070             :  * - caller must page-align the file offset
    1071             :  */
    1072             : int
    1073           0 : uvm_mmapfile(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    1074             :     vm_prot_t maxprot, int flags, struct vnode *vp, voff_t foff,
    1075             :     vsize_t locklimit, struct proc *p)
    1076             : {
    1077             :         struct uvm_object *uobj;
    1078             :         int error;
    1079             :         int advice = MADV_NORMAL;
    1080             :         unsigned int uvmflag = 0;
    1081             :         vsize_t align = 0;      /* userland page size */
    1082             : 
    1083             :         /*
    1084             :          * for non-fixed mappings, round off the suggested address.
    1085             :          * for fixed mappings, check alignment and zap old mappings.
    1086             :          */
    1087           0 :         if ((flags & MAP_FIXED) == 0) {
    1088           0 :                 *addr = round_page(*addr);      /* round */
    1089           0 :         } else {
    1090           0 :                 if (*addr & PAGE_MASK)
     1091           0 :                         return (EINVAL);
    1092             : 
    1093             :                 uvmflag |= UVM_FLAG_FIXED;
    1094           0 :                 if ((flags & __MAP_NOREPLACE) == 0)
    1095           0 :                         uvmflag |= UVM_FLAG_UNMAP;
    1096             :         }
    1097             : 
    1098             :         /*
    1099             :          * attach to underlying vm object.
    1100             :          */
    1101           0 :         if (vp->v_type != VCHR) {
    1102           0 :                 uobj = uvn_attach(vp, (flags & MAP_SHARED) ?
    1103           0 :                    maxprot : (maxprot & ~PROT_WRITE));
    1104             : 
    1105             :                 /*
    1106             :                  * XXXCDC: hack from old code
    1107             :                  * don't allow vnodes which have been mapped
    1108             :                  * shared-writeable to persist [forces them to be
    1109             :                  * flushed out when last reference goes].
    1110             :                  * XXXCDC: interesting side effect: avoids a bug.
     1111             :                  * note that in WRITE [ufs_readwrite.c] we
     1112             :                  * allocate a buffer, uncache, and then do the write.
    1113             :                  * the problem with this is that if the uncache causes
    1114             :                  * VM data to be flushed to the same area of the file
    1115             :                  * we are writing to... in that case we've got the
    1116             :                  * buffer locked and our process goes to sleep forever.
    1117             :                  *
    1118             :                  * XXXCDC: checking maxprot protects us from the
    1119             :                  * "persistbug" program but this is not a long term
    1120             :                  * solution.
    1121             :                  *
    1122             :                  * XXXCDC: we don't bother calling uncache with the vp
    1123             :                  * VOP_LOCKed since we know that we are already
    1124             :                  * holding a valid reference to the uvn (from the
    1125             :                  * uvn_attach above), and thus it is impossible for
    1126             :                  * the uncache to kill the uvn and trigger I/O.
    1127             :                  */
    1128           0 :                 if (flags & MAP_SHARED) {
    1129           0 :                         if ((prot & PROT_WRITE) ||
    1130           0 :                             (maxprot & PROT_WRITE)) {
    1131           0 :                                 uvm_vnp_uncache(vp);
    1132           0 :                         }
    1133             :                 }
    1134             :         } else {
    1135           0 :                 uobj = udv_attach(vp->v_rdev,
    1136           0 :                     (flags & MAP_SHARED) ? maxprot :
    1137           0 :                     (maxprot & ~PROT_WRITE), foff, size);
    1138             :                 /*
    1139             :                  * XXX Some devices don't like to be mapped with
    1140             :                  * XXX PROT_EXEC, but we don't really have a
    1141             :                  * XXX better way of handling this, right now
    1142             :                  */
    1143           0 :                 if (uobj == NULL && (prot & PROT_EXEC) == 0) {
    1144           0 :                         maxprot &= ~PROT_EXEC;
    1145           0 :                         uobj = udv_attach(vp->v_rdev,
    1146           0 :                             (flags & MAP_SHARED) ? maxprot :
    1147           0 :                             (maxprot & ~PROT_WRITE), foff, size);
    1148           0 :                 }
    1149             :                 advice = MADV_RANDOM;
    1150             :         }
    1151             : 
    1152           0 :         if (uobj == NULL)
     1153           0 :                 return ((vp->v_type == VREG) ? ENOMEM : EINVAL);
    1154             : 
    1155           0 :         if ((flags & MAP_SHARED) == 0)
    1156           0 :                 uvmflag |= UVM_FLAG_COPYONW;
    1157           0 :         if (flags & __MAP_NOFAULT)
    1158           0 :                 uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY);
    1159           0 :         if (flags & MAP_STACK)
    1160           0 :                 uvmflag |= UVM_FLAG_STACK;
    1161             : 
    1162             :         /* set up mapping flags */
    1163           0 :         uvmflag = UVM_MAPFLAG(prot, maxprot,
    1164             :             (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
    1165             :             advice, uvmflag);
    1166             : 
    1167           0 :         error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
    1168             : 
    1169           0 :         if (error == 0)
    1170           0 :                 return uvm_mmaplock(map, addr, size, prot, locklimit);
    1171             : 
    1172             :         /* errors: first detach from the uobj, if any.  */
    1173           0 :         if (uobj)
    1174           0 :                 uobj->pgops->pgo_detach(uobj);
    1175             : 
    1176           0 :         return (error);
    1177           0 : }
    1178             : 
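The vnode path splits on v_type: regular files go through uvn_attach() (with the shared-writable uncache dance described above), while character devices go through udv_attach() with MADV_RANDOM advice and a PROT_EXEC retry. From userland this is an ordinary file mapping; a minimal sketch (the file path is only an example):

        #include <sys/mman.h>
        #include <fcntl.h>
        #include <unistd.h>
        #include <err.h>

        int
        main(void)
        {
                char *p;
                int fd;

                if ((fd = open("/etc/motd", O_RDONLY)) == -1)
                        err(1, "open");

                /*
                 * A private, read-only file mapping: sys_mmap hands the
                 * vnode to uvm_mmapfile(), which attaches it via
                 * uvn_attach() with PROT_WRITE masked out of maxprot.
                 */
                p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
                if (p == MAP_FAILED)
                        err(1, "mmap");

                write(STDOUT_FILENO, p, 16);    /* first bytes of the file */
                close(fd);
                return (0);
        }
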
    1179             : /* an address that can't be in userspace */
    1180             : #define BOGO_PC (KERNBASE + 1)
    1181             : int
    1182           0 : sys_kbind(struct proc *p, void *v, register_t *retval)
    1183             : {
    1184             :         struct sys_kbind_args /* {
    1185             :                 syscallarg(const struct __kbind *) param;
    1186             :                 syscallarg(size_t) psize;
    1187             :                 syscallarg(uint64_t) proc_cookie;
    1188           0 :         } */ *uap = v;
    1189             :         const struct __kbind *paramp;
    1190           0 :         union {
    1191             :                 struct __kbind uk[KBIND_BLOCK_MAX];
    1192             :                 char upad[KBIND_BLOCK_MAX * sizeof(*paramp) + KBIND_DATA_MAX];
    1193             :         } param;
    1194           0 :         struct uvm_map_deadq dead_entries;
    1195          60 :         struct process *pr = p->p_p;
    1196             :         const char *data;
    1197           0 :         vaddr_t baseva, last_baseva, endva, pageoffset, kva;
    1198             :         size_t psize, s;
    1199             :         u_long pc;
    1200             :         int count, i;
    1201             :         int error;
    1202             : 
    1203             :         /*
    1204             :          * extract syscall args from uap
    1205             :          */
    1206           0 :         paramp = SCARG(uap, param);
    1207           0 :         psize = SCARG(uap, psize);
    1208             : 
    1209             :         /* a NULL paramp disables the syscall for the process */
    1210           0 :         if (paramp == NULL) {
    1211           0 :                 pr->ps_kbind_addr = BOGO_PC;
    1212           0 :                 return (0);
    1213             :         }
    1214             : 
    1215             :         /* security checks */
    1216           0 :         pc = PROC_PC(p);
    1217           0 :         if (pr->ps_kbind_addr == 0) {
    1218           0 :                 pr->ps_kbind_addr = pc;
    1219           0 :                 pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
    1220           0 :         } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC)
    1221           0 :                 sigexit(p, SIGILL);
    1222          60 :         else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie))
    1223           0 :                 sigexit(p, SIGILL);
    1224           0 :         if (psize < sizeof(struct __kbind) || psize > sizeof(param))
    1225           0 :                 return (EINVAL);
    1226           0 :         if ((error = copyin(paramp, &param, psize)))
    1227           0 :                 return (error);
    1228             : 
    1229             :         /*
    1230             :          * The param argument points to an array of __kbind structures
    1231             :          * followed by the corresponding new data areas for them.  Verify
    1232             :          * that the sizes in the __kbind structures add up to the total
    1233             :          * size and find the start of the new area.
    1234             :          */
    1235           0 :         paramp = &param.uk[0];
    1236             :         s = psize;
    1237          60 :         for (count = 0; s > 0 && count < KBIND_BLOCK_MAX; count++) {
    1238           0 :                 if (s < sizeof(*paramp))
    1239           0 :                         return (EINVAL);
    1240           0 :                 s -= sizeof(*paramp);
    1241             : 
    1242           0 :                 baseva = (vaddr_t)paramp[count].kb_addr;
    1243           0 :                 endva = baseva + paramp[count].kb_size - 1;
    1244           0 :                 if (paramp[count].kb_addr == NULL ||
    1245           0 :                     paramp[count].kb_size == 0 ||
    1246           0 :                     paramp[count].kb_size > KBIND_DATA_MAX ||
    1247           0 :                     baseva >= VM_MAXUSER_ADDRESS ||
    1248           0 :                     endva >= VM_MAXUSER_ADDRESS ||
    1249           0 :                     trunc_page(baseva) != trunc_page(endva) ||
    1250           0 :                     s < paramp[count].kb_size)
    1251           0 :                         return (EINVAL);
    1252             : 
    1253           0 :                 s -= paramp[count].kb_size;
    1254             :         }
    1255           0 :         if (s > 0)
    1256           0 :                 return (EINVAL);
    1257           0 :         data = (const char *)&paramp[count];
    1258             : 
    1259             :         /* all looks good, so do the bindings */
    1260             :         last_baseva = VM_MAXUSER_ADDRESS;
    1261           0 :         kva = 0;
    1262           0 :         TAILQ_INIT(&dead_entries);
    1263          60 :         for (i = 0; i < count; i++) {
    1264           0 :                 baseva = (vaddr_t)paramp[i].kb_addr;
    1265           0 :                 pageoffset = baseva & PAGE_MASK;
    1266           0 :                 baseva = trunc_page(baseva);
    1267             : 
     1268             :                 /* make sure the desired page is mapped into kernel_map */
    1269           0 :                 if (baseva != last_baseva) {
    1270           0 :                         if (kva != 0) {
    1271           0 :                                 vm_map_lock(kernel_map);
    1272           0 :                                 uvm_unmap_remove(kernel_map, kva,
    1273           0 :                                     kva+PAGE_SIZE, &dead_entries, FALSE, TRUE);
    1274           0 :                                 vm_map_unlock(kernel_map);
    1275           0 :                                 kva = 0;
    1276           0 :                         }
    1277         120 :                         if ((error = uvm_map_extract(&p->p_vmspace->vm_map,
    1278             :                             baseva, PAGE_SIZE, &kva, UVM_EXTRACT_FIXPROT)))
    1279             :                                 break;
    1280             :                         last_baseva = baseva;
    1281           0 :                 }
    1282             : 
    1283             :                 /* do the update */
    1284           0 :                 if ((error = kcopy(data, (char *)kva + pageoffset,
    1285           0 :                     paramp[i].kb_size)))
    1286             :                         break;
    1287           0 :                 data += paramp[i].kb_size;
    1288             :         }
    1289             : 
    1290           0 :         if (kva != 0) {
    1291          60 :                 vm_map_lock(kernel_map);
    1292           0 :                 uvm_unmap_remove(kernel_map, kva, kva+PAGE_SIZE,
    1293             :                     &dead_entries, FALSE, TRUE);
    1294           0 :                 vm_map_unlock(kernel_map);
    1295           0 :         }
    1296           0 :         uvm_unmap_detach(&dead_entries, AMAP_REFALL);
    1297             : 
    1298           0 :         return (error);
    1299           0 : }
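
The blob sys_kbind() parses is all the __kbind headers first, then their data chunks concatenated in the same order; each chunk must fit in KBIND_DATA_MAX, must target a region that does not cross a page boundary, and the sizes must add up exactly to psize. Only ld.so is expected to issue this syscall: the first caller's program counter and cookie are latched, and any later mismatch is fatal (SIGILL), while a NULL param disables the syscall for the process. A hypothetical layout builder, with the two-member struct mirrored locally for illustration only:

        #include <stddef.h>
        #include <string.h>

        struct __kbind {                /* mirrors the kernel's view */
                void    *kb_addr;       /* target address to update */
                size_t   kb_size;       /* bytes of new data */
        };

        /*
         * Pack two binding updates the way the validation loop walks them:
         * [hdr0][hdr1][data0][data1]; the return value is passed as psize.
         */
        static size_t
        kbind_pack(char *buf, void *a0, const void *d0, size_t s0,
            void *a1, const void *d1, size_t s1)
        {
                struct __kbind *hdr = (struct __kbind *)buf;
                char *data = buf + 2 * sizeof(*hdr);

                hdr[0].kb_addr = a0; hdr[0].kb_size = s0;
                hdr[1].kb_addr = a1; hdr[1].kb_size = s1;
                memcpy(data, d0, s0);
                memcpy(data + s0, d1, s1);
                return (2 * sizeof(*hdr) + s0 + s1);
        }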

Generated by: LCOV version 1.13