LCOV - code coverage report
Current view: top level - kern - sched_bsd.c (source / functions)
Test: 6.4          Date: 2018-10-19 03:25:38
Coverage: Lines: 0 / 247 hit (0.0 %)   Functions: 0 / 14 hit (0.0 %)
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*      $OpenBSD: sched_bsd.c,v 1.47 2017/12/04 09:38:20 mpi Exp $      */
       2             : /*      $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
       3             : 
       4             : /*-
       5             :  * Copyright (c) 1982, 1986, 1990, 1991, 1993
       6             :  *      The Regents of the University of California.  All rights reserved.
       7             :  * (c) UNIX System Laboratories, Inc.
       8             :  * All or some portions of this file are derived from material licensed
       9             :  * to the University of California by American Telephone and Telegraph
      10             :  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
      11             :  * the permission of UNIX System Laboratories, Inc.
      12             :  *
      13             :  * Redistribution and use in source and binary forms, with or without
      14             :  * modification, are permitted provided that the following conditions
      15             :  * are met:
      16             :  * 1. Redistributions of source code must retain the above copyright
      17             :  *    notice, this list of conditions and the following disclaimer.
      18             :  * 2. Redistributions in binary form must reproduce the above copyright
      19             :  *    notice, this list of conditions and the following disclaimer in the
      20             :  *    documentation and/or other materials provided with the distribution.
      21             :  * 3. Neither the name of the University nor the names of its contributors
      22             :  *    may be used to endorse or promote products derived from this software
      23             :  *    without specific prior written permission.
      24             :  *
      25             :  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
      26             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      27             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      28             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
      29             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      30             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      31             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      32             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      33             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      34             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      35             :  * SUCH DAMAGE.
      36             :  *
      37             :  *      @(#)kern_synch.c        8.6 (Berkeley) 1/21/94
      38             :  */
      39             : 
      40             : #include <sys/param.h>
      41             : #include <sys/systm.h>
      42             : #include <sys/proc.h>
      43             : #include <sys/kernel.h>
      44             : #include <sys/malloc.h>
      45             : #include <sys/signalvar.h>
      46             : #include <sys/resourcevar.h>
      47             : #include <uvm/uvm_extern.h>
      48             : #include <sys/sched.h>
      49             : #include <sys/timeout.h>
      50             : 
      51             : #ifdef KTRACE
      52             : #include <sys/ktrace.h>
      53             : #endif
      54             : 
      55             : 
      56             : int     lbolt;                  /* once a second sleep address */
      57             : int     rrticks_init;           /* # of hardclock ticks per roundrobin() */
      58             : 
      59             : #ifdef MULTIPROCESSOR
      60             : struct __mp_lock sched_lock;
      61             : #endif
      62             : 
      63             : void     schedcpu(void *);
      64             : void     updatepri(struct proc *);
      65             : 
      66             : void
      67           0 : scheduler_start(void)
      68             : {
      69             :         static struct timeout schedcpu_to;
      70             : 
      71             :         /*
      72             :          * We avoid polluting the global namespace by keeping the scheduler
      73             :          * timeouts static in this function.
       74             :          * We set up the timeout here and kick schedcpu once to make it do
      75             :          * its job.
      76             :          */
      77           0 :         timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
      78             : 
      79           0 :         rrticks_init = hz / 10;
      80           0 :         schedcpu(&schedcpu_to);
      81           0 : }
      82             : 
      83             : /*
      84             :  * Force switch among equal priority processes every 100ms.
      85             :  */
      86             : void
      87           0 : roundrobin(struct cpu_info *ci)
      88             : {
      89           0 :         struct schedstate_percpu *spc = &ci->ci_schedstate;
      90             : 
      91           0 :         spc->spc_rrticks = rrticks_init;
      92             : 
      93           0 :         if (ci->ci_curproc != NULL) {
      94           0 :                 if (spc->spc_schedflags & SPCF_SEENRR) {
      95             :                         /*
      96             :                          * The process has already been through a roundrobin
      97             :                          * without switching and may be hogging the CPU.
      98             :                          * Indicate that the process should yield.
      99             :                          */
     100           0 :                         atomic_setbits_int(&spc->spc_schedflags,
     101             :                             SPCF_SHOULDYIELD);
     102           0 :                 } else {
     103           0 :                         atomic_setbits_int(&spc->spc_schedflags,
     104             :                             SPCF_SEENRR);
     105             :                 }
     106             :         }
     107             : 
     108           0 :         if (spc->spc_nrun)
     109           0 :                 need_resched(ci);
     110           0 : }
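
The SPCF_SEENRR/SPCF_SHOULDYIELD pair above is a two-strike aging scheme: the first roundrobin() beat that finds the same process still on the CPU marks it as seen, the second asks it to yield. A minimal userland sketch of that state machine (the flag values are hypothetical stand-ins for the kernel's SPCF_* bits):

#include <stdio.h>

#define SEENRR      0x01        /* hypothetical stand-in for SPCF_SEENRR */
#define SHOULDYIELD 0x02        /* hypothetical stand-in for SPCF_SHOULDYIELD */

/* One 100ms roundrobin beat with the same process still on the CPU. */
static void
rr_beat(unsigned int *flags)
{
        if (*flags & SEENRR)
                *flags |= SHOULDYIELD;
        else
                *flags |= SEENRR;
}

int
main(void)
{
        unsigned int flags = 0;
        int beat;

        for (beat = 1; beat <= 3; beat++) {
                rr_beat(&flags);
                printf("beat %d: seenrr=%d shouldyield=%d\n", beat,
                    !!(flags & SEENRR), !!(flags & SHOULDYIELD));
        }
        return 0;
}

In the kernel both flags are cleared on every context switch (the SPCF_SWITCHCLEAR mask in mi_switch() below), so only a process that survives two consecutive 100 ms quanta is asked to yield.
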
     111             : 
     112             : /*
     113             :  * Constants for digital decay and forget:
     114             :  *      90% of (p_estcpu) usage in 5 * loadav time
     115             :  *      95% of (p_pctcpu) usage in 60 seconds (load insensitive)
     116             :  *          Note that, as ps(1) mentions, this can let percentages
     117             :  *          total over 100% (I've seen 137.9% for 3 processes).
     118             :  *
     119             :  * Note that hardclock updates p_estcpu and p_cpticks independently.
     120             :  *
     121             :  * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
     122             :  * That is, the system wants to compute a value of decay such
     123             :  * that the following for loop:
     124             :  *      for (i = 0; i < (5 * loadavg); i++)
     125             :  *              p_estcpu *= decay;
     126             :  * will compute
     127             :  *      p_estcpu *= 0.1;
     128             :  * for all values of loadavg:
     129             :  *
     130             :  * Mathematically this loop can be expressed by saying:
     131             :  *      decay ** (5 * loadavg) ~= .1
     132             :  *
     133             :  * The system computes decay as:
     134             :  *      decay = (2 * loadavg) / (2 * loadavg + 1)
     135             :  *
     136             :  * We wish to prove that the system's computation of decay
     137             :  * will always fulfill the equation:
     138             :  *      decay ** (5 * loadavg) ~= .1
     139             :  *
     140             :  * If we compute b as:
     141             :  *      b = 2 * loadavg
     142             :  * then
     143             :  *      decay = b / (b + 1)
     144             :  *
     145             :  * We now need to prove two things:
     146             :  *      1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
     147             :  *      2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
     148             :  *      
     149             :  * Facts:
     150             :  *         For x close to zero, exp(x) =~ 1 + x, since
     151             :  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
     152             :  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
     153             :  *         For x close to zero, ln(1+x) =~ x, since
     154             :  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
     155             :  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
     156             :  *         ln(.1) =~ -2.30
     157             :  *
     158             :  * Proof of (1):
     159             :  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
     160             :  *      solving for factor,
      161             :  *      ln(factor) =~ -2.30/(5*loadav), or
     162             :  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
     163             :  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
     164             :  *
     165             :  * Proof of (2):
     166             :  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
     167             :  *      solving for power,
     168             :  *      power*ln(b/(b+1)) =~ -2.30, or
     169             :  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
     170             :  *
     171             :  * Actual power values for the implemented algorithm are as follows:
     172             :  *      loadav: 1       2       3       4
     173             :  *      power:  5.68    10.32   14.94   19.55
     174             :  */
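
The derivation is easy to check numerically: with b = 2 * loadav and decay = b / (b + 1), solving decay ** power = 0.1 for power reproduces the table above. A minimal sketch (compile with -lm):

#include <math.h>
#include <stdio.h>

int
main(void)
{
        int loadav;

        for (loadav = 1; loadav <= 4; loadav++) {
                double b = 2.0 * loadav;
                double decay = b / (b + 1.0);
                /* the power for which decay**power == 0.1 */
                double power = log(0.1) / log(decay);

                printf("loadav %d: decay %.4f, power %.2f (5*loadav = %d)\n",
                    loadav, decay, power, 5 * loadav);
        }
        return 0;
}
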
     175             : 
     176             : /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
     177             : #define loadfactor(loadav)      (2 * (loadav))
     178             : #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
     179             : 
     180             : /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
     181             : fixpt_t ccpu = 0.95122942450071400909 * FSCALE;         /* exp(-1/20) */
     182             : 
     183             : /*
     184             :  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
     185             :  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
     186             :  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
     187             :  *
     188             :  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
     189             :  *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
     190             :  *
     191             :  * If you don't want to bother with the faster/more-accurate formula, you
     192             :  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
      193             :  * (more general) method of calculating the percentage of CPU used by a process.
     194             :  */
     195             : #define CCPU_SHIFT      11
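
Both p_pctcpu formulas in schedcpu() below add roughly one fixed-point unit per clock tick when phz == 100. A sketch comparing the shifted fast path against the general formula, assuming FSHIFT = 11 (so FSCALE = 2048) as in OpenBSD's param.h:

#include <stdio.h>
#include <stdint.h>

#define FSHIFT     11
#define FSCALE     (1 << FSHIFT)
#define CCPU_SHIFT 11

typedef uint32_t fixpt_t;

int
main(void)
{
        fixpt_t ccpu = (fixpt_t)(0.95122942450071400909 * FSCALE); /* exp(-1/20) */
        int phz = 100;
        fixpt_t cpticks;

        for (cpticks = 0; cpticks <= 100; cpticks += 25) {
                /* fast path: valid because FSHIFT >= CCPU_SHIFT */
                fixpt_t fast = cpticks << (FSHIFT - CCPU_SHIFT);
                /* general path: works for any phz */
                fixpt_t gen = ((FSCALE - ccpu) *
                    (cpticks * FSCALE / phz)) >> FSHIFT;

                printf("cpticks %3u: fast %4u, general %4u\n",
                    cpticks, fast, gen);
        }
        return 0;
}
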
     196             : 
     197             : /*
     198             :  * Recompute process priorities, every second.
     199             :  */
     200             : void
     201           0 : schedcpu(void *arg)
     202             : {
     203           0 :         struct timeout *to = (struct timeout *)arg;
     204           0 :         fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
     205             :         struct proc *p;
     206             :         int s;
     207             :         unsigned int newcpu;
     208             :         int phz;
     209             : 
     210             :         /*
     211             :          * If we have a statistics clock, use that to calculate CPU
     212             :          * time, otherwise revert to using the profiling clock (which,
     213             :          * in turn, defaults to hz if there is no separate profiling
      214             :          * clock available).
     215             :          */
     216           0 :         phz = stathz ? stathz : profhz;
     217           0 :         KASSERT(phz);
     218             : 
     219           0 :         LIST_FOREACH(p, &allproc, p_list) {
     220             :                 /*
     221             :                  * Increment sleep time (if sleeping). We ignore overflow.
     222             :                  */
     223           0 :                 if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
     224           0 :                         p->p_slptime++;
     225           0 :                 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
     226             :                 /*
     227             :                  * If the process has slept the entire second,
     228             :                  * stop recalculating its priority until it wakes up.
     229             :                  */
     230           0 :                 if (p->p_slptime > 1)
     231             :                         continue;
     232           0 :                 SCHED_LOCK(s);
     233             :                 /*
     234             :                  * p_pctcpu is only for diagnostic tools such as ps.
     235             :                  */
     236             : #if     (FSHIFT >= CCPU_SHIFT)
     237           0 :                 p->p_pctcpu += (phz == 100)?
     238             :                         ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
     239           0 :                         100 * (((fixpt_t) p->p_cpticks)
     240           0 :                                 << (FSHIFT - CCPU_SHIFT)) / phz;
     241             : #else
     242             :                 p->p_pctcpu += ((FSCALE - ccpu) *
     243             :                         (p->p_cpticks * FSCALE / phz)) >> FSHIFT;
     244             : #endif
     245           0 :                 p->p_cpticks = 0;
     246           0 :                 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
     247           0 :                 p->p_estcpu = newcpu;
     248           0 :                 resetpriority(p);
     249           0 :                 if (p->p_priority >= PUSER) {
     250           0 :                         if (p->p_stat == SRUN &&
     251           0 :                             (p->p_priority / SCHED_PPQ) !=
     252           0 :                             (p->p_usrpri / SCHED_PPQ)) {
     253           0 :                                 remrunqueue(p);
     254           0 :                                 p->p_priority = p->p_usrpri;
     255           0 :                                 setrunqueue(p);
     256           0 :                         } else
     257           0 :                                 p->p_priority = p->p_usrpri;
     258             :                 }
     259           0 :                 SCHED_UNLOCK(s);
     260           0 :         }
     261           0 :         uvm_meter();
     262           0 :         wakeup(&lbolt);
     263           0 :         timeout_add_sec(to, 1);
     264           0 : }
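
The per-second `p_pctcpu * ccpu >> FSHIFT` step decays the %CPU estimate geometrically. Since ccpu = exp(-1/20), sixty idle seconds leave exp(-3) =~ 5% of the old value, which is the "95% in 60 seconds" rule quoted above. A fixed-point sketch, again assuming FSHIFT = 11:

#include <stdio.h>
#include <stdint.h>

#define FSHIFT 11
#define FSCALE (1 << FSHIFT)

typedef uint32_t fixpt_t;

int
main(void)
{
        fixpt_t ccpu = (fixpt_t)(0.95122942450071400909 * FSCALE);
        fixpt_t pctcpu = FSCALE;        /* start at 100% CPU */
        int sec;

        for (sec = 1; sec <= 60; sec++) {
                pctcpu = (pctcpu * ccpu) >> FSHIFT;     /* schedcpu's decay */
                if (sec % 20 == 0)
                        printf("after %2ds idle: %5.1f%%\n", sec,
                            100.0 * pctcpu / FSCALE);
        }
        return 0;
}
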
     265             : 
     266             : /*
     267             :  * Recalculate the priority of a process after it has slept for a while.
     268             :  * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
     269             :  * least six times the loadfactor will decay p_estcpu to zero.
     270             :  */
     271             : void
     272           0 : updatepri(struct proc *p)
     273             : {
     274           0 :         unsigned int newcpu = p->p_estcpu;
     275           0 :         fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
     276             : 
     277           0 :         SCHED_ASSERT_LOCKED();
     278             : 
     279           0 :         if (p->p_slptime > 5 * loadfac)
     280           0 :                 p->p_estcpu = 0;
     281             :         else {
     282           0 :                 p->p_slptime--;      /* the first time was done in schedcpu */
     283           0 :                 while (newcpu && --p->p_slptime)
     284           0 :                         newcpu = (int) decay_cpu(loadfac, newcpu);
     285           0 :                 p->p_estcpu = newcpu;
     286             :         }
     287           0 :         resetpriority(p);
     288           0 : }
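
The "six times the loadfactor" figure in the comment can be checked with the same fixed-point decay_cpu() arithmetic: with integer truncation, an estcpu of 255 reaches zero within roughly five to six times the loadfactor in seconds. A sketch (ldavg is in FSCALE units, as in the kernel):

#include <stdio.h>
#include <stdint.h>

#define FSHIFT 11
#define FSCALE (1 << FSHIFT)

typedef uint64_t fixpt_t;

#define loadfactor(loadav)      (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))

int
main(void)
{
        int loadav;

        for (loadav = 1; loadav <= 4; loadav++) {
                fixpt_t loadfac = loadfactor((fixpt_t)loadav * FSCALE);
                unsigned int estcpu = 255;
                int secs = 0;

                while (estcpu) {
                        estcpu = (unsigned int)decay_cpu(loadfac, estcpu);
                        secs++;
                }
                printf("loadav %d: estcpu 255 -> 0 in %2d s (6*loadfactor = %d)\n",
                    loadav, secs, 6 * loadfactor(loadav));
        }
        return 0;
}
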
     289             : 
     290             : /*
     291             :  * General yield call.  Puts the current process back on its run queue and
     292             :  * performs a voluntary context switch.
     293             :  */
     294             : void
     295           0 : yield(void)
     296             : {
     297           0 :         struct proc *p = curproc;
     298             :         int s;
     299             : 
     300           0 :         NET_ASSERT_UNLOCKED();
     301             : 
     302           0 :         SCHED_LOCK(s);
     303           0 :         p->p_priority = p->p_usrpri;
     304           0 :         p->p_stat = SRUN;
     305           0 :         setrunqueue(p);
     306           0 :         p->p_ru.ru_nvcsw++;
     307           0 :         mi_switch();
     308           0 :         SCHED_UNLOCK(s);
     309           0 : }
     310             : 
     311             : /*
     312             :  * General preemption call.  Puts the current process back on its run queue
      313             :  * and performs an involuntary context switch.  The next process to run
      314             :  * is chosen by the normal selection criteria; no explicit target
      315             :  * process can be supplied.
     316             :  */
     317             : void
     318           0 : preempt(void)
     319             : {
     320           0 :         struct proc *p = curproc;
     321             :         int s;
     322             : 
     323           0 :         SCHED_LOCK(s);
     324           0 :         p->p_priority = p->p_usrpri;
     325           0 :         p->p_stat = SRUN;
     326           0 :         setrunqueue(p);
     327           0 :         p->p_ru.ru_nivcsw++;
     328           0 :         mi_switch();
     329           0 :         SCHED_UNLOCK(s);
     330           0 : }
     331             : 
     332             : void
     333           0 : mi_switch(void)
     334             : {
     335           0 :         struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
     336           0 :         struct proc *p = curproc;
     337             :         struct proc *nextproc;
     338           0 :         struct process *pr = p->p_p;
     339             :         struct rlimit *rlim;
     340             :         rlim_t secs;
     341           0 :         struct timespec ts;
     342             : #ifdef MULTIPROCESSOR
     343             :         int hold_count;
     344             :         int sched_count;
     345             : #endif
     346             : 
     347           0 :         assertwaitok();
     348           0 :         KASSERT(p->p_stat != SONPROC);
     349             : 
     350           0 :         SCHED_ASSERT_LOCKED();
     351             : 
     352             : #ifdef MULTIPROCESSOR
     353             :         /*
     354             :          * Release the kernel_lock, as we are about to yield the CPU.
     355             :          */
     356           0 :         sched_count = __mp_release_all_but_one(&sched_lock);
     357           0 :         if (_kernel_lock_held())
     358           0 :                 hold_count = __mp_release_all(&kernel_lock);
     359             :         else
     360             :                 hold_count = 0;
     361             : #endif
     362             : 
     363             :         /*
     364             :          * Compute the amount of time during which the current
     365             :          * process was running, and add that to its total so far.
     366             :          */
     367           0 :         nanouptime(&ts);
     368           0 :         if (timespeccmp(&ts, &spc->spc_runtime, <)) {
     369             : #if 0
     370             :                 printf("uptime is not monotonic! "
     371             :                     "ts=%lld.%09lu, runtime=%lld.%09lu\n",
      372             :                     (long long)ts.tv_sec, ts.tv_nsec,
     373             :                     (long long)spc->spc_runtime.tv_sec,
     374             :                     spc->spc_runtime.tv_nsec);
     375             : #endif
     376             :         } else {
     377           0 :                 timespecsub(&ts, &spc->spc_runtime, &ts);
     378           0 :                 timespecadd(&p->p_rtime, &ts, &p->p_rtime);
     379             :         }
     380             : 
     381             :         /* add the time counts for this thread to the process's total */
     382           0 :         tuagg_unlocked(pr, p);
     383             : 
     384             :         /*
     385             :          * Check if the process exceeds its cpu resource allocation.
     386             :          * If over max, kill it.
     387             :          */
     388           0 :         rlim = &pr->ps_limit->pl_rlimit[RLIMIT_CPU];
     389           0 :         secs = pr->ps_tu.tu_runtime.tv_sec;
     390           0 :         if (secs >= rlim->rlim_cur) {
     391           0 :                 if (secs >= rlim->rlim_max) {
     392           0 :                         psignal(p, SIGKILL);
     393           0 :                 } else {
     394           0 :                         psignal(p, SIGXCPU);
     395           0 :                         if (rlim->rlim_cur < rlim->rlim_max)
     396           0 :                                 rlim->rlim_cur += 5;
     397             :                 }
     398             :         }
     399             : 
     400             :         /*
     401             :          * Process is about to yield the CPU; clear the appropriate
     402             :          * scheduling flags.
     403             :          */
     404           0 :         atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
     405             : 
     406           0 :         nextproc = sched_chooseproc();
     407             : 
     408           0 :         if (p != nextproc) {
     409           0 :                 uvmexp.swtch++;
     410           0 :                 cpu_switchto(p, nextproc);
     411           0 :         } else {
     412           0 :                 p->p_stat = SONPROC;
     413             :         }
     414             : 
     415           0 :         clear_resched(curcpu());
     416             : 
     417           0 :         SCHED_ASSERT_LOCKED();
     418             : 
     419             :         /*
     420             :          * To preserve lock ordering, we need to release the sched lock
     421             :          * and grab it after we grab the big lock.
     422             :          * In the future, when the sched lock isn't recursive, we'll
     423             :          * just release it here.
     424             :          */
     425             : #ifdef MULTIPROCESSOR
     426           0 :         __mp_unlock(&sched_lock);
     427             : #endif
     428             : 
     429           0 :         SCHED_ASSERT_UNLOCKED();
     430             : 
     431             :         /*
     432             :          * We're running again; record our new start time.  We might
      433             :          * be running on a new CPU now, so don't use the cached
     434             :          * schedstate_percpu pointer.
     435             :          */
     436           0 :         KASSERT(p->p_cpu == curcpu());
     437             : 
     438           0 :         nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
     439             : 
     440             : #ifdef MULTIPROCESSOR
     441             :         /*
     442             :          * Reacquire the kernel_lock now.  We do this after we've
     443             :          * released the scheduler lock to avoid deadlock, and before
     444             :          * we reacquire the interlock and the scheduler lock.
     445             :          */
     446           0 :         if (hold_count)
     447           0 :                 __mp_acquire_count(&kernel_lock, hold_count);
     448           0 :         __mp_acquire_count(&sched_lock, sched_count + 1);
     449             : #endif
     450           0 : }
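
The RLIMIT_CPU check above is observable from userland: once accumulated CPU time passes the soft limit the kernel raises SIGXCPU and bumps the soft limit by 5 seconds, so the signal repeats roughly every 5 CPU-seconds until the hard limit delivers SIGKILL. A minimal sketch using the standard setrlimit(2)/sigaction(2) interfaces:

#include <sys/resource.h>

#include <signal.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t got_xcpu;

static void
on_xcpu(int sig)
{
        (void)sig;
        got_xcpu = 1;
}

int
main(void)
{
        struct rlimit rl = { 1, 10 };   /* soft 1s, hard 10s of CPU time */
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_xcpu;
        sigaction(SIGXCPU, &sa, NULL);
        setrlimit(RLIMIT_CPU, &rl);

        for (;;) {              /* spin; SIGKILL ends us near the hard limit */
                if (got_xcpu) {
                        got_xcpu = 0;
                        write(STDOUT_FILENO, "SIGXCPU\n", 8);
                }
        }
}

Note that the kernel only bumps rlim_cur while it is below rlim_max, so the soft limit can end up slightly above the hard one and the SIGKILL then lands a few seconds past it.
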
     451             : 
     452             : static __inline void
     453           0 : resched_proc(struct proc *p, u_char pri)
     454             : {
     455             :         struct cpu_info *ci;
     456             : 
     457             :         /*
     458             :          * XXXSMP
     459             :          * This does not handle the case where its last
     460             :          * CPU is running a higher-priority process, but every
     461             :          * other CPU is running a lower-priority process.  There
     462             :          * are ways to handle this situation, but they're not
     463             :          * currently very pretty, and we also need to weigh the
     464             :          * cost of moving a process from one CPU to another.
     465             :          *
     466             :          * XXXSMP
     467             :          * There is also the issue of locking the other CPU's
     468             :          * sched state, which we currently do not do.
     469             :          */
     470           0 :         ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu();
     471           0 :         if (pri < ci->ci_schedstate.spc_curpriority)
     472           0 :                 need_resched(ci);
     473           0 : }
     474             : 
     475             : /*
      476             :  * Change process state to be runnable,
      477             :  * place it on the run queue, and recompute the
      478             :  * priority if the process has been asleep for a while.
     479             :  */
     480             : void
     481           0 : setrunnable(struct proc *p)
     482             : {
     483           0 :         SCHED_ASSERT_LOCKED();
     484             : 
     485           0 :         switch (p->p_stat) {
     486             :         case 0:
     487             :         case SRUN:
     488             :         case SONPROC:
     489             :         case SDEAD:
     490             :         case SIDL:
     491             :         default:
     492           0 :                 panic("setrunnable");
     493             :         case SSTOP:
     494             :                 /*
     495             :                  * If we're being traced (possibly because someone attached us
      496             :                  * while we were stopped), check for a signal from the
      497             :                  * debugger, then fall through to unsleep (SSLEEP case). */
     498           0 :                 if ((p->p_p->ps_flags & PS_TRACED) != 0 && p->p_xstat != 0)
     499           0 :                         atomic_setbits_int(&p->p_siglist, sigmask(p->p_xstat));
     500             :         case SSLEEP:
     501           0 :                 unsleep(p);             /* e.g. when sending signals */
     502             :                 break;
     503             :         }
     504           0 :         p->p_stat = SRUN;
     505           0 :         p->p_cpu = sched_choosecpu(p);
     506           0 :         setrunqueue(p);
     507           0 :         if (p->p_slptime > 1)
     508           0 :                 updatepri(p);
     509           0 :         p->p_slptime = 0;
     510           0 :         resched_proc(p, p->p_priority);
     511           0 : }
     512             : 
     513             : /*
     514             :  * Compute the priority of a process when running in user mode.
     515             :  * Arrange to reschedule if the resulting priority is better
     516             :  * than that of the current process.
     517             :  */
     518             : void
     519           0 : resetpriority(struct proc *p)
     520             : {
     521             :         unsigned int newpriority;
     522             : 
     523           0 :         SCHED_ASSERT_LOCKED();
     524             : 
     525           0 :         newpriority = PUSER + p->p_estcpu +
     526           0 :             NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
     527           0 :         newpriority = min(newpriority, MAXPRI);
     528           0 :         p->p_usrpri = newpriority;
     529           0 :         resched_proc(p, p->p_usrpri);
     530           0 : }
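
Plugging in the usual OpenBSD constants (assumed here: PUSER = 50, NZERO = 20, NICE_WEIGHT = 2, MAXPRI = 127) shows how accumulated CPU time and niceness push the user priority up, where larger numbers mean less scheduling preference:

#include <stdio.h>

/* assumed values, matching OpenBSD's param.h / sched.h */
#define PUSER       50
#define NZERO       20
#define NICE_WEIGHT 2
#define MAXPRI      127

/* nice is in the kernel's 0..39 representation, NZERO being the default */
static unsigned int
user_priority(unsigned int estcpu, int nice)
{
        unsigned int pri = PUSER + estcpu + NICE_WEIGHT * (nice - NZERO);

        return pri < MAXPRI ? pri : MAXPRI;
}

int
main(void)
{
        printf("idle, nice  0: %u\n", user_priority(0, NZERO));        /* 50 */
        printf("busy, nice  0: %u\n", user_priority(40, NZERO));       /* 90 */
        printf("idle, nice 20: %u\n", user_priority(0, NZERO + 20));   /* 90 */
        printf("maxed out:     %u\n", user_priority(255, NZERO + 20)); /* 127 */
        return 0;
}
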
     531             : 
     532             : /*
     533             :  * We adjust the priority of the current process.  The priority of a process
     534             :  * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
      535             :  * is increased here.  The formula for computing priorities (resetpriority())
     536             :  * will compute a different value each time p_estcpu increases. This can
     537             :  * cause a switch, but unless the priority crosses a PPQ boundary the actual
     538             :  * queue will not change.  The cpu usage estimator ramps up quite quickly
     539             :  * when the process is running (linearly), and decays away exponentially, at
     540             :  * a rate which is proportionally slower when the system is busy.  The basic
     541             :  * principle is that the system will 90% forget that the process used a lot
     542             :  * of CPU time in 5 * loadav seconds.  This causes the system to favor
     543             :  * processes which haven't run much recently, and to round-robin among other
     544             :  * processes.
     545             :  */
     546             : void
     547           0 : schedclock(struct proc *p)
     548             : {
     549             :         int s;
     550             : 
     551           0 :         SCHED_LOCK(s);
     552           0 :         p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
     553           0 :         resetpriority(p);
     554           0 :         if (p->p_priority >= PUSER)
     555           0 :                 p->p_priority = p->p_usrpri;
     556           0 :         SCHED_UNLOCK(s);
     557           0 : }
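
Combining schedclock()'s linear ramp with schedcpu()'s once-a-second decay gives the dynamics the comment above describes: a hog's estcpu climbs to an equilibrium, then melts away once it sleeps. A toy simulation, with an assumed 100 estcpu increments per busy second, loadav = 1, and 255 as a stand-in for the ESTCPULIM() cap:

#include <stdio.h>

#define FSCALE     2048
#define ESTCPU_CAP 255          /* stand-in for the ESTCPULIM() cap */

int
main(void)
{
        unsigned long loadfac = 2 * FSCALE;     /* loadav = 1 */
        unsigned int estcpu = 0;
        int sec;

        for (sec = 1; sec <= 10; sec++) {       /* ten busy seconds */
                estcpu += 100;                  /* schedclock() ramp */
                if (estcpu > ESTCPU_CAP)
                        estcpu = ESTCPU_CAP;
                estcpu = loadfac * estcpu / (loadfac + FSCALE);
                printf("busy %2ds: estcpu %3u\n", sec, estcpu);
        }
        for (sec = 1; sec <= 10; sec++) {       /* then ten idle seconds */
                estcpu = loadfac * estcpu / (loadfac + FSCALE);
                printf("idle %2ds: estcpu %3u\n", sec, estcpu);
        }
        return 0;
}

Under these assumptions estcpu settles near 170 while the process hogs the CPU and is mostly forgotten after ten idle seconds, roughly matching the 90%-in-5*loadav rule.
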
     558             : 
     559             : void (*cpu_setperf)(int);
     560             : 
     561             : #define PERFPOL_MANUAL 0
     562             : #define PERFPOL_AUTO 1
     563             : #define PERFPOL_HIGH 2
     564             : int perflevel = 100;
     565             : int perfpolicy = PERFPOL_MANUAL;
     566             : 
     567             : #ifndef SMALL_KERNEL
     568             : /*
     569             :  * The code below handles CPU throttling.
     570             :  */
     571             : #include <sys/sysctl.h>
     572             : 
     573             : void setperf_auto(void *);
     574             : struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
     575             : 
     576             : void
     577           0 : setperf_auto(void *v)
     578             : {
     579             :         static uint64_t *idleticks, *totalticks;
     580             :         static int downbeats;
     581             : 
     582             :         int i, j;
     583             :         int speedup;
     584             :         CPU_INFO_ITERATOR cii;
     585             :         struct cpu_info *ci;
     586             :         uint64_t idle, total, allidle, alltotal;
     587             : 
     588           0 :         if (perfpolicy != PERFPOL_AUTO)
     589           0 :                 return;
     590             : 
     591           0 :         if (!idleticks)
     592           0 :                 if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
     593             :                     M_DEVBUF, M_NOWAIT | M_ZERO)))
     594           0 :                         return;
     595           0 :         if (!totalticks)
     596           0 :                 if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
     597             :                     M_DEVBUF, M_NOWAIT | M_ZERO))) {
     598           0 :                         free(idleticks, M_DEVBUF,
     599           0 :                             sizeof(*idleticks) * ncpusfound);
     600           0 :                         return;
     601             :                 }
     602             : 
     603             :         alltotal = allidle = 0;
     604             :         j = 0;
     605             :         speedup = 0;
     606           0 :         CPU_INFO_FOREACH(cii, ci) {
     607             :                 total = 0;
     608           0 :                 for (i = 0; i < CPUSTATES; i++) {
     609           0 :                         total += ci->ci_schedstate.spc_cp_time[i];
     610             :                 }
     611           0 :                 total -= totalticks[j];
     612           0 :                 idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
     613           0 :                 if (idle < total / 3)
     614           0 :                         speedup = 1;
     615           0 :                 alltotal += total;
     616           0 :                 allidle += idle;
     617           0 :                 idleticks[j] += idle;
     618           0 :                 totalticks[j] += total;
     619           0 :                 j++;
     620             :         }
     621           0 :         if (allidle < alltotal / 2)
     622           0 :                 speedup = 1;
     623           0 :         if (speedup)
     624           0 :                 downbeats = 5;
     625             : 
     626           0 :         if (speedup && perflevel != 100) {
     627           0 :                 perflevel = 100;
     628           0 :                 cpu_setperf(perflevel);
     629           0 :         } else if (!speedup && perflevel != 0 && --downbeats <= 0) {
     630           0 :                 perflevel = 0;
     631           0 :                 cpu_setperf(perflevel);
     632           0 :         }
     633             :         
     634           0 :         timeout_add_msec(&setperf_to, 100);
     635           0 : }
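
The policy above is a simple hysteresis: speed up immediately when any one CPU is less than a third idle or the machine as a whole is less than half idle, and only drop to the lowest level after five consecutive quiet 100 ms beats. A userland sketch of just the decision logic, fed with hypothetical sample data:

#include <stdio.h>

struct sample { unsigned long idle, total; };   /* per-beat tick deltas */

/* Decide the new perflevel; *downbeats is the quiet-beat countdown. */
static int
decide(const struct sample *cpus, int ncpu, int *downbeats, int perflevel)
{
        unsigned long allidle = 0, alltotal = 0;
        int i, speedup = 0;

        for (i = 0; i < ncpu; i++) {
                if (cpus[i].idle < cpus[i].total / 3)
                        speedup = 1;            /* one busy CPU is enough */
                allidle += cpus[i].idle;
                alltotal += cpus[i].total;
        }
        if (allidle < alltotal / 2)
                speedup = 1;
        if (speedup)
                *downbeats = 5;                 /* re-arm the hold-off */

        if (speedup && perflevel != 100)
                return 100;
        if (!speedup && perflevel != 0 && --*downbeats <= 0)
                return 0;
        return perflevel;
}

int
main(void)
{
        struct sample busy[2] = { { 10, 100 }, { 90, 100 } };
        struct sample quiet[2] = { { 95, 100 }, { 98, 100 } };
        int downbeats = 0, level = 0, beat;

        level = decide(busy, 2, &downbeats, level);
        printf("busy beat:    level %d\n", level);      /* jumps to 100 */
        for (beat = 1; beat <= 6; beat++) {
                level = decide(quiet, 2, &downbeats, level);
                printf("quiet beat %d: level %d\n", beat, level);
        }
        return 0;
}

The level jumps to 100 on the first busy beat and only falls back to 0 on the fifth consecutive quiet one, mirroring the downbeats logic in setperf_auto().
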
     636             : 
     637             : int
     638           0 : sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
     639             : {
     640           0 :         int err, newperf;
     641             : 
     642           0 :         if (!cpu_setperf)
     643           0 :                 return EOPNOTSUPP;
     644             : 
     645           0 :         if (perfpolicy != PERFPOL_MANUAL)
     646           0 :                 return sysctl_rdint(oldp, oldlenp, newp, perflevel);
     647             :         
     648           0 :         newperf = perflevel;
     649           0 :         err = sysctl_int(oldp, oldlenp, newp, newlen, &newperf);
     650           0 :         if (err)
     651           0 :                 return err;
     652           0 :         if (newperf > 100)
     653           0 :                 newperf = 100;
     654           0 :         if (newperf < 0)
     655           0 :                 newperf = 0;
     656           0 :         perflevel = newperf;
     657           0 :         cpu_setperf(perflevel);
     658             : 
     659           0 :         return 0;
     660           0 : }
     661             : 
     662             : int
     663           0 : sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
     664             : {
     665           0 :         char policy[32];
     666             :         int err;
     667             : 
     668           0 :         if (!cpu_setperf)
     669           0 :                 return EOPNOTSUPP;
     670             : 
     671           0 :         switch (perfpolicy) {
     672             :         case PERFPOL_MANUAL:
     673           0 :                 strlcpy(policy, "manual", sizeof(policy));
     674           0 :                 break;
     675             :         case PERFPOL_AUTO:
     676           0 :                 strlcpy(policy, "auto", sizeof(policy));
     677           0 :                 break;
     678             :         case PERFPOL_HIGH:
     679           0 :                 strlcpy(policy, "high", sizeof(policy));
     680           0 :                 break;
     681             :         default:
     682           0 :                 strlcpy(policy, "unknown", sizeof(policy));
     683           0 :                 break;
     684             :         }
     685             : 
     686           0 :         if (newp == NULL)
     687           0 :                 return sysctl_rdstring(oldp, oldlenp, newp, policy);
     688             : 
     689           0 :         err = sysctl_string(oldp, oldlenp, newp, newlen, policy, sizeof(policy));
     690           0 :         if (err)
     691           0 :                 return err;
     692           0 :         if (strcmp(policy, "manual") == 0)
     693           0 :                 perfpolicy = PERFPOL_MANUAL;
     694           0 :         else if (strcmp(policy, "auto") == 0)
     695           0 :                 perfpolicy = PERFPOL_AUTO;
     696           0 :         else if (strcmp(policy, "high") == 0)
     697           0 :                 perfpolicy = PERFPOL_HIGH;
     698             :         else
     699           0 :                 return EINVAL;
     700             : 
     701           0 :         if (perfpolicy == PERFPOL_AUTO) {
     702           0 :                 timeout_add_msec(&setperf_to, 200);
     703           0 :         } else if (perfpolicy == PERFPOL_HIGH) {
     704           0 :                 perflevel = 100;
     705           0 :                 cpu_setperf(perflevel);
     706           0 :         }
     707           0 :         return 0;
     708           0 : }
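
From userland these two handlers appear as the hw.perfpolicy and hw.setperf sysctls, for example:

# sysctl hw.perfpolicy=auto       # hand control to setperf_auto()
# sysctl hw.perfpolicy=manual
# sysctl hw.setperf=50            # writable only while the policy is manual
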
     709             : #endif

Generated by: LCOV version 1.13