Line data Source code
1 : /* $OpenBSD: kern_sched.c,v 1.51 2018/07/12 01:23:38 cheloha Exp $ */
2 : /*
3 : * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
4 : *
5 : * Permission to use, copy, modify, and distribute this software for any
6 : * purpose with or without fee is hereby granted, provided that the above
7 : * copyright notice and this permission notice appear in all copies.
8 : *
9 : * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 : * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 : * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 : * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 : * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 : * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 : */
17 :
18 : #include <sys/param.h>
19 :
20 : #include <sys/sched.h>
21 : #include <sys/proc.h>
22 : #include <sys/kthread.h>
23 : #include <sys/systm.h>
24 : #include <sys/resourcevar.h>
25 : #include <sys/signalvar.h>
26 : #include <sys/mutex.h>
27 : #include <sys/task.h>
28 :
29 : #include <uvm/uvm_extern.h>
30 :
31 : void sched_kthreads_create(void *);
32 :
33 : int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
34 : struct proc *sched_steal_proc(struct cpu_info *);
35 :
36 : /*
37 : * To help choose which cpu should run which process, we keep track
38 : * of which cpus are currently idle and which cpus have processes
39 : * queued.
40 : */
41 : struct cpuset sched_idle_cpus;
42 : struct cpuset sched_queued_cpus;
43 : struct cpuset sched_all_cpus;
44 :
45 : /*
46 : * Some general scheduler counters.
47 : */
48 : uint64_t sched_nmigrations; /* Cpu migration counter */
49 : uint64_t sched_nomigrations; /* Cpu no migration counter */
50 : uint64_t sched_noidle; /* Times we didn't pick the idle task */
51 : uint64_t sched_stolen; /* Times we stole proc from other cpus */
52 : uint64_t sched_choose; /* Times we chose a cpu */
53 : uint64_t sched_wasidle; /* Times we came out of idle */
54 :
55 : #ifdef MULTIPROCESSOR
56 : struct taskq *sbartq;
57 : #endif
58 :
59 : int sched_smt;
60 :
61 : /*
62 : * A few notes about cpu_switchto, which is implemented in MD code.
63 : *
64 : * cpu_switchto takes two arguments, the old proc and the proc
65 : * it should switch to. The new proc will never be NULL, so we always have
66 : * a saved state that we need to switch to. The old proc however can
67 : * be NULL if the process is exiting. NULL for the old proc simply
68 : * means "don't bother saving old state".
69 : *
70 : * cpu_switchto is supposed to atomically load the new state of the process,
71 : * including the pcb and pmap, and to set curproc, the p_cpu pointer in the
72 : * proc and p_stat to SONPROC. This is atomic only with respect to interrupts;
73 : * other cpus in the system must not depend on this state being consistent.
74 : * Therefore no locking is necessary in cpu_switchto other than blocking
75 : * interrupts during the context switch.
76 : */
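/*
 * Illustrative sketch only (not part of this file): the MI side, e.g.
 * mi_switch() or sched_exit() below, ends up doing roughly
 *
 *	SCHED_LOCK(s);
 *	nextproc = sched_chooseproc();
 *	...
 *	cpu_switchto(curproc, nextproc);    (or cpu_switchto(NULL, idle))
 *
 * and the MD implementation saves the old register state into the old
 * proc's pcb (when the old proc is not NULL), activates the new pmap,
 * restores the new pcb and sets curproc, p_cpu and p_stat as described
 * above.
 */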
77 :
78 : /*
79 : * sched_init_cpu is called from main() for the boot cpu; it is then the
80 : * responsibility of the MD code to call it for all other cpus.
81 : */
82 : void
83 0 : sched_init_cpu(struct cpu_info *ci)
84 : {
85 0 : struct schedstate_percpu *spc = &ci->ci_schedstate;
86 : int i;
87 :
88 0 : for (i = 0; i < SCHED_NQS; i++)
89 0 : TAILQ_INIT(&spc->spc_qs[i]);
90 :
91 0 : spc->spc_idleproc = NULL;
92 :
93 0 : kthread_create_deferred(sched_kthreads_create, ci);
94 :
95 0 : LIST_INIT(&spc->spc_deadproc);
96 :
97 : /*
98 : * Slight hack here until the cpuset code handles cpu_info
99 : * structures.
100 : */
101 0 : cpuset_init_cpu(ci);
102 :
103 : #ifdef __HAVE_CPU_TOPOLOGY
104 0 : if (!sched_smt && ci->ci_smt_id > 0)
105 0 : return;
106 : #endif
107 0 : cpuset_add(&sched_all_cpus, ci);
108 0 : }
109 :
110 : void
111 0 : sched_kthreads_create(void *v)
112 : {
113 0 : struct cpu_info *ci = v;
114 0 : struct schedstate_percpu *spc = &ci->ci_schedstate;
115 : static int num;
116 :
117 0 : if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
118 : FORK_SYSTEM|FORK_SIGHAND|FORK_IDLE, sched_idle, ci, NULL,
119 0 : &spc->spc_idleproc))
120 0 : panic("fork idle");
121 :
122 : /* Name it as specified. */
123 0 : snprintf(spc->spc_idleproc->p_p->ps_comm,
124 : sizeof(spc->spc_idleproc->p_p->ps_comm),
125 0 : "idle%d", num);
126 :
127 0 : num++;
128 0 : }
129 :
130 : void
131 0 : sched_idle(void *v)
132 : {
133 : struct schedstate_percpu *spc;
134 0 : struct proc *p = curproc;
135 0 : struct cpu_info *ci = v;
136 : int s;
137 :
138 0 : KERNEL_UNLOCK();
139 :
140 0 : spc = &ci->ci_schedstate;
141 :
142 : /*
143 : * The first time we enter here we're not supposed to idle;
144 : * we just go away for a while.
145 : */
146 0 : SCHED_LOCK(s);
147 0 : cpuset_add(&sched_idle_cpus, ci);
148 0 : p->p_stat = SSLEEP;
149 0 : p->p_cpu = ci;
150 0 : atomic_setbits_int(&p->p_flag, P_CPUPEG);
151 0 : mi_switch();
152 0 : cpuset_del(&sched_idle_cpus, ci);
153 0 : SCHED_UNLOCK(s);
154 :
155 0 : KASSERT(ci == curcpu());
156 0 : KASSERT(curproc == spc->spc_idleproc);
157 :
158 0 : while (1) {
159 0 : while (!cpu_is_idle(curcpu())) {
160 : struct proc *dead;
161 :
162 0 : SCHED_LOCK(s);
163 0 : p->p_stat = SSLEEP;
164 0 : mi_switch();
165 0 : SCHED_UNLOCK(s);
166 :
167 0 : while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
168 0 : LIST_REMOVE(dead, p_hash);
169 0 : exit2(dead);
170 : }
171 : }
172 :
173 0 : splassert(IPL_NONE);
174 :
175 0 : cpuset_add(&sched_idle_cpus, ci);
176 0 : cpu_idle_enter();
177 0 : while (spc->spc_whichqs == 0) {
178 : #ifdef MULTIPROCESSOR
179 0 : if (spc->spc_schedflags & SPCF_SHOULDHALT &&
180 0 : (spc->spc_schedflags & SPCF_HALTED) == 0) {
181 0 : cpuset_del(&sched_idle_cpus, ci);
182 0 : SCHED_LOCK(s);
183 0 : atomic_setbits_int(&spc->spc_schedflags,
184 0 : spc->spc_whichqs ? 0 : SPCF_HALTED);
185 0 : SCHED_UNLOCK(s);
186 0 : wakeup(spc);
187 0 : }
188 : #endif
189 0 : cpu_idle_cycle();
190 : }
191 0 : cpu_idle_leave();
192 0 : cpuset_del(&sched_idle_cpus, ci);
193 : }
194 : }
195 :
196 : /*
197 : * To free our address space we have to jump through a few hoops.
198 : * The freeing is done by the reaper, but until we have one reaper
199 : * per cpu, we have no way of putting this proc on the deadproc list
200 : * and waking up the reaper without risking having our address space and
201 : * stack torn from under us before we manage to switch to another proc.
202 : * Therefore we have a per-cpu list of dead processes where we put this
203 : * proc and have idle clean up that list and move it to the reaper list.
204 : * All this will be unnecessary once we can bind the reaper to this cpu
205 : * and not risk having it switch to another cpu in case it sleeps.
206 : */
207 : void
208 0 : sched_exit(struct proc *p)
209 : {
210 0 : struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
211 0 : struct timespec ts;
212 : struct proc *idle;
213 : int s;
214 :
215 0 : nanouptime(&ts);
216 0 : timespecsub(&ts, &spc->spc_runtime, &ts);
217 0 : timespecadd(&p->p_rtime, &ts, &p->p_rtime);
218 :
219 0 : LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
220 :
221 : #ifdef MULTIPROCESSOR
222 : /* This process no longer needs to hold the kernel lock. */
223 0 : KERNEL_ASSERT_LOCKED();
224 0 : __mp_release_all(&kernel_lock);
225 : #endif
226 :
227 0 : SCHED_LOCK(s);
228 0 : idle = spc->spc_idleproc;
229 0 : idle->p_stat = SRUN;
230 0 : cpu_switchto(NULL, idle);
231 0 : panic("cpu_switchto returned");
232 : }
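/*
 * The dead proc inserted above is reaped from the idle loop: once this
 * cpu has switched to its idle proc, sched_idle() pops every entry off
 * spc_deadproc and hands it to exit2(), which passes it on to the
 * reaper.  Rough sketch of the hand-off:
 *
 *	sched_exit(p)                        sched_idle()
 *	  LIST_INSERT_HEAD(&spc_deadproc)      dead = LIST_FIRST(&spc_deadproc);
 *	  cpu_switchto(NULL, idle)   ---->     LIST_REMOVE(dead, p_hash);
 *	                                       exit2(dead);
 */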
233 :
234 : /*
235 : * Run queue management.
236 : */
237 : void
238 0 : sched_init_runqueues(void)
239 : {
240 0 : }
241 :
242 : void
243 0 : setrunqueue(struct proc *p)
244 : {
245 : struct schedstate_percpu *spc;
246 0 : int queue = p->p_priority >> 2;
247 :
248 0 : SCHED_ASSERT_LOCKED();
249 0 : spc = &p->p_cpu->ci_schedstate;
250 0 : spc->spc_nrun++;
251 :
252 0 : TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
253 0 : spc->spc_whichqs |= (1 << queue);
254 0 : cpuset_add(&sched_queued_cpus, p->p_cpu);
255 :
256 0 : if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
257 0 : cpu_unidle(p->p_cpu);
258 0 : }
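/*
 * A worked example of the queue mapping above (a sketch, assuming
 * SCHED_NQS is 32 and priorities run from 0 to 127 as in <sys/sched.h>):
 *
 *	p->p_priority == 50  =>  queue = 50 >> 2 = 12
 *	spc->spc_whichqs |= 1 << 12;
 *
 * Four adjacent priorities share one queue, and ffs(spc_whichqs) - 1 in
 * sched_chooseproc() and sched_steal_proc() recovers the lowest-numbered
 * (best priority) non-empty queue.
 */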
259 :
260 : void
261 0 : remrunqueue(struct proc *p)
262 : {
263 : struct schedstate_percpu *spc;
264 0 : int queue = p->p_priority >> 2;
265 :
266 0 : SCHED_ASSERT_LOCKED();
267 0 : spc = &p->p_cpu->ci_schedstate;
268 0 : spc->spc_nrun--;
269 :
270 0 : TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
271 0 : if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
272 0 : spc->spc_whichqs &= ~(1 << queue);
273 0 : if (spc->spc_whichqs == 0)
274 0 : cpuset_del(&sched_queued_cpus, p->p_cpu);
275 : }
276 0 : }
277 :
278 : struct proc *
279 0 : sched_chooseproc(void)
280 : {
281 0 : struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
282 : struct proc *p;
283 : int queue;
284 :
285 0 : SCHED_ASSERT_LOCKED();
286 :
287 : #ifdef MULTIPROCESSOR
288 0 : if (spc->spc_schedflags & SPCF_SHOULDHALT) {
289 0 : if (spc->spc_whichqs) {
290 0 : for (queue = 0; queue < SCHED_NQS; queue++) {
291 0 : while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
292 0 : remrunqueue(p);
293 0 : p->p_cpu = sched_choosecpu(p);
294 0 : setrunqueue(p);
295 0 : if (p->p_cpu == curcpu()) {
296 0 : KASSERT(p->p_flag & P_CPUPEG);
297 0 : goto again;
298 : }
299 : }
300 : }
301 : }
302 0 : p = spc->spc_idleproc;
303 0 : KASSERT(p);
304 0 : KASSERT(p->p_wchan == NULL);
305 0 : p->p_stat = SRUN;
306 0 : return (p);
307 : }
308 : #endif
309 :
310 : again:
311 0 : if (spc->spc_whichqs) {
312 0 : queue = ffs(spc->spc_whichqs) - 1;
313 0 : p = TAILQ_FIRST(&spc->spc_qs[queue]);
314 0 : remrunqueue(p);
315 0 : sched_noidle++;
316 0 : KASSERT(p->p_stat == SRUN);
317 0 : } else if ((p = sched_steal_proc(curcpu())) == NULL) {
318 0 : p = spc->spc_idleproc;
319 0 : if (p == NULL) {
320 : int s;
321 : /*
322 : * We get here if someone decides to switch during
323 : * boot before forking kthreads, bleh.
324 : * This is kind of like a stupid idle loop.
325 : */
326 : #ifdef MULTIPROCESSOR
327 0 : __mp_unlock(&sched_lock);
328 : #endif
329 0 : spl0();
330 0 : delay(10);
331 0 : SCHED_LOCK(s);
332 : goto again;
333 : }
334 0 : KASSERT(p);
335 0 : p->p_stat = SRUN;
336 0 : }
337 :
338 0 : KASSERT(p->p_wchan == NULL);
339 0 : return (p);
340 0 : }
341 :
342 : struct cpu_info *
343 0 : sched_choosecpu_fork(struct proc *parent, int flags)
344 : {
345 : #ifdef MULTIPROCESSOR
346 : struct cpu_info *choice = NULL;
347 : fixpt_t load, best_load = ~0;
348 : int run, best_run = INT_MAX;
349 : struct cpu_info *ci;
350 0 : struct cpuset set;
351 :
352 : #if 0
353 : /*
354 : * XXX
355 : * Don't do this until we have a painless way to move the cpu in exec.
356 : * Preferably when nuking the old pmap and getting a new one on a
357 : * new cpu.
358 : */
359 : /*
360 : * PPWAIT forks are simple. We know that the parent will not
361 : * run until we exec and choose another cpu, so we just steal its
362 : * cpu.
363 : */
364 : if (flags & FORK_PPWAIT)
365 : return (parent->p_cpu);
366 : #endif
367 :
368 : /*
369 : * Look at all cpus that are currently idle and have nothing queued.
370 : * If there are none, consider all cpus and pick the one with the fewest
371 : * queued procs first, then the one with the lowest load average.
372 : */
373 0 : cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
374 0 : cpuset_intersection(&set, &set, &sched_all_cpus);
375 0 : if (cpuset_first(&set) == NULL)
376 0 : cpuset_copy(&set, &sched_all_cpus);
377 :
378 0 : while ((ci = cpuset_first(&set)) != NULL) {
379 0 : cpuset_del(&set, ci);
380 :
381 0 : load = ci->ci_schedstate.spc_ldavg;
382 0 : run = ci->ci_schedstate.spc_nrun;
383 :
384 0 : if (choice == NULL || run < best_run ||
385 0 : (run == best_run && load < best_load)) {
386 : choice = ci;
387 : best_load = load;
388 : best_run = run;
389 0 : }
390 : }
391 :
392 0 : return (choice);
393 : #else
394 : return (curcpu());
395 : #endif
396 0 : }
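/*
 * Example of the selection above, with made-up numbers: if cpu0 has
 * spc_nrun == 2 and a low load average while cpu1 has spc_nrun == 0 and
 * a high load average, cpu1 is chosen; the run-queue length is compared
 * first and the load average only breaks ties between equal run-queue
 * lengths.
 */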
397 :
398 : struct cpu_info *
399 0 : sched_choosecpu(struct proc *p)
400 : {
401 : #ifdef MULTIPROCESSOR
402 : struct cpu_info *choice = NULL;
403 : int last_cost = INT_MAX;
404 : struct cpu_info *ci;
405 0 : struct cpuset set;
406 :
407 : /*
408 : * If pegged to a cpu, don't allow it to move.
409 : */
410 0 : if (p->p_flag & P_CPUPEG)
411 0 : return (p->p_cpu);
412 :
413 0 : sched_choose++;
414 :
415 : /*
416 : * Look at all cpus that are currently idle and have nothing queued.
417 : * If there are none, pick the cheapest of all cpus.
418 : * (idle + queued could mean that the cpu is handling an interrupt
419 : * at this moment and hasn't had time to leave idle yet).
420 : */
421 0 : cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
422 0 : cpuset_intersection(&set, &set, &sched_all_cpus);
423 :
424 : /*
425 : * First, just check if our current cpu is in that set; if it is,
426 : * this is simple.
427 : * Also, our cpu might not be idle, but if it's the current cpu
428 : * and it has nothing else queued and we're curproc, take it.
429 : */
430 0 : if (cpuset_isset(&set, p->p_cpu) ||
431 0 : (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
432 0 : (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
433 0 : curproc == p)) {
434 0 : sched_wasidle++;
435 0 : return (p->p_cpu);
436 : }
437 :
438 0 : if (cpuset_first(&set) == NULL)
439 0 : cpuset_copy(&set, &sched_all_cpus);
440 :
441 0 : while ((ci = cpuset_first(&set)) != NULL) {
442 0 : int cost = sched_proc_to_cpu_cost(ci, p);
443 :
444 0 : if (choice == NULL || cost < last_cost) {
445 : choice = ci;
446 : last_cost = cost;
447 0 : }
448 0 : cpuset_del(&set, ci);
449 : }
450 :
451 0 : if (p->p_cpu != choice)
452 0 : sched_nmigrations++;
453 : else
454 0 : sched_nomigrations++;
455 :
456 0 : return (choice);
457 : #else
458 : return (curcpu());
459 : #endif
460 0 : }
461 :
462 : /*
463 : * Attempt to steal a proc from some cpu.
464 : */
465 : struct proc *
466 0 : sched_steal_proc(struct cpu_info *self)
467 : {
468 : struct proc *best = NULL;
469 : #ifdef MULTIPROCESSOR
470 : struct schedstate_percpu *spc;
471 : int bestcost = INT_MAX;
472 : struct cpu_info *ci;
473 0 : struct cpuset set;
474 :
475 0 : KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);
476 :
477 : /* Don't steal if we don't want to schedule processes on this CPU. */
478 0 : if (!cpuset_isset(&sched_all_cpus, self))
479 0 : return (NULL);
480 :
481 0 : cpuset_copy(&set, &sched_queued_cpus);
482 :
483 0 : while ((ci = cpuset_first(&set)) != NULL) {
484 : struct proc *p;
485 : int queue;
486 : int cost;
487 :
488 0 : cpuset_del(&set, ci);
489 :
490 0 : spc = &ci->ci_schedstate;
491 :
492 0 : queue = ffs(spc->spc_whichqs) - 1;
493 0 : TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
494 0 : if (p->p_flag & P_CPUPEG)
495 : continue;
496 :
497 0 : cost = sched_proc_to_cpu_cost(self, p);
498 :
499 0 : if (best == NULL || cost < bestcost) {
500 : best = p;
501 : bestcost = cost;
502 0 : }
503 : }
504 : }
505 0 : if (best == NULL)
506 0 : return (NULL);
507 :
508 0 : spc = &best->p_cpu->ci_schedstate;
509 0 : remrunqueue(best);
510 0 : best->p_cpu = self;
511 :
512 0 : sched_stolen++;
513 : #endif
514 0 : return (best);
515 0 : }
516 :
517 : #ifdef MULTIPROCESSOR
518 : /*
519 : * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
520 : */
521 : static int
522 0 : log2(unsigned int i)
523 : {
524 : int ret = 0;
525 :
526 0 : while (i >>= 1)
527 0 : ret++;
528 :
529 0 : return (ret);
530 : }
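/*
 * Examples: log2(1) == 0, log2(8) == 3, log2(1000) == 9; and, as noted
 * above, log2(0) == 0 because the loop body never runs.
 */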
531 :
532 : /*
533 : * Calculate the cost of moving the proc to this cpu.
534 : *
535 : * What we want is some guesstimate of how much "performance" it will
536 : * cost us to move the proc here. Not just for caches and TLBs and NUMA
537 : * memory, but also for the proc itself. A highly loaded cpu might not
538 : * be the best candidate for this proc since it won't get run.
539 : *
540 : * Just total guesstimates for now.
541 : */
542 :
543 : int sched_cost_load = 1;
544 : int sched_cost_priority = 1;
545 : int sched_cost_runnable = 3;
546 : int sched_cost_resident = 1;
547 : #endif
548 :
549 : int
550 0 : sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
551 : {
552 : int cost = 0;
553 : #ifdef MULTIPROCESSOR
554 : struct schedstate_percpu *spc;
555 : int l2resident = 0;
556 :
557 0 : spc = &ci->ci_schedstate;
558 :
559 : /*
560 : * First, account for the priority of the proc we want to move.
561 : * The lower the priority of the destination and the higher the priority
562 : * of the proc, the more willing we are to move it.
563 : */
564 0 : if (!cpuset_isset(&sched_idle_cpus, ci)) {
565 0 : cost += (p->p_priority - spc->spc_curpriority) *
566 0 : sched_cost_priority;
567 0 : cost += sched_cost_runnable;
568 0 : }
569 0 : if (cpuset_isset(&sched_queued_cpus, ci))
570 0 : cost += spc->spc_nrun * sched_cost_runnable;
571 :
572 : /*
573 : * Try to avoid the primary cpu as it handles hardware interrupts.
574 : *
575 : * XXX Needs to be revisited when we distribute interrupts
576 : * over cpus.
577 : */
578 0 : if (CPU_IS_PRIMARY(ci))
579 0 : cost += sched_cost_runnable;
580 :
581 : /*
582 : * Higher load on the destination means we don't want to go there.
583 : */
584 0 : cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
585 :
586 : /*
587 : * If the proc is already on this cpu and has been running recently,
588 : * lower the cost by an estimate of its footprint.
589 : */
590 0 : if (p->p_cpu == ci && p->p_slptime == 0) {
591 : l2resident =
592 0 : log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
593 0 : cost -= l2resident * sched_cost_resident;
594 0 : }
595 : #endif
596 0 : return (cost);
597 : }
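/*
 * A worked example with made-up numbers (illustration only, using the
 * default sched_cost_* values above): moving a proc with p_priority 60
 * to a non-idle, non-primary cpu whose spc_curpriority is 40, with two
 * procs queued and a load average of 1.0 (spc_ldavg == 1 << FSHIFT):
 *
 *	(60 - 40) * sched_cost_priority		20
 *	sched_cost_runnable (cpu not idle)	 3
 *	2 * sched_cost_runnable (cpu queued)	 6
 *	(sched_cost_load * spc_ldavg) >> FSHIFT	 1
 *						--
 *	cost					30
 *
 * A proc that recently ran on that cpu would then subtract
 * log2(resident pages) * sched_cost_resident from this.
 */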
598 :
599 : /*
600 : * Peg a proc to a cpu.
601 : */
602 : void
603 0 : sched_peg_curproc(struct cpu_info *ci)
604 : {
605 0 : struct proc *p = curproc;
606 : int s;
607 :
608 0 : SCHED_LOCK(s);
609 0 : p->p_priority = p->p_usrpri;
610 0 : p->p_stat = SRUN;
611 0 : p->p_cpu = ci;
612 0 : atomic_setbits_int(&p->p_flag, P_CPUPEG);
613 0 : setrunqueue(p);
614 0 : p->p_ru.ru_nvcsw++;
615 0 : mi_switch();
616 0 : SCHED_UNLOCK(s);
617 0 : }
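/*
 * Usage sketch (see sched_barrier_task() below for a real caller):
 * a thread that must run on a particular cpu calls
 * sched_peg_curproc(ci), does its cpu-local work, and then clears the
 * peg with atomic_clearbits_int(&curproc->p_flag, P_CPUPEG) so the
 * scheduler may move it again.
 */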
618 :
619 : #ifdef MULTIPROCESSOR
620 :
621 : void
622 0 : sched_start_secondary_cpus(void)
623 : {
624 : CPU_INFO_ITERATOR cii;
625 : struct cpu_info *ci;
626 :
627 0 : CPU_INFO_FOREACH(cii, ci) {
628 0 : struct schedstate_percpu *spc = &ci->ci_schedstate;
629 :
630 0 : if (CPU_IS_PRIMARY(ci))
631 0 : continue;
632 0 : atomic_clearbits_int(&spc->spc_schedflags,
633 : SPCF_SHOULDHALT | SPCF_HALTED);
634 : #ifdef __HAVE_CPU_TOPOLOGY
635 0 : if (!sched_smt && ci->ci_smt_id > 0)
636 0 : continue;
637 : #endif
638 0 : cpuset_add(&sched_all_cpus, ci);
639 0 : }
640 0 : }
641 :
642 : void
643 0 : sched_stop_secondary_cpus(void)
644 : {
645 : CPU_INFO_ITERATOR cii;
646 : struct cpu_info *ci;
647 :
648 : /*
649 : * Make sure we stop the secondary CPUs.
650 : */
651 0 : CPU_INFO_FOREACH(cii, ci) {
652 0 : struct schedstate_percpu *spc = &ci->ci_schedstate;
653 :
654 0 : if (CPU_IS_PRIMARY(ci))
655 0 : continue;
656 0 : cpuset_del(&sched_all_cpus, ci);
657 0 : atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
658 0 : }
659 0 : CPU_INFO_FOREACH(cii, ci) {
660 0 : struct schedstate_percpu *spc = &ci->ci_schedstate;
661 0 : struct sleep_state sls;
662 :
663 0 : if (CPU_IS_PRIMARY(ci))
664 0 : continue;
665 0 : while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
666 0 : sleep_setup(&sls, spc, PZERO, "schedstate");
667 0 : sleep_finish(&sls,
668 0 : (spc->spc_schedflags & SPCF_HALTED) == 0);
669 : }
670 0 : }
671 0 : }
672 :
673 : struct sched_barrier_state {
674 : struct cpu_info *ci;
675 : struct cond cond;
676 : };
677 :
678 : void
679 0 : sched_barrier_task(void *arg)
680 : {
681 0 : struct sched_barrier_state *sb = arg;
682 0 : struct cpu_info *ci = sb->ci;
683 :
684 0 : sched_peg_curproc(ci);
685 0 : cond_signal(&sb->cond);
686 0 : atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
687 0 : }
688 :
689 : void
690 0 : sched_barrier(struct cpu_info *ci)
691 : {
692 0 : struct sched_barrier_state sb;
693 0 : struct task task;
694 : CPU_INFO_ITERATOR cii;
695 :
696 0 : if (ci == NULL) {
697 0 : CPU_INFO_FOREACH(cii, ci) {
698 0 : if (CPU_IS_PRIMARY(ci))
699 : break;
700 : }
701 : }
702 0 : KASSERT(ci != NULL);
703 :
704 0 : if (ci == curcpu())
705 0 : return;
706 :
707 0 : sb.ci = ci;
708 0 : cond_init(&sb.cond);
709 0 : task_set(&task, sched_barrier_task, &sb);
710 :
711 0 : task_add(systqmp, &task);
712 0 : cond_wait(&sb.cond, "sbar");
713 0 : }
714 :
715 : #else
716 :
717 : void
718 : sched_barrier(struct cpu_info *ci)
719 : {
720 : }
721 :
722 : #endif
723 :
724 : /*
725 : * Functions to manipulate cpu sets.
726 : */
727 : struct cpu_info *cpuset_infos[MAXCPUS];
728 : static struct cpuset cpuset_all;
729 :
730 : void
731 0 : cpuset_init_cpu(struct cpu_info *ci)
732 : {
733 0 : cpuset_add(&cpuset_all, ci);
734 0 : cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
735 0 : }
736 :
737 : void
738 0 : cpuset_clear(struct cpuset *cs)
739 : {
740 0 : memset(cs, 0, sizeof(*cs));
741 0 : }
742 :
743 : void
744 0 : cpuset_add(struct cpuset *cs, struct cpu_info *ci)
745 : {
746 0 : unsigned int num = CPU_INFO_UNIT(ci);
747 0 : atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
748 0 : }
749 :
750 : void
751 0 : cpuset_del(struct cpuset *cs, struct cpu_info *ci)
752 : {
753 0 : unsigned int num = CPU_INFO_UNIT(ci);
754 0 : atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
755 0 : }
756 :
757 : int
758 0 : cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
759 : {
760 0 : unsigned int num = CPU_INFO_UNIT(ci);
761 0 : return (cs->cs_set[num/32] & (1 << (num % 32)));
762 : }
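/*
 * Example of the bit layout used by the cpuset routines: a cpu with
 * CPU_INFO_UNIT() == 37 lives in word cs_set[37 / 32] == cs_set[1],
 * at bit 37 % 32 == 5, i.e. mask (1 << 5) == 0x20.
 */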
763 :
764 : void
765 0 : cpuset_add_all(struct cpuset *cs)
766 : {
767 0 : cpuset_copy(cs, &cpuset_all);
768 0 : }
769 :
770 : void
771 0 : cpuset_copy(struct cpuset *to, struct cpuset *from)
772 : {
773 0 : memcpy(to, from, sizeof(*to));
774 0 : }
775 :
776 : struct cpu_info *
777 0 : cpuset_first(struct cpuset *cs)
778 : {
779 : int i;
780 :
781 0 : for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
782 0 : if (cs->cs_set[i])
783 0 : return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);
784 :
785 0 : return (NULL);
786 0 : }
787 :
788 : void
789 0 : cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
790 : {
791 : int i;
792 :
793 0 : for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
794 0 : to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
795 0 : }
796 :
797 : void
798 0 : cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
799 : {
800 : int i;
801 :
802 0 : for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
803 0 : to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
804 0 : }
805 :
806 : void
807 0 : cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
808 : {
809 : int i;
810 :
811 0 : for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
812 0 : to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
813 0 : }
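/*
 * Note the argument order above: cpuset_complement(to, a, b) computes
 * "b minus a" (b & ~a).  That is why sched_choosecpu() and
 * sched_choosecpu_fork() call
 *
 *	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
 *
 * to build the set of cpus that are idle and have nothing queued.
 */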
814 :
815 : int
816 0 : cpuset_cardinality(struct cpuset *cs)
817 : {
818 : int cardinality, i, n;
819 :
820 : cardinality = 0;
821 :
822 0 : for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
823 0 : for (n = cs->cs_set[i]; n != 0; n &= n - 1)
824 0 : cardinality++;
825 :
826 0 : return (cardinality);
827 : }
828 :
829 : int
830 0 : sysctl_hwncpuonline(void)
831 : {
832 0 : return cpuset_cardinality(&sched_all_cpus);
833 : }
834 :
835 : #ifdef __HAVE_CPU_TOPOLOGY
836 :
837 : #include <sys/sysctl.h>
838 :
839 : int
840 0 : sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
841 : {
842 : CPU_INFO_ITERATOR cii;
843 : struct cpu_info *ci;
844 0 : int err, newsmt;
845 :
846 0 : newsmt = sched_smt;
847 0 : err = sysctl_int(oldp, oldlenp, newp, newlen, &newsmt);
848 0 : if (err)
849 0 : return err;
850 0 : if (newsmt > 1)
851 0 : newsmt = 1;
852 0 : if (newsmt < 0)
853 0 : newsmt = 0;
854 0 : if (newsmt == sched_smt)
855 0 : return 0;
856 :
857 0 : sched_smt = newsmt;
858 0 : CPU_INFO_FOREACH(cii, ci) {
859 0 : if (CPU_IS_PRIMARY(ci))
860 : continue;
861 0 : if (ci->ci_smt_id == 0)
862 : continue;
863 0 : if (sched_smt)
864 0 : cpuset_add(&sched_all_cpus, ci);
865 : else
866 0 : cpuset_del(&sched_all_cpus, ci);
867 : }
868 :
869 0 : return 0;
870 0 : }
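/*
 * This is the backend of the hw.smt sysctl (the knob itself is wired up
 * elsewhere in the kernel).  Usage sketch:
 *
 *	# sysctl hw.smt=1	add SMT siblings to sched_all_cpus
 *	# sysctl hw.smt=0	remove them again
 *
 * Only non-primary cpus with a non-zero ci_smt_id are affected, so the
 * scheduler simply stops (or starts) choosing the sibling cpus.
 */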
871 :
872 : #endif