Line data Source code
1 : /* $OpenBSD: kern_clock.c,v 1.95 2018/06/04 18:16:43 cheloha Exp $ */
2 : /* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */
3 :
4 : /*-
5 : * Copyright (c) 1982, 1986, 1991, 1993
6 : * The Regents of the University of California. All rights reserved.
7 : * (c) UNIX System Laboratories, Inc.
8 : * All or some portions of this file are derived from material licensed
9 : * to the University of California by American Telephone and Telegraph
10 : * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 : * the permission of UNIX System Laboratories, Inc.
12 : *
13 : * Redistribution and use in source and binary forms, with or without
14 : * modification, are permitted provided that the following conditions
15 : * are met:
16 : * 1. Redistributions of source code must retain the above copyright
17 : * notice, this list of conditions and the following disclaimer.
18 : * 2. Redistributions in binary form must reproduce the above copyright
19 : * notice, this list of conditions and the following disclaimer in the
20 : * documentation and/or other materials provided with the distribution.
21 : * 3. Neither the name of the University nor the names of its contributors
22 : * may be used to endorse or promote products derived from this software
23 : * without specific prior written permission.
24 : *
25 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 : * SUCH DAMAGE.
36 : *
37 : * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
38 : */
39 :
40 : #include <sys/param.h>
41 : #include <sys/systm.h>
42 : #include <sys/timeout.h>
43 : #include <sys/kernel.h>
44 : #include <sys/limits.h>
45 : #include <sys/proc.h>
46 : #include <sys/user.h>
47 : #include <sys/resourcevar.h>
48 : #include <sys/signalvar.h>
49 : #include <sys/sysctl.h>
50 : #include <sys/sched.h>
51 : #include <sys/timetc.h>
52 :
53 :
54 : #if defined(GPROF) || defined(DDBPROF)
55 : #include <sys/gmon.h>
56 : #endif
57 :
58 : /*
59 : * Clock handling routines.
60 : *
61 : * This code is written to operate with two timers that run independently of
62 : * each other. The main clock, running hz times per second, is used to keep
63 : * track of real time. The second timer handles kernel and user profiling,
64 : * and does resource use estimation. If the second timer is programmable,
65 : * it is randomized to avoid aliasing between the two clocks. For example,
66 : * the randomization prevents an adversary from always giving up the cpu
67 : * just before its quantum expires. Otherwise, it would never accumulate
68 : * cpu ticks. The mean frequency of the second timer is stathz.
69 : *
70 : * If no second timer exists, stathz will be zero; in this case we drive
71 : * profiling and statistics off the main clock. This WILL NOT be accurate;
72 : * do not do it unless absolutely necessary.
73 : *
74 : * The statistics clock may (or may not) be run at a higher rate while
75 : * profiling. This profile clock runs at profhz. We require that profhz
76 : * be an integral multiple of stathz.
77 : *
78 : * If the statistics clock is running fast, it must be divided by the ratio
79 : * profhz/stathz for statistics. (For profiling, every tick counts.)
80 : */
81 :
/* Mean frequency of the statistics clock; zero when there is no second timer. */
int stathz;
/* Rate of a separate scheduler clock; when zero, statclock() calls schedclock(). */
int schedhz;
/* Frequency of the profiling clock; defaults to the statistics rate. */
int profhz;
/* Count of processes that currently have PS_PROFIL set. */
int profprocs;
/* Hardclock tick counter, advanced by the primary CPU only. */
int ticks;
/* prof => stat divider */
static int psdiv;
static int pscnt;
/* ratio: prof / stat */
int psratio;

/* Cookie returned by softintr_establish() for the softclock handler. */
void *softclock_si;

/* XXX Linux API for drm(4) */
volatile unsigned long jiffies;
93 :
94 : /*
95 : * Initialize clock frequencies and start both clocks running.
96 : */
97 : void
98 0 : initclocks(void)
99 : {
100 : int i;
101 :
102 0 : softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
103 0 : if (softclock_si == NULL)
104 0 : panic("initclocks: unable to register softclock intr");
105 :
106 0 : ticks = INT_MAX - (15 * 60 * hz);
107 0 : jiffies = ULONG_MAX - (10 * 60 * hz);
108 :
109 : /*
110 : * Set divisors to 1 (normal case) and let the machine-specific
111 : * code do its bit.
112 : */
113 0 : psdiv = pscnt = 1;
114 0 : cpu_initclocks();
115 :
116 : /*
117 : * Compute profhz/stathz, and fix profhz if needed.
118 : */
119 0 : i = stathz ? stathz : hz;
120 0 : if (profhz == 0)
121 0 : profhz = i;
122 0 : psratio = profhz / i;
123 :
124 : /* For very large HZ, ensure that division by 0 does not occur later */
125 0 : if (tickadj == 0)
126 0 : tickadj = 1;
127 :
128 0 : inittimecounter();
129 0 : }
130 :
131 : /*
132 : * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
133 : * We don't want to send signals with psignal from hardclock because it makes
134 : * MULTIPROCESSOR locking very complicated. Instead, to use an idea from
135 : * FreeBSD, we set a flag on the thread and when it goes to return to
136 : * userspace it signals itself.
137 : */
138 :
139 : /*
140 : * The real-time timer, interrupting hz times per second.
141 : */
142 : void
143 0 : hardclock(struct clockframe *frame)
144 : {
145 : struct proc *p;
146 0 : struct cpu_info *ci = curcpu();
147 :
148 0 : p = curproc;
149 0 : if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
150 0 : struct process *pr = p->p_p;
151 :
152 : /*
153 : * Run current process's virtual and profile time, as needed.
154 : */
155 0 : if (CLKF_USERMODE(frame) &&
156 0 : timerisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
157 0 : itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick) == 0) {
158 0 : atomic_setbits_int(&p->p_flag, P_ALRMPEND);
159 0 : need_proftick(p);
160 0 : }
161 0 : if (timerisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
162 0 : itimerdecr(&pr->ps_timer[ITIMER_PROF], tick) == 0) {
163 0 : atomic_setbits_int(&p->p_flag, P_PROFPEND);
164 0 : need_proftick(p);
165 0 : }
166 0 : }
167 :
168 : /*
169 : * If no separate statistics clock is available, run it from here.
170 : */
171 0 : if (stathz == 0)
172 0 : statclock(frame);
173 :
174 0 : if (--ci->ci_schedstate.spc_rrticks <= 0)
175 0 : roundrobin(ci);
176 :
177 : /*
178 : * If we are not the primary CPU, we're not allowed to do
179 : * any more work.
180 : */
181 0 : if (CPU_IS_PRIMARY(ci) == 0)
182 0 : return;
183 :
184 0 : tc_ticktock();
185 0 : ticks++;
186 0 : jiffies++;
187 :
188 : /*
189 : * Update real-time timeout queue.
190 : * Process callouts at a very low cpu priority, so we don't keep the
191 : * relatively high clock interrupt priority any longer than necessary.
192 : */
193 0 : if (timeout_hardclock_update())
194 0 : softintr_schedule(softclock_si);
195 0 : }
196 :
197 : /*
198 : * Compute number of hz in the specified amount of time.
199 : */
200 : int
201 0 : tvtohz(const struct timeval *tv)
202 : {
203 : unsigned long nticks;
204 : time_t sec;
205 : long usec;
206 :
207 : /*
208 : * If the number of usecs in the whole seconds part of the time
209 : * fits in a long, then the total number of usecs will
210 : * fit in an unsigned long. Compute the total and convert it to
211 : * ticks, rounding up and adding 1 to allow for the current tick
212 : * to expire. Rounding also depends on unsigned long arithmetic
213 : * to avoid overflow.
214 : *
215 : * Otherwise, if the number of ticks in the whole seconds part of
216 : * the time fits in a long, then convert the parts to
217 : * ticks separately and add, using similar rounding methods and
218 : * overflow avoidance. This method would work in the previous
219 : * case but it is slightly slower and assumes that hz is integral.
220 : *
221 : * Otherwise, round the time down to the maximum
222 : * representable value.
223 : *
224 : * If ints have 32 bits, then the maximum value for any timeout in
225 : * 10ms ticks is 248 days.
226 : */
227 0 : sec = tv->tv_sec;
228 0 : usec = tv->tv_usec;
229 0 : if (sec < 0 || (sec == 0 && usec <= 0))
230 0 : nticks = 0;
231 0 : else if (sec <= LONG_MAX / 1000000)
232 0 : nticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
233 0 : / tick + 1;
234 0 : else if (sec <= LONG_MAX / hz)
235 0 : nticks = sec * hz
236 0 : + ((unsigned long)usec + (tick - 1)) / tick + 1;
237 : else
238 : nticks = LONG_MAX;
239 0 : if (nticks > INT_MAX)
240 0 : nticks = INT_MAX;
241 0 : return ((int)nticks);
242 : }
243 :
/*
 * Convert a timespec interval into a number of hardclock ticks.
 *
 * The timespec is converted to a timeval; if that conversion dropped
 * a sub-microsecond remainder, the timeval is rounded up by one
 * microsecond before being handed to tvtohz().
 */
int
tstohz(const struct timespec *ts)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, ts);

	/* Exact multiple of a microsecond: nothing to round. */
	if ((ts->tv_nsec % 1000) == 0)
		return (tvtohz(&tv));

	/* Round up, carrying into the seconds field when needed. */
	tv.tv_usec += 1;
	if (tv.tv_usec >= 1000000) {
		tv.tv_usec -= 1000000;
		tv.tv_sec += 1;
	}

	return (tvtohz(&tv));
}
261 :
262 : /*
263 : * Start profiling on a process.
264 : *
265 : * Kernel profiling passes proc0 which never exits and hence
266 : * keeps the profile clock running constantly.
267 : */
268 : void
269 0 : startprofclock(struct process *pr)
270 : {
271 : int s;
272 :
273 0 : if ((pr->ps_flags & PS_PROFIL) == 0) {
274 0 : atomic_setbits_int(&pr->ps_flags, PS_PROFIL);
275 0 : if (++profprocs == 1 && stathz != 0) {
276 0 : s = splstatclock();
277 0 : psdiv = pscnt = psratio;
278 0 : setstatclockrate(profhz);
279 0 : splx(s);
280 0 : }
281 : }
282 0 : }
283 :
284 : /*
285 : * Stop profiling on a process.
286 : */
287 : void
288 0 : stopprofclock(struct process *pr)
289 : {
290 : int s;
291 :
292 0 : if (pr->ps_flags & PS_PROFIL) {
293 0 : atomic_clearbits_int(&pr->ps_flags, PS_PROFIL);
294 0 : if (--profprocs == 0 && stathz != 0) {
295 0 : s = splstatclock();
296 0 : psdiv = pscnt = 1;
297 0 : setstatclockrate(stathz);
298 0 : splx(s);
299 0 : }
300 : }
301 0 : }
302 :
303 : /*
304 : * Statistics clock. Grab profile sample, and if divider reaches 0,
305 : * do process and kernel statistics.
306 : */
307 : void
308 0 : statclock(struct clockframe *frame)
309 : {
310 : #if defined(GPROF) || defined(DDBPROF)
311 : struct gmonparam *g;
312 : u_long i;
313 : #endif
314 0 : struct cpu_info *ci = curcpu();
315 0 : struct schedstate_percpu *spc = &ci->ci_schedstate;
316 0 : struct proc *p = curproc;
317 : struct process *pr;
318 :
319 : /*
320 : * Notice changes in divisor frequency, and adjust clock
321 : * frequency accordingly.
322 : */
323 0 : if (spc->spc_psdiv != psdiv) {
324 0 : spc->spc_psdiv = psdiv;
325 0 : spc->spc_pscnt = psdiv;
326 0 : if (psdiv == 1) {
327 0 : setstatclockrate(stathz);
328 0 : } else {
329 0 : setstatclockrate(profhz);
330 : }
331 : }
332 :
333 0 : if (CLKF_USERMODE(frame)) {
334 0 : pr = p->p_p;
335 0 : if (pr->ps_flags & PS_PROFIL)
336 0 : addupc_intr(p, CLKF_PC(frame));
337 0 : if (--spc->spc_pscnt > 0)
338 0 : return;
339 : /*
340 : * Came from user mode; CPU was in user state.
341 : * If this process is being profiled record the tick.
342 : */
343 0 : p->p_uticks++;
344 0 : if (pr->ps_nice > NZERO)
345 0 : spc->spc_cp_time[CP_NICE]++;
346 : else
347 0 : spc->spc_cp_time[CP_USER]++;
348 : } else {
349 : #if defined(GPROF) || defined(DDBPROF)
350 : /*
351 : * Kernel statistics are just like addupc_intr, only easier.
352 : */
353 : g = ci->ci_gmon;
354 : if (g != NULL && g->state == GMON_PROF_ON) {
355 : i = CLKF_PC(frame) - g->lowpc;
356 : if (i < g->textsize) {
357 : i /= HISTFRACTION * sizeof(*g->kcount);
358 : g->kcount[i]++;
359 : }
360 : }
361 : #endif
362 : #if defined(PROC_PC)
363 0 : if (p != NULL && p->p_p->ps_flags & PS_PROFIL)
364 0 : addupc_intr(p, PROC_PC(p));
365 : #endif
366 0 : if (--spc->spc_pscnt > 0)
367 0 : return;
368 : /*
369 : * Came from kernel mode, so we were:
370 : * - spinning on a lock
371 : * - handling an interrupt,
372 : * - doing syscall or trap work on behalf of the current
373 : * user process, or
374 : * - spinning in the idle loop.
375 : * Whichever it is, charge the time as appropriate.
376 : * Note that we charge interrupts to the current process,
377 : * regardless of whether they are ``for'' that process,
378 : * so that we know how much of its real time was spent
379 : * in ``non-process'' (i.e., interrupt) work.
380 : */
381 0 : if (spc->spc_spinning)
382 0 : spc->spc_cp_time[CP_SPIN]++;
383 0 : else if (CLKF_INTR(frame)) {
384 0 : if (p != NULL)
385 0 : p->p_iticks++;
386 0 : spc->spc_cp_time[CP_INTR]++;
387 0 : } else if (p != NULL && p != spc->spc_idleproc) {
388 0 : p->p_sticks++;
389 0 : spc->spc_cp_time[CP_SYS]++;
390 0 : } else
391 0 : spc->spc_cp_time[CP_IDLE]++;
392 : }
393 0 : spc->spc_pscnt = psdiv;
394 :
395 0 : if (p != NULL) {
396 0 : p->p_cpticks++;
397 : /*
398 : * If no schedclock is provided, call it here at ~~12-25 Hz;
399 : * ~~16 Hz is best
400 : */
401 0 : if (schedhz == 0) {
402 0 : if ((++curcpu()->ci_schedstate.spc_schedticks & 3) ==
403 : 0)
404 0 : schedclock(p);
405 : }
406 : }
407 0 : }
408 :
409 : /*
410 : * Return information about system clocks.
411 : */
412 : int
413 0 : sysctl_clockrate(char *where, size_t *sizep, void *newp)
414 : {
415 0 : struct clockinfo clkinfo;
416 :
417 : /*
418 : * Construct clockinfo structure.
419 : */
420 0 : memset(&clkinfo, 0, sizeof clkinfo);
421 0 : clkinfo.tick = tick;
422 0 : clkinfo.tickadj = tickadj;
423 0 : clkinfo.hz = hz;
424 0 : clkinfo.profhz = profhz;
425 0 : clkinfo.stathz = stathz ? stathz : hz;
426 0 : return (sysctl_rdstruct(where, sizep, newp, &clkinfo, sizeof(clkinfo)));
427 0 : }
|