Line data Source code
1 : /* $OpenBSD: kern_tc.c,v 1.33 2018/05/28 18:05:42 guenther Exp $ */
2 :
3 : /*
4 : * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
5 : *
6 : * Permission to use, copy, modify, and distribute this software for any
7 : * purpose with or without fee is hereby granted, provided that the above
8 : * copyright notice and this permission notice appear in all copies.
9 : *
10 : * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 : * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 : * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 : * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 : * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 : * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 : */
18 :
19 : /*
20 : * If we meet some day, and you think this stuff is worth it, you
21 : * can buy me a beer in return. Poul-Henning Kamp
22 : */
23 :
24 : #include <sys/param.h>
25 : #include <sys/kernel.h>
26 : #include <sys/timeout.h>
27 : #include <sys/sysctl.h>
28 : #include <sys/syslog.h>
29 : #include <sys/systm.h>
30 : #include <sys/timetc.h>
31 : #include <sys/malloc.h>
32 : #include <dev/rndvar.h>
33 :
34 : /*
35 : * A large step happens on boot. This constant detects such steps.
36 : * It is relatively small so that ntp_update_second gets called enough
37 : * in the typical 'missed a couple of seconds' case, but doesn't loop
38 : * forever when the time step is large.
39 : */
40 : #define LARGE_STEP 200
41 :
/* Forward declarations for functions defined later in this file. */
42 : u_int dummy_get_timecount(struct timecounter *);
43 :
44 : void ntp_update_second(int64_t *);
45 : int sysctl_tc_hardware(void *, size_t *, void *, size_t);
46 : int sysctl_tc_choice(void *, size_t *, void *, size_t);
47 :
48 : /*
49 : * Implement a dummy timecounter which we can use until we get a real one
50 : * in the air. This allows the console and other early stuff to use
51 : * time services.
52 : */
53 :
54 : u_int
55 0 : dummy_get_timecount(struct timecounter *tc)
56 : {
57 : static u_int now;
58 :
59 0 : return (++now);
60 : }
61 :
/*
 * The placeholder timecounter backed by dummy_get_timecount().
 * NOTE(review): positional initializer; presumably read function, PPS poll
 * hook, counter mask, frequency, name and quality in struct timecounter
 * order -- confirm against <sys/timetc.h>.
 */
62 : static struct timecounter dummy_timecounter = {
63 : dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
64 : };
65 :
/*
 * One generation of timekeeping state.  Instances are linked into a ring
 * through th_next; tc_windup() fills in the next instance and then
 * publishes it through the `timehands' pointer.  Field order matters: the
 * static initializers are positional and tc_windup() copies everything
 * that precedes th_generation.
 */
66 : struct timehands {
67 : /* Fields up to (not including) th_generation are copied by tc_windup(). */
68 : struct timecounter *th_counter; /* counter that tc_delta() reads through */
69 : int64_t th_adjustment; /* rate correction written by ntp_update_second() */
70 : u_int64_t th_scale; /* multiplier turning a counter delta into a bintime fraction */
71 : u_int th_offset_count; /* counter value captured together with th_offset */
72 : struct bintime th_offset; /* uptime at th_offset_count */
73 : struct timeval th_microtime; /* cached UTC returned by getmicrotime() */
74 : struct timespec th_nanotime; /* cached UTC returned by getnanotime() */
75 : /* Fields not to be copied in tc_windup start with th_generation. */
76 : volatile u_int th_generation; /* 0 while being rewritten; readers retry */
77 : struct timehands *th_next; /* next instance in the ring */
78 : };
79 :
/*
 * Ten statically allocated timehands linked into a ring:
 * th0 -> th1 -> ... -> th9 -> th0.  Only th0 starts out populated: it
 * points at the dummy timecounter and carries generation 1.  The others
 * have a NULL counter and generation 0 until tc_windup() fills them in.
 */
80 : static struct timehands th0;
81 : static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
82 : static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
83 : static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
84 : static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
85 : static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
86 : static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
87 : static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
88 : static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
89 : static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
90 : static struct timehands th0 = {
91 : &dummy_timecounter, /* th_counter */
92 : 0, /* th_adjustment */
93 : (uint64_t)-1 / 1000000, /* th_scale: 2^64 / frequency of the dummy counter */
94 : 0, /* th_offset_count */
95 : {1, 0}, /* th_offset */
96 : {0, 0}, /* th_microtime */
97 : {0, 0}, /* th_nanotime */
98 : 1, /* th_generation: non-zero, so readers accept it */
99 : &th1 /* th_next */
100 : };
101 :
/* The currently published timehands; tc_windup() advances it around the ring. */
102 : static struct timehands *volatile timehands = &th0;
/* The selected counter; tc_windup() switches the timehands over to it. */
103 : struct timecounter *timecounter = &dummy_timecounter;
/* Head of the list of registered counters, linked through tc_next. */
104 : static struct timecounter *timecounters = &dummy_timecounter;
105 :
/* Coarse seconds values refreshed by tc_windup(). */
106 : volatile time_t time_second = 1;
107 : volatile time_t time_uptime = 0;
108 :
/* Sum of the uptime steps applied by tc_setclock(). */
109 : struct bintime naptime;
/* Added to the uptime to obtain UTC; set by tc_setrealtimeclock(). */
110 : static struct bintime boottimebin;
/* When non-zero, tc_setrealtimeclock() logs each time step. */
111 : static int timestepwarnings;
112 :
113 : void tc_windup(void);
114 :
115 : /*
116 : * Return the difference between the timehands' counter value now and what
117 : * was when we copied it to the timehands' offset_count.
118 : */
119 : static __inline u_int
120 0 : tc_delta(struct timehands *th)
121 : {
122 : struct timecounter *tc;
123 :
124 0 : tc = th->th_counter;
125 0 : return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
126 0 : tc->tc_counter_mask);
127 : }
128 :
129 : /*
130 : * Functions for reading the time. We have to loop until we are sure that
131 : * the timehands that we operated on was not updated under our feet. See
132 : * the comment in <sys/time.h> for a description of these functions.
133 : */
134 :
135 : void
136 0 : binuptime(struct bintime *bt)
137 : {
138 : struct timehands *th;
139 : u_int gen;
140 :
141 0 : do {
142 0 : th = timehands;
143 0 : gen = th->th_generation;
144 0 : *bt = th->th_offset;
145 0 : bintime_addx(bt, th->th_scale * tc_delta(th));
146 0 : } while (gen == 0 || gen != th->th_generation);
147 0 : }
148 :
149 : void
150 0 : nanouptime(struct timespec *tsp)
151 : {
152 0 : struct bintime bt;
153 :
154 0 : binuptime(&bt);
155 0 : bintime2timespec(&bt, tsp);
156 0 : }
157 :
158 : void
159 0 : microuptime(struct timeval *tvp)
160 : {
161 0 : struct bintime bt;
162 :
163 0 : binuptime(&bt);
164 0 : bintime2timeval(&bt, tvp);
165 0 : }
166 :
167 : void
168 0 : bintime(struct bintime *bt)
169 : {
170 :
171 0 : binuptime(bt);
172 0 : bintime_add(bt, &boottimebin);
173 0 : }
174 :
175 : void
176 0 : nanotime(struct timespec *tsp)
177 : {
178 0 : struct bintime bt;
179 :
180 0 : bintime(&bt);
181 0 : bintime2timespec(&bt, tsp);
182 0 : }
183 :
184 : void
185 0 : microtime(struct timeval *tvp)
186 : {
187 0 : struct bintime bt;
188 :
189 0 : bintime(&bt);
190 0 : bintime2timeval(&bt, tvp);
191 0 : }
192 :
193 : void
194 0 : getnanouptime(struct timespec *tsp)
195 : {
196 : struct timehands *th;
197 : u_int gen;
198 :
199 0 : do {
200 0 : th = timehands;
201 0 : gen = th->th_generation;
202 0 : bintime2timespec(&th->th_offset, tsp);
203 0 : } while (gen == 0 || gen != th->th_generation);
204 0 : }
205 :
206 : void
207 0 : getmicrouptime(struct timeval *tvp)
208 : {
209 : struct timehands *th;
210 : u_int gen;
211 :
212 0 : do {
213 0 : th = timehands;
214 0 : gen = th->th_generation;
215 0 : bintime2timeval(&th->th_offset, tvp);
216 0 : } while (gen == 0 || gen != th->th_generation);
217 0 : }
218 :
219 : void
220 0 : getnanotime(struct timespec *tsp)
221 : {
222 : struct timehands *th;
223 : u_int gen;
224 :
225 0 : do {
226 0 : th = timehands;
227 0 : gen = th->th_generation;
228 0 : *tsp = th->th_nanotime;
229 0 : } while (gen == 0 || gen != th->th_generation);
230 0 : }
231 :
232 : void
233 0 : getmicrotime(struct timeval *tvp)
234 : {
235 : struct timehands *th;
236 : u_int gen;
237 :
238 0 : do {
239 0 : th = timehands;
240 0 : gen = th->th_generation;
241 0 : *tvp = th->th_microtime;
242 0 : } while (gen == 0 || gen != th->th_generation);
243 0 : }
244 :
245 : /*
246 : * Initialize a new timecounter and possibly use it.
247 : */
248 : void
249 0 : tc_init(struct timecounter *tc)
250 : {
251 : u_int u;
252 :
253 0 : u = tc->tc_frequency / tc->tc_counter_mask;
254 : /* XXX: We need some margin here, 10% is a guess */
255 0 : u *= 11;
256 0 : u /= 10;
257 0 : if (tc->tc_quality >= 0) {
258 0 : if (u > hz) {
259 0 : tc->tc_quality = -2000;
260 0 : printf("Timecounter \"%s\" frequency %lu Hz",
261 0 : tc->tc_name, (unsigned long)tc->tc_frequency);
262 0 : printf(" -- Insufficient hz, needs at least %u\n", u);
263 0 : }
264 : }
265 :
266 0 : tc->tc_next = timecounters;
267 0 : timecounters = tc;
268 : /*
269 : * Never automatically use a timecounter with negative quality.
270 : * Even though we run on the dummy counter, switching here may be
271 : * worse since this timecounter may not be monotonic.
272 : */
273 0 : if (tc->tc_quality < 0)
274 0 : return;
275 0 : if (tc->tc_quality < timecounter->tc_quality)
276 0 : return;
277 0 : if (tc->tc_quality == timecounter->tc_quality &&
278 0 : tc->tc_frequency < timecounter->tc_frequency)
279 0 : return;
280 0 : (void)tc->tc_get_timecount(tc);
281 0 : enqueue_randomness(tc->tc_get_timecount(tc));
282 :
283 0 : timecounter = tc;
284 0 : }
285 :
286 : /* Report the frequency of the current timecounter. */
287 : u_int64_t
288 0 : tc_getfrequency(void)
289 : {
290 :
291 0 : return (timehands->th_counter->tc_frequency);
292 : }
293 :
294 : /*
295 : * Step our concept of UTC, aka the realtime clock.
296 : * This is done by modifying our estimate of when we booted.
297 : * XXX: not locked.
298 : */
299 : void
300 0 : tc_setrealtimeclock(const struct timespec *ts)
301 : {
302 0 : struct timespec ts2;
303 0 : struct bintime bt, bt2;
304 :
305 0 : binuptime(&bt2);
306 0 : timespec2bintime(ts, &bt);
307 0 : bintime_sub(&bt, &bt2);
308 0 : bintime_add(&bt2, &boottimebin);
309 0 : boottimebin = bt;
310 0 : bintime2timespec(&bt, &boottime);
311 0 : enqueue_randomness(ts->tv_sec);
312 :
313 : /* XXX fiddle all the little crinkly bits around the fiords... */
314 0 : tc_windup();
315 0 : if (timestepwarnings) {
316 0 : bintime2timespec(&bt2, &ts2);
317 0 : log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
318 0 : (long long)ts2.tv_sec, ts2.tv_nsec,
319 0 : (long long)ts->tv_sec, ts->tv_nsec);
320 0 : }
321 0 : }
322 :
323 : /*
324 : * Step the monotonic and realtime clocks, triggering any timeouts that
325 : * should have occurred across the interval.
326 : * XXX: not locked.
327 : */
328 : void
329 0 : tc_setclock(const struct timespec *ts)
330 : {
331 0 : struct bintime bt, bt2;
332 : #ifndef SMALL_KERNEL
333 : long long adj_ticks;
334 : #endif
335 :
336 : /*
337 : * When we're called for the first time, during boot when
338 : * the root partition is mounted, boottime is still zero:
339 : * we just need to set it.
340 : */
341 0 : if (boottimebin.sec == 0) {
342 0 : tc_setrealtimeclock(ts);
343 0 : return;
344 : }
345 :
346 0 : enqueue_randomness(ts->tv_sec);
347 :
348 0 : timespec2bintime(ts, &bt);
349 0 : bintime_sub(&bt, &boottimebin);
350 0 : bt2 = timehands->th_offset;
351 0 : timehands->th_offset = bt;
352 :
353 : /* XXX fiddle all the little crinkly bits around the fiords... */
354 0 : tc_windup();
355 :
356 : #ifndef SMALL_KERNEL
357 : /* convert the bintime to ticks */
358 0 : bintime_sub(&bt, &bt2);
359 0 : bintime_add(&naptime, &bt);
360 0 : adj_ticks = (uint64_t)hz * bt.sec +
361 0 : (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick;
362 0 : if (adj_ticks > 0) {
363 0 : if (adj_ticks > INT_MAX)
364 0 : adj_ticks = INT_MAX;
365 0 : timeout_adjust_ticks(adj_ticks);
366 0 : }
367 : #endif
368 0 : }
369 :
370 : /*
371 : * Initialize the next struct timehands in the ring and make
372 : * it the active timehands. Along the way we might switch to a different
373 : * timecounter and/or do seconds processing in NTP. Slightly magic.
374 : */
375 : void
376 0 : tc_windup(void)
377 : {
378 0 : struct bintime bt;
379 : struct timehands *th, *tho;
380 : u_int64_t scale;
381 : u_int delta, ncount, ogen;
382 : int i;
383 :
384 : /*
385 : * Make the next timehands a copy of the current one, but do not
386 : * overwrite the generation or next pointer. While we update
387 : * the contents, the generation must be zero.
388 : */
389 0 : tho = timehands;
390 0 : th = tho->th_next;
391 0 : ogen = th->th_generation;
392 0 : th->th_generation = 0;
393 0 : memcpy(th, tho, offsetof(struct timehands, th_generation));
394 :
395 : /*
396 : * Capture a timecounter delta on the current timecounter and if
397 : * changing timecounters, a counter value from the new timecounter.
398 : * Update the offset fields accordingly.
399 : */
400 0 : delta = tc_delta(th);
401 0 : if (th->th_counter != timecounter)
402 0 : ncount = timecounter->tc_get_timecount(timecounter);
403 : else
404 : ncount = 0;
405 0 : th->th_offset_count += delta;
406 0 : th->th_offset_count &= th->th_counter->tc_counter_mask;
407 0 : bintime_addx(&th->th_offset, th->th_scale * delta);
408 :
409 : #ifdef notyet
410 : /*
411 : * Hardware latching timecounters may not generate interrupts on
412 : * PPS events, so instead we poll them. There is a finite risk that
413 : * the hardware might capture a count which is later than the one we
414 : * got above, and therefore possibly in the next NTP second which might
415 : * have a different rate than the current NTP second. It doesn't
416 : * matter in practice.
417 : */
418 : if (tho->th_counter->tc_poll_pps)
419 : tho->th_counter->tc_poll_pps(tho->th_counter);
420 : #endif
421 :
422 : /*
423 : * Deal with NTP second processing. The for loop normally
424 : * iterates at most once, but in extreme situations it might
425 : * keep NTP sane if timeouts are not run for several seconds.
426 : * At boot, the time step can be large when the TOD hardware
427 : * has been read, so on really large steps, we call
428 : * ntp_update_second only twice. We need to call it twice in
429 : * case we missed a leap second.
430 : */
431 0 : bt = th->th_offset;
432 0 : bintime_add(&bt, &boottimebin);
433 0 : i = bt.sec - tho->th_microtime.tv_sec;
434 0 : if (i > LARGE_STEP)
435 : i = 2;
436 0 : for (; i > 0; i--)
437 0 : ntp_update_second(&th->th_adjustment);
438 :
439 : /* Update the UTC timestamps used by the get*() functions. */
440 : /* XXX shouldn't do this here. Should force non-`get' versions. */
441 0 : bintime2timeval(&bt, &th->th_microtime);
442 0 : bintime2timespec(&bt, &th->th_nanotime);
443 :
444 : /* Now is a good time to change timecounters. */
445 0 : if (th->th_counter != timecounter) {
446 0 : th->th_counter = timecounter;
447 0 : th->th_offset_count = ncount;
448 0 : }
449 :
450 : /*-
451 : * Recalculate the scaling factor. We want the number of 1/2^64
452 : * fractions of a second per period of the hardware counter, taking
453 : * into account the th_adjustment factor which the NTP PLL/adjtime(2)
454 : * processing provides us with.
455 : *
456 : * The th_adjustment is nanoseconds per second with 32 bit binary
457 : * fraction and we want 64 bit binary fraction of second:
458 : *
459 : * x = a * 2^32 / 10^9 = a * 4.294967296
460 : *
461 : * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
462 : * we can only multiply by about 850 without overflowing, but that
463 : * leaves suitably precise fractions for multiply before divide.
464 : *
465 : * Divide before multiply with a fraction of 2199/512 results in a
466 : * systematic undercompensation of 10PPM of th_adjustment. On a
467 : * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
468 : *
469 : * We happily sacrifice the lowest of the 64 bits of our result
470 : * to the goddess of code clarity.
471 : *
472 : */
473 : scale = (u_int64_t)1 << 63;
474 0 : scale += (th->th_adjustment / 1024) * 2199;
475 0 : scale /= th->th_counter->tc_frequency;
476 0 : th->th_scale = scale * 2;
477 :
478 : /*
479 : * Now that the struct timehands is again consistent, set the new
480 : * generation number, making sure to not make it zero.
481 : */
482 0 : if (++ogen == 0)
483 : ogen = 1;
484 0 : th->th_generation = ogen;
485 :
486 : /* Go live with the new struct timehands. */
487 0 : time_second = th->th_microtime.tv_sec;
488 0 : time_uptime = th->th_offset.sec;
489 0 : timehands = th;
490 0 : }
491 :
492 : /* Report or change the active timecounter hardware. */
493 : int
494 0 : sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
495 : {
496 0 : char newname[32];
497 : struct timecounter *newtc, *tc;
498 : int error;
499 :
500 0 : tc = timecounter;
501 0 : strlcpy(newname, tc->tc_name, sizeof(newname));
502 :
503 0 : error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname));
504 0 : if (error != 0 || strcmp(newname, tc->tc_name) == 0)
505 0 : return (error);
506 0 : for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
507 0 : if (strcmp(newname, newtc->tc_name) != 0)
508 : continue;
509 :
510 : /* Warm up new timecounter. */
511 0 : (void)newtc->tc_get_timecount(newtc);
512 0 : (void)newtc->tc_get_timecount(newtc);
513 :
514 0 : timecounter = newtc;
515 0 : return (0);
516 : }
517 0 : return (EINVAL);
518 0 : }
519 :
520 : /* Report or change the active timecounter hardware. */
521 : int
522 0 : sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
523 : {
524 0 : char buf[32], *spc, *choices;
525 : struct timecounter *tc;
526 : int error, maxlen;
527 :
528 : spc = "";
529 : maxlen = 0;
530 0 : for (tc = timecounters; tc != NULL; tc = tc->tc_next)
531 0 : maxlen += sizeof(buf);
532 0 : choices = malloc(maxlen, M_TEMP, M_WAITOK);
533 0 : *choices = '\0';
534 0 : for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
535 0 : snprintf(buf, sizeof(buf), "%s%s(%d)",
536 0 : spc, tc->tc_name, tc->tc_quality);
537 : spc = " ";
538 0 : strlcat(choices, buf, maxlen);
539 : }
540 0 : error = sysctl_rdstring(oldp, oldlenp, newp, choices);
541 0 : free(choices, M_TEMP, maxlen);
542 0 : return (error);
543 0 : }
544 :
/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 *
 * tc_tick is the number of tc_ticktock() calls between two updates.
 */
static int tc_tick;

/* Count calls and run tc_windup() on every tc_tick'th one. */
void
tc_ticktock(void)
{
	static int count;

	count++;
	if (count >= tc_tick) {
		count = 0;
		tc_windup();
	}
}
563 :
564 : void
565 0 : inittimecounter(void)
566 : {
567 : #ifdef DEBUG
568 : u_int p;
569 : #endif
570 :
571 : /*
572 : * Set the initial timeout to
573 : * max(1, <approx. number of hardclock ticks in a millisecond>).
574 : * People should probably not use the sysctl to set the timeout
575 : * to smaller than its initial value, since that value is the
576 : * smallest reasonable one. If they want better timestamps they
577 : * should use the non-"get"* functions.
578 : */
579 0 : if (hz > 1000)
580 0 : tc_tick = (hz + 500) / 1000;
581 : else
582 0 : tc_tick = 1;
583 : #ifdef DEBUG
584 : p = (tc_tick * 1000000) / hz;
585 : printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
586 : #endif
587 :
588 : /* warm up new timecounter (again) and get rolling. */
589 0 : (void)timecounter->tc_get_timecount(timecounter);
590 0 : (void)timecounter->tc_get_timecount(timecounter);
591 0 : }
592 :
593 : /*
594 : * Return timecounter-related information.
595 : */
596 : int
597 0 : sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
598 : void *newp, size_t newlen)
599 : {
600 0 : if (namelen != 1)
601 0 : return (ENOTDIR);
602 :
603 0 : switch (name[0]) {
604 : case KERN_TIMECOUNTER_TICK:
605 0 : return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
606 : case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
607 0 : return (sysctl_int(oldp, oldlenp, newp, newlen,
608 : ×tepwarnings));
609 : case KERN_TIMECOUNTER_HARDWARE:
610 0 : return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
611 : case KERN_TIMECOUNTER_CHOICE:
612 0 : return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
613 : default:
614 0 : return (EOPNOTSUPP);
615 : }
616 : /* NOTREACHED */
617 0 : }
618 :
619 : void
620 0 : ntp_update_second(int64_t *adjust)
621 : {
622 : int64_t adj;
623 :
624 : /* Skew time according to any adjtime(2) adjustments. */
625 0 : if (adjtimedelta > 0)
626 0 : adj = MIN(5000, adjtimedelta);
627 : else
628 0 : adj = MAX(-5000, adjtimedelta);
629 0 : adjtimedelta -= adj;
630 0 : *adjust = (adj * 1000) << 32;
631 0 : *adjust += timecounter->tc_freq_adj;
632 0 : }
633 :
634 : int
635 0 : tc_adjfreq(int64_t *old, int64_t *new)
636 : {
637 0 : if (old != NULL) {
638 0 : *old = timecounter->tc_freq_adj;
639 0 : }
640 0 : if (new != NULL) {
641 0 : timecounter->tc_freq_adj = *new;
642 0 : }
643 0 : return 0;
644 : }
|