// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2008-2014 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

/**
 * @file
 * @brief Kernel timer subsystem
 * @defgroup timer Timers
 *
 * The timer subsystem allows functions to be scheduled for later
 * execution. Each timer object is used to cause one function to
 * be executed at a later time.
 *
 * Timer callback functions are called in interrupt context.
 *
 * @{
 */
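
// Illustrative usage sketch (not part of the original file): a minimal example of how a
// caller might arm and cancel a one-shot timer with this API. The callback runs in
// interrupt context, so it must not block. The names |my_deadline_handler| and
// |my_device| are hypothetical, and TimerSlack::none() stands in for whatever
// zero-slack value the caller would normally use.
//
//   static void my_deadline_handler(timer_t* t, zx_time_t now, void* arg) {
//       // |arg| is the pointer that was passed to timer_set().
//   }
//
//   timer_t t;
//   timer_init(&t);
//   timer_set(&t, zx_time_add_duration(current_time(), ZX_MSEC(10)),
//             TimerSlack::none(), my_deadline_handler, &my_device);
//   ...
//   // Returns true if the timer was removed from the queue before its callback ran.
//   timer_cancel(&t);
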
#include <assert.h>
#include <debug.h>
#include <err.h>
#include <inttypes.h>
#include <kernel/align.h>
#include <kernel/lockdep.h>
#include <kernel/mp.h>
#include <kernel/percpu.h>
#include <kernel/sched.h>
#include <kernel/spinlock.h>
#include <kernel/stats.h>
#include <kernel/thread.h>
#include <kernel/timer.h>
#include <lib/counters.h>
#include <list.h>
#include <malloc.h>
#include <platform.h>
#include <platform/timer.h>
#include <trace.h>
#include <zircon/time.h>
#include <zircon/types.h>

#define LOCAL_TRACE 0

// Total number of timers set. Always increasing.
KCOUNTER(timer_created_counter, "kernel.timer.created");

// Number of timers merged into an existing timer because of slack.
KCOUNTER(timer_coalesced_counter, "kernel.timer.coalesced");

// Number of timers that have fired (i.e. callback was invoked).
KCOUNTER(timer_fired_counter, "kernel.timer.fired");

// Number of timers that were successfully canceled. Attempts to cancel a timer that is currently
// firing are not counted.
KCOUNTER(timer_canceled_counter, "kernel.timer.canceled");

namespace {

spin_lock_t timer_lock __CPU_ALIGN_EXCLUSIVE = SPIN_LOCK_INITIAL_VALUE;
DECLARE_SINGLETON_LOCK_WRAPPER(TimerLock, timer_lock);

} // anonymous namespace

void timer_init(timer_t* timer) {
    *timer = (timer_t)TIMER_INITIAL_VALUE(*timer);
}

// Set the platform's oneshot timer to the minimum of its current deadline and |new_deadline|.
//
// Call this when the timer queue's head changes.
//
// Can only be called when interrupts are disabled and current CPU is |cpu|.
static void update_platform_timer(uint cpu, zx_time_t new_deadline) {
    DEBUG_ASSERT(arch_ints_disabled());
    DEBUG_ASSERT(cpu == arch_curr_cpu_num());
    if (new_deadline < percpu[cpu].next_timer_deadline) {
        LTRACEF("rescheduling timer for %" PRIi64 " nsecs\n", new_deadline);
        platform_set_oneshot_timer(new_deadline);
        percpu[cpu].next_timer_deadline = new_deadline;
    }
}

static void insert_timer_in_queue(uint cpu, timer_t* timer,
                                  zx_time_t earliest_deadline, zx_time_t latest_deadline) {

    DEBUG_ASSERT(arch_ints_disabled());
    LTRACEF("timer %p, cpu %u, scheduled %" PRIi64 "\n", timer, cpu, timer->scheduled_time);

    // For inserting the timer we consider several cases. In general we
    // want to coalesce with the current timer unless we can prove either
    // that:
    // 1- there is no slack overlap with the current timer, OR
    // 2- the next timer is a better fit.
    //
    // In the diagrams that follow:
    // - Let |e| be the current (existing) timer deadline
    // - Let |t| be the deadline of the timer we are inserting
    // - Let |n| be the next timer deadline, if any
    // - Let |x| be the end of the list (not a timer)
    // - Let |(| and |)| be the earliest_deadline and latest_deadline.
    //
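    // As a concrete illustration of the "coalesce by scheduling late" case below
    // (the numbers are arbitrary and only for this sketch): suppose the queue holds
    // a timer |e| at t=100ms and we insert a timer |t| with a 95ms deadline and 10ms
    // of TIMER_SLACK_LATE, i.e. a window of (95ms, 105ms). Since |e| (100ms) falls
    // inside that window and is >= |t|, the new timer coalesces onto |e|: its
    // scheduled_time becomes 100ms and its slack becomes +5ms.
    //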
    timer_t* entry;

    list_for_every_entry (&percpu[cpu].timer_queue, entry, timer_t, node) {
        if (entry->scheduled_time > latest_deadline) {
            // The new timer's latest deadline is earlier than the current timer.
            // Just add it upfront as is, without slack.
            //
            // ---------t---)--e-------------------------------> time
            //
            timer->slack = 0ll;
            list_add_before(&entry->node, &timer->node);
            return;
        }

        if (entry->scheduled_time >= timer->scheduled_time) {
            // The new timer's slack overlaps the current timer and the new timer is
            // to the left (or equal). We coalesce with the current timer by
            // scheduling late.
            //
            // --------(----t---e-)----------------------------> time
            //
            timer->slack = zx_time_sub_time(entry->scheduled_time, timer->scheduled_time);
            timer->scheduled_time = entry->scheduled_time;
            kcounter_add(timer_coalesced_counter, 1);
            list_add_after(&entry->node, &timer->node);
            return;
        }

        if (entry->scheduled_time < earliest_deadline) {
            // The new timer's earliest deadline is later than the current timer.
            // This case is handled in a future iteration.
            //
            // ----------------e--(---t-----------------------> time
            //
            continue;
        }

        // The new timer is to the right of the current timer and there is overlap
        // with the current timer, but could the next timer (if any) be
        // a better fit?
        //
        // -------------(--e---t-----?-------------------> time
        //

        const timer_t* next =
            list_next_type(&percpu[cpu].timer_queue, &entry->node, timer_t, node);

        if (next != NULL) {
            if (next->scheduled_time <= timer->scheduled_time) {
                // The new timer is to the right of the next timer. There is no
                // chance the current timer is a better fit.
                //
                // -------------(--e---n---t----------------------> time
                //
                continue;
            }

            if (next->scheduled_time < latest_deadline) {
                // There is slack overlap with the next timer, and also with the
                // current timer. Which coalescing is a better match?
                //
                // --------------(-e---t---n-)-----------------------> time
                //
                zx_duration_t delta_entry =
                    zx_time_sub_time(timer->scheduled_time, entry->scheduled_time);
                zx_duration_t delta_next =
                    zx_time_sub_time(next->scheduled_time, timer->scheduled_time);
                if (delta_next < delta_entry) {
                    // The new timer is closer to the next timer; handle it in the
                    // next iteration.
                    continue;
                }
            }
        }

        // Handle the remaining cases; note that there is overlap with
        // the current timer:
        //
        // 1- this is the last timer (next == NULL), or
        // 2- there is no overlap with the next timer, or
        // 3- there is overlap with both the current and next timers, but
        //    the current one is closer.
        //
        // So we coalesce by scheduling early.
        //
        timer->slack = zx_time_sub_time(entry->scheduled_time, timer->scheduled_time);
        timer->scheduled_time = entry->scheduled_time;
        kcounter_add(timer_coalesced_counter, 1);
        list_add_after(&entry->node, &timer->node);
        return;
    }

    // Walked off the end of the list and there was no overlap.
    timer->slack = 0;
    list_add_tail(&percpu[cpu].timer_queue, &timer->node);
}

void timer_set(timer_t* timer, zx_time_t deadline, TimerSlack slack,
               timer_callback callback, void* arg) {
    LTRACEF("timer %p deadline %" PRIi64 " slack.amount %" PRIi64
            " slack.mode %u callback %p arg %p\n",
            timer, deadline, slack.amount(), slack.mode(), callback, arg);

    DEBUG_ASSERT(timer->magic == TIMER_MAGIC);
    DEBUG_ASSERT(slack.mode() <= TIMER_SLACK_EARLY);
    DEBUG_ASSERT(slack.amount() >= 0);

    if (list_in_list(&timer->node)) {
        panic("timer %p already in list\n", timer);
    }

    zx_time_t latest_deadline;
    zx_time_t earliest_deadline;

    if (slack.amount() == 0u) {
        latest_deadline = deadline;
        earliest_deadline = deadline;
    } else {
        switch (slack.mode()) {
        case TIMER_SLACK_CENTER:
            latest_deadline = zx_time_add_duration(deadline, slack.amount());
            earliest_deadline = zx_time_sub_duration(deadline, slack.amount());
            break;
        case TIMER_SLACK_LATE:
            latest_deadline = zx_time_add_duration(deadline, slack.amount());
            earliest_deadline = deadline;
            break;
        case TIMER_SLACK_EARLY:
            earliest_deadline = zx_time_sub_duration(deadline, slack.amount());
            latest_deadline = deadline;
            break;
        default:
            panic("invalid timer mode\n");
        };
    }

    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};

    uint cpu = arch_curr_cpu_num();

    bool currently_active = (timer->active_cpu == (int)cpu);
    if (unlikely(currently_active)) {
        // the timer is active on our own cpu, we must be inside the callback
        if (timer->cancel) {
            return;
        }
    } else if (unlikely(timer->active_cpu >= 0)) {
        panic("timer %p currently active on a different cpu %d\n", timer, timer->active_cpu);
    }

    // Set up the structure.
    timer->scheduled_time = deadline;
    timer->callback = callback;
    timer->arg = arg;
    timer->cancel = false;
    // We don't need to modify timer->active_cpu because it is managed by timer_tick().

    LTRACEF("scheduled time %" PRIi64 "\n", timer->scheduled_time);

    insert_timer_in_queue(cpu, timer, earliest_deadline, latest_deadline);
    kcounter_add(timer_created_counter, 1);

    if (list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node) == timer) {
        // we just modified the head of the timer queue
        update_platform_timer(cpu, deadline);
    }
}

void timer_preempt_reset(zx_time_t deadline) {
    DEBUG_ASSERT(arch_ints_disabled());

    uint cpu = arch_curr_cpu_num();

    LTRACEF("preempt timer cpu %u deadline %" PRIi64 "\n", cpu, deadline);

    percpu[cpu].preempt_timer_deadline = deadline;

    update_platform_timer(cpu, deadline);
}

void timer_preempt_cancel() {
    DEBUG_ASSERT(arch_ints_disabled());

    uint cpu = arch_curr_cpu_num();

    percpu[cpu].preempt_timer_deadline = ZX_TIME_INFINITE;

    // Note, we're not updating the platform timer. It's entirely possible the timer queue is empty
    // and the preemption timer is the only reason the platform timer is set. To know that, we'd
    // need to acquire a lock and look at the queue. Rather than pay that cost, leave the platform
    // timer as is and expect the recipient to handle spurious wakeups.
}

bool timer_cancel(timer_t* timer) {
    DEBUG_ASSERT(timer->magic == TIMER_MAGIC);

    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};

    uint cpu = arch_curr_cpu_num();

    // mark the timer as canceled
    timer->cancel = true;
    mb();

    // see if we're trying to cancel the timer we're currently in the middle of handling
    if (unlikely(timer->active_cpu == (int)cpu)) {
        // zero it out
        timer->callback = NULL;
        timer->arg = NULL;

        // we're done, so return to the callback
        return false;
    }

    bool callback_not_running;

    // if the timer is in a queue, remove it and adjust hardware timers if needed
    if (list_in_list(&timer->node)) {
        callback_not_running = true;

        // save a copy of the old head of the queue so later we can see if we modified the head
        timer_t* oldhead = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);

        // remove our timer from the queue
        list_delete(&timer->node);
        kcounter_add(timer_canceled_counter, 1);

        // TODO(cpu): if after removing |timer| there is one other single timer with
        // the same scheduled_time and slack non-zero then it is possible to return
        // that timer to the ideal scheduled_time.

        // see if we've just modified the head of this cpu's timer queue.
        // if we modified another cpu's queue, we'll just let it fire and sort itself out
        if (unlikely(oldhead == timer)) {
            // the timer we're canceling was at the head of the queue; see if we should
            // update the platform timer
            timer_t* newhead = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
            if (newhead) {
                update_platform_timer(cpu, newhead->scheduled_time);
            } else if (percpu[cpu].next_timer_deadline == ZX_TIME_INFINITE) {
                LTRACEF("clearing old hw timer, preempt timer not set, nothing in the queue\n");
                platform_stop_timer();
            }
        }
    } else {
        callback_not_running = false;
    }

    guard.Release();

    // wait for the timer to become un-busy in case a callback is currently active on another cpu
    while (timer->active_cpu >= 0) {
        arch_spinloop_pause();
    }

    // zero it out
    timer->callback = NULL;
    timer->arg = NULL;

    return callback_not_running;
}

// called at interrupt time to process any pending timers
void timer_tick(zx_time_t now) {
    timer_t* timer;

    DEBUG_ASSERT(arch_ints_disabled());

    CPU_STATS_INC(timer_ints);

    uint cpu = arch_curr_cpu_num();

    LTRACEF("cpu %u now %" PRIi64 ", sp %p\n", cpu, now, __GET_FRAME());

    // platform timer has fired, no deadline is set
    percpu[cpu].next_timer_deadline = ZX_TIME_INFINITE;

    // service preempt timer before acquiring the timer lock
    if (now >= percpu[cpu].preempt_timer_deadline) {
        percpu[cpu].preempt_timer_deadline = ZX_TIME_INFINITE;
        sched_preempt_timer_tick(now);
    }

    Guard<spin_lock_t, NoIrqSave> guard{TimerLock::Get()};

    for (;;) {
        // see if there's an event to process
        timer = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
        if (likely(timer == 0)) {
            break;
        }
        LTRACEF("next item on timer queue %p at %" PRIi64 " now %" PRIi64 " (%p, arg %p)\n",
                timer, timer->scheduled_time, now, timer->callback, timer->arg);
        if (likely(now < timer->scheduled_time)) {
            break;
        }

        // process it
        LTRACEF("timer %p\n", timer);
        DEBUG_ASSERT_MSG(timer && timer->magic == TIMER_MAGIC,
                         "ASSERT: timer failed magic check: timer %p, magic 0x%x\n",
                         timer, (uint)timer->magic);
        list_delete(&timer->node);

        // mark the timer busy
        timer->active_cpu = cpu;
        // Unlocking the spinlock in CallUnlocked acts as a memory barrier.

        // Now that the timer is off of the list, release the spinlock to handle
        // the callback, then re-acquire in case it is requeued.
        guard.CallUnlocked(
            [timer, now]() {
                LTRACEF("dequeued timer %p, scheduled %" PRIi64 "\n", timer, timer->scheduled_time);

                CPU_STATS_INC(timers);
                kcounter_add(timer_fired_counter, 1);

                LTRACEF("timer %p firing callback %p, arg %p\n", timer, timer->callback, timer->arg);
                timer->callback(timer, now, timer->arg);

                DEBUG_ASSERT(arch_ints_disabled());
            });

        // mark it not busy
        timer->active_cpu = -1;
        mb();
    }

    // get the deadline of the event at the head of the queue (if any)
    zx_time_t deadline = ZX_TIME_INFINITE;
    timer = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
    if (timer) {
        deadline = timer->scheduled_time;

        // has to be the case or it would have fired already
        DEBUG_ASSERT(deadline > now);
    }

    // we're done manipulating the timer queue
    guard.Release();

    // set the platform timer to the *soonest* of queue event and preempt timer
    if (percpu[cpu].preempt_timer_deadline < deadline) {
        deadline = percpu[cpu].preempt_timer_deadline;
    }
    update_platform_timer(cpu, deadline);
}

zx_status_t timer_trylock_or_cancel(timer_t* t, spin_lock_t* lock) {
    // Spin, trying to take the passed-in spinlock, until either we grab it or the
    // passed-in timer is canceled (see the usage sketch after this function).
    while (unlikely(spin_trylock(lock))) {
        // we failed to grab it, check for cancel
        if (t->cancel) {
            // we were canceled, so bail immediately
            return ZX_ERR_TIMED_OUT;
        }
        // tell the arch to wait
        arch_spinloop_pause();
    }

    return ZX_OK;
}
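
// Illustrative usage sketch (not part of the original file): timer_trylock_or_cancel
// appears intended to be called from inside a timer callback that needs a spinlock
// which another thread may hold while it calls timer_cancel() on this same timer;
// spinning on a plain lock there could deadlock. The type |my_obj_t| and its |lock|
// field below are hypothetical.
//
//   static void my_timer_cb(timer_t* t, zx_time_t now, void* arg) {
//       my_obj_t* obj = static_cast<my_obj_t*>(arg);
//       if (timer_trylock_or_cancel(t, &obj->lock) == ZX_ERR_TIMED_OUT) {
//           return;  // canceled while spinning; the lock was never acquired
//       }
//       // ... do work while holding obj->lock ...
//       spin_unlock(&obj->lock);
//   }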

void timer_transition_off_cpu(uint old_cpu) {
    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};
    uint cpu = arch_curr_cpu_num();

    timer_t* old_head = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);

    timer_t *entry = NULL, *tmp_entry = NULL;
    // Move all timers from old_cpu to this cpu.
    list_for_every_entry_safe (&percpu[old_cpu].timer_queue, entry, tmp_entry, timer_t, node) {
        list_delete(&entry->node);
        // The original asymmetric slack information has been lost, so when these
        // timers are combined with the other timer queue they are not coalesced again.
        // TODO(cpu): figure out how important this case is.
        insert_timer_in_queue(cpu, entry, entry->scheduled_time, entry->scheduled_time);
        // Note, we do not increment the "created" counter here because we are simply moving these
        // timers from one queue to another and we already counted them when they were first
        // created.
    }

    timer_t* new_head = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
    if (new_head != NULL && new_head != old_head) {
        // we just modified the head of the timer queue
        update_platform_timer(cpu, new_head->scheduled_time);
    }

    // the old cpu has no tasks left, so reset the deadlines
    percpu[old_cpu].preempt_timer_deadline = ZX_TIME_INFINITE;
    percpu[old_cpu].next_timer_deadline = ZX_TIME_INFINITE;
}

void timer_thaw_percpu(void) {
    DEBUG_ASSERT(arch_ints_disabled());
    Guard<spin_lock_t, NoIrqSave> guard{TimerLock::Get()};

    uint cpu = arch_curr_cpu_num();

    // reset next_timer_deadline so that update_platform_timer will reconfigure the timer
    percpu[cpu].next_timer_deadline = ZX_TIME_INFINITE;
    zx_time_t deadline = percpu[cpu].preempt_timer_deadline;

    timer_t* t = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
    if (t) {
        if (t->scheduled_time < deadline) {
            deadline = t->scheduled_time;
        }
    }

    guard.Release();

    update_platform_timer(cpu, deadline);
}

void timer_queue_init(void) {
    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
        list_initialize(&percpu[i].timer_queue);
        percpu[i].preempt_timer_deadline = ZX_TIME_INFINITE;
        percpu[i].next_timer_deadline = ZX_TIME_INFINITE;
    }
}

// print a timer queue dump into the passed in buffer
static void dump_timer_queues(char* buf, size_t len) {
    size_t ptr = 0;
    zx_time_t now = current_time();

    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};

    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
        if (mp_is_cpu_online(i)) {
            ptr += snprintf(buf + ptr, len - ptr, "cpu %u:\n", i);

            timer_t* t;
            zx_time_t last = now;
            list_for_every_entry (&percpu[i].timer_queue, t, timer_t, node) {
                zx_duration_t delta_now = zx_time_sub_time(t->scheduled_time, now);
                zx_duration_t delta_last = zx_time_sub_time(t->scheduled_time, last);
                ptr += snprintf(buf + ptr, len - ptr,
                                "\ttime %" PRIi64 " delta_now %" PRIi64 " delta_last %" PRIi64 " func %p arg %p\n",
                                t->scheduled_time, delta_now, delta_last, t->callback, t->arg);
                last = t->scheduled_time;
            }
        }
    }
}

#include <lib/console.h>

static int cmd_timers(int argc, const cmd_args* argv, uint32_t flags) {
    const size_t timer_buffer_size = PAGE_SIZE;

    // allocate a buffer to dump the timer queues into, to avoid reentrancy issues with the
    // timer spinlock
    char* buf = static_cast<char*>(malloc(timer_buffer_size));
    if (!buf) {
        return ZX_ERR_NO_MEMORY;
    }

    dump_timer_queues(buf, timer_buffer_size);

    printf("%s", buf);

    free(buf);

    return 0;
}

STATIC_COMMAND_START
#if LK_DEBUGLEVEL > 1
STATIC_COMMAND_MASKED("timers", "dump the current kernel timer queues", &cmd_timers, CMD_AVAIL_NORMAL)
#endif
STATIC_COMMAND_END(kernel);