// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2008-2014 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

/**
 * @file
 * @brief  Kernel timer subsystem
 * @defgroup timer Timers
 *
 * The timer subsystem allows functions to be scheduled for later
 * execution.  Each timer object is used to cause one function to
 * be executed at a later time.
 *
 * Timer callback functions are called in interrupt context.
 *
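 * Example usage (a minimal sketch; |my_handler| and |my_arg| are hypothetical
 * names, and the zero-slack TimerSlack shown as TimerSlack::none() is assumed
 * to match the constructor declared in kernel/timer.h):
 *
 * @code
 *   static void my_handler(timer_t* t, zx_time_t now, void* arg) {
 *       // Runs in interrupt context; keep it short and non-blocking.
 *   }
 *
 *   timer_t t;
 *   timer_init(&t);
 *   timer_set(&t, zx_time_add_duration(current_time(), ZX_MSEC(10)),
 *             TimerSlack::none(), my_handler, my_arg);
 *   // ...
 *   timer_cancel(&t);  // returns true if the callback had not started
 * @endcode
 *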
 * @{
 */
#include <assert.h>
#include <debug.h>
#include <err.h>
#include <inttypes.h>
#include <kernel/align.h>
#include <kernel/lockdep.h>
#include <kernel/mp.h>
#include <kernel/percpu.h>
#include <kernel/sched.h>
#include <kernel/spinlock.h>
#include <kernel/stats.h>
#include <kernel/thread.h>
#include <kernel/timer.h>
#include <lib/counters.h>
#include <list.h>
#include <malloc.h>
#include <platform.h>
#include <platform/timer.h>
#include <trace.h>
#include <zircon/time.h>
#include <zircon/types.h>

#define LOCAL_TRACE 0

// Total number of timers set. Always increasing.
KCOUNTER(timer_created_counter, "kernel.timer.created");

// Number of timers merged into an existing timer because of slack.
KCOUNTER(timer_coalesced_counter, "kernel.timer.coalesced");

// Number of timers that have fired (i.e. callback was invoked).
KCOUNTER(timer_fired_counter, "kernel.timer.fired");

// Number of timers that were successfully canceled. Attempts to cancel a timer that is currently
// firing are not counted.
KCOUNTER(timer_canceled_counter, "kernel.timer.canceled");

namespace {

spin_lock_t timer_lock __CPU_ALIGN_EXCLUSIVE = SPIN_LOCK_INITIAL_VALUE;
DECLARE_SINGLETON_LOCK_WRAPPER(TimerLock, timer_lock);

} // anonymous namespace

void timer_init(timer_t* timer) {
    *timer = (timer_t)TIMER_INITIAL_VALUE(*timer);
}

// Set the platform's oneshot timer to the minimum of its current deadline and |new_deadline|.
//
// Call this when the timer queue's head changes.
//
// Can only be called when interrupts are disabled and current CPU is |cpu|.
static void update_platform_timer(uint cpu, zx_time_t new_deadline) {
    DEBUG_ASSERT(arch_ints_disabled());
    DEBUG_ASSERT(cpu == arch_curr_cpu_num());
    if (new_deadline < percpu[cpu].next_timer_deadline) {
        LTRACEF("rescheduling timer for %" PRIi64 " nsecs\n", new_deadline);
        platform_set_oneshot_timer(new_deadline);
        percpu[cpu].next_timer_deadline = new_deadline;
    }
}

static void insert_timer_in_queue(uint cpu, timer_t* timer,
                                  zx_time_t earliest_deadline, zx_time_t latest_deadline) {

    DEBUG_ASSERT(arch_ints_disabled());
    LTRACEF("timer %p, cpu %u, scheduled %" PRIi64 "\n", timer, cpu, timer->scheduled_time);

    // For inserting the timer we consider several cases. In general we
    // want to coalesce with the current timer unless we can prove either
    // that:
    //  1- there is no slack overlap with the current timer, OR
    //  2- the next timer is a better fit.
    //
    // In the diagrams that follow:
    // - Let |e| be the current (existing) timer deadline
    // - Let |t| be the deadline of the timer we are inserting
    // - Let |n| be the next timer deadline, if any
    // - Let |x| be the end of the list (not a timer)
    // - Let |(| and |)| be the earliest_deadline and latest_deadline.
    //
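    // As a concrete (illustrative) example: inserting |t| = 95 with window
    // (90, 105) into a queue whose head is |e| = 100 overlaps |e|, so |t| is
    // coalesced late onto 100 with slack = +5; with window (90, 96) there is
    // no overlap and |t| is inserted before |e| with slack = 0.
    //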
    timer_t* entry;

    list_for_every_entry (&percpu[cpu].timer_queue, entry, timer_t, node) {
        if (entry->scheduled_time > latest_deadline) {
            // The new timer's latest deadline is earlier than the current timer.
            // Just add it upfront as is, without slack.
            //
            //   ---------t---)--e-------------------------------> time
            //
            timer->slack = 0ll;
            list_add_before(&entry->node, &timer->node);
            return;
        }

        if (entry->scheduled_time >= timer->scheduled_time) {
            //  The new timer's slack overlaps and it is to the left of (or equal
            //  to) the current timer. We coalesce with the current timer by
            //  scheduling late.
            //
            //  --------(----t---e-)----------------------------> time
            //
            timer->slack = zx_time_sub_time(entry->scheduled_time, timer->scheduled_time);
            timer->scheduled_time = entry->scheduled_time;
            kcounter_add(timer_coalesced_counter, 1);
            list_add_after(&entry->node, &timer->node);
            return;
        }

        if (entry->scheduled_time < earliest_deadline) {
            // The new timer's earliest deadline is later than the current timer.
            // This case is handled in a future iteration.
            //
            //   ----------------e--(---t-----------------------> time
            //
            continue;
        }

        // The new timer is to the right of the current timer and there is
        // overlap with the current timer, but could the next timer (if any)
        // be a better fit?
        //
        //  -------------(--e---t-----?-------------------> time
        //

        const timer_t* next =
            list_next_type(&percpu[cpu].timer_queue, &entry->node, timer_t, node);

        if (next != NULL) {
            if (next->scheduled_time <= timer->scheduled_time) {
                // The new timer is to the right of the next timer. There is no
                // chance the current timer is a better fit.
                //
                //  -------------(--e---n---t----------------------> time
                //
                continue;
            }

            if (next->scheduled_time < latest_deadline) {
                // There is slack overlap with the next timer, and also with the
                // current timer. Which coalescing is a better match?
                //
                //  --------------(-e---t---n-)-----------------------> time
                //
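                // (Illustrative numbers: with e = 100, t = 103, n = 105 we get
                // delta_entry = 3 and delta_next = 2, so the new timer is
                // closer to |n| and is deferred to the next iteration.)
                //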
                zx_duration_t delta_entry =
                    zx_time_sub_time(timer->scheduled_time, entry->scheduled_time);
                zx_duration_t delta_next =
                    zx_time_sub_time(next->scheduled_time, timer->scheduled_time);
                if (delta_next < delta_entry) {
                    // The new timer is closer to the next timer; handle it in
                    // the next iteration.
                    continue;
                }
            }
        }

        // Handle the remaining cases; note that there is overlap with
        // the current timer:
        //
        //  1- this is the last timer (next == NULL), or
        //  2- there is no overlap with the next timer, or
        //  3- there is overlap with both the current and next timers, but
        //     the current timer is closer.
        //
        //  So we coalesce by scheduling early.
        //
        timer->slack = zx_time_sub_time(entry->scheduled_time, timer->scheduled_time);
        timer->scheduled_time = entry->scheduled_time;
        kcounter_add(timer_coalesced_counter, 1);
        list_add_after(&entry->node, &timer->node);
        return;
    }

    // Walked off the end of the list and there was no overlap.
    timer->slack = 0;
    list_add_tail(&percpu[cpu].timer_queue, &timer->node);
}

void timer_set(timer_t* timer, zx_time_t deadline, TimerSlack slack,
               timer_callback callback, void* arg) {
    LTRACEF("timer %p deadline %" PRIi64 " slack.amount %" PRIi64
            " slack.mode %u callback %p arg %p\n",
            timer, deadline, slack.amount(), slack.mode(), callback, arg);

    DEBUG_ASSERT(timer->magic == TIMER_MAGIC);
    DEBUG_ASSERT(slack.mode() <= TIMER_SLACK_EARLY);
    DEBUG_ASSERT(slack.amount() >= 0);

    if (list_in_list(&timer->node)) {
        panic("timer %p already in list\n", timer);
    }

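    // Compute the acceptable firing window [earliest_deadline, latest_deadline].
    // With illustrative numbers: deadline = 100 and slack.amount() = 10 yield
    // [90, 110] for TIMER_SLACK_CENTER, [100, 110] for TIMER_SLACK_LATE, and
    // [90, 100] for TIMER_SLACK_EARLY.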
    zx_time_t latest_deadline;
    zx_time_t earliest_deadline;

    if (slack.amount() == 0u) {
        latest_deadline = deadline;
        earliest_deadline = deadline;
    } else {
        switch (slack.mode()) {
        case TIMER_SLACK_CENTER:
            latest_deadline = zx_time_add_duration(deadline, slack.amount());
            earliest_deadline = zx_time_sub_duration(deadline, slack.amount());
            break;
        case TIMER_SLACK_LATE:
            latest_deadline = zx_time_add_duration(deadline, slack.amount());
            earliest_deadline = deadline;
            break;
        case TIMER_SLACK_EARLY:
            earliest_deadline = zx_time_sub_duration(deadline, slack.amount());
            latest_deadline = deadline;
            break;
        default:
            panic("invalid timer mode\n");
        };
    }

    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};

    uint cpu = arch_curr_cpu_num();

    bool currently_active = (timer->active_cpu == (int)cpu);
    if (unlikely(currently_active)) {
        // the timer is active on our own cpu, we must be inside the callback
        if (timer->cancel) {
            return;
        }
    } else if (unlikely(timer->active_cpu >= 0)) {
        panic("timer %p currently active on a different cpu %d\n", timer, timer->active_cpu);
    }

    // Set up the structure.
    timer->scheduled_time = deadline;
    timer->callback = callback;
    timer->arg = arg;
    timer->cancel = false;
    // We don't need to modify timer->active_cpu because it is managed by timer_tick().

    LTRACEF("scheduled time %" PRIi64 "\n", timer->scheduled_time);

    insert_timer_in_queue(cpu, timer, earliest_deadline, latest_deadline);
    kcounter_add(timer_created_counter, 1);

    if (list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node) == timer) {
        // we just modified the head of the timer queue
        update_platform_timer(cpu, deadline);
    }
}


void timer_preempt_reset(zx_time_t deadline) {
    DEBUG_ASSERT(arch_ints_disabled());

    uint cpu = arch_curr_cpu_num();

    LTRACEF("preempt timer cpu %u deadline %" PRIi64 "\n", cpu, deadline);

    percpu[cpu].preempt_timer_deadline = deadline;

    update_platform_timer(cpu, deadline);
}

void timer_preempt_cancel() {
    DEBUG_ASSERT(arch_ints_disabled());

    uint cpu = arch_curr_cpu_num();

    percpu[cpu].preempt_timer_deadline = ZX_TIME_INFINITE;

    // Note, we're not updating the platform timer. It's entirely possible the timer queue is empty
    // and the preemption timer is the only reason the platform timer is set. To know that, we'd
    // need to acquire a lock and look at the queue. Rather than pay that cost, leave the platform
    // timer as is and expect the recipient to handle spurious wakeups.
}

bool timer_cancel(timer_t* timer) {
    DEBUG_ASSERT(timer->magic == TIMER_MAGIC);

    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};

    uint cpu = arch_curr_cpu_num();

    // mark the timer as canceled
    timer->cancel = true;
    mb();

    // see if we're trying to cancel the timer we're currently in the middle of handling
    if (unlikely(timer->active_cpu == (int)cpu)) {
        // zero it out
        timer->callback = NULL;
        timer->arg = NULL;

        // we're done, so return back to the callback
        return false;
    }

    bool callback_not_running;

    // if the timer is in a queue, remove it and adjust hardware timers if needed
    if (list_in_list(&timer->node)) {
        callback_not_running = true;

        // save a copy of the old head of the queue so later we can see if we modified the head
        timer_t* oldhead = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);

        // remove our timer from the queue
        list_delete(&timer->node);
        kcounter_add(timer_canceled_counter, 1);

        // TODO(cpu): if, after removing |timer|, there is exactly one other timer with
        // the same scheduled_time and non-zero slack, then it is possible to return
        // that timer to its ideal scheduled_time.

        // see if we've just modified the head of this cpu's timer queue.
        // if we modified another cpu's queue, we'll just let it fire and sort itself out
        if (unlikely(oldhead == timer)) {
            // timer we're canceling was at head of queue, see if we should update platform timer
            timer_t* newhead = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
            if (newhead) {
                update_platform_timer(cpu, newhead->scheduled_time);
            } else if (percpu[cpu].next_timer_deadline == ZX_TIME_INFINITE) {
                LTRACEF("clearing old hw timer, preempt timer not set, nothing in the queue\n");
                platform_stop_timer();
            }
        }
    } else {
        callback_not_running = false;
    }

    guard.Release();

    // wait for the timer to become un-busy in case a callback is currently active on another cpu
    while (timer->active_cpu >= 0) {
        arch_spinloop_pause();
    }

    // zero it out
    timer->callback = NULL;
    timer->arg = NULL;

    return callback_not_running;
}

// called at interrupt time to process any pending timers
void timer_tick(zx_time_t now) {
    timer_t* timer;

    DEBUG_ASSERT(arch_ints_disabled());

    CPU_STATS_INC(timer_ints);

    uint cpu = arch_curr_cpu_num();

    LTRACEF("cpu %u now %" PRIi64 ", sp %p\n", cpu, now, __GET_FRAME());

    // platform timer has fired, no deadline is set
    percpu[cpu].next_timer_deadline = ZX_TIME_INFINITE;

    // service preempt timer before acquiring the timer lock
    if (now >= percpu[cpu].preempt_timer_deadline) {
        percpu[cpu].preempt_timer_deadline = ZX_TIME_INFINITE;
        sched_preempt_timer_tick(now);
    }

    Guard<spin_lock_t, NoIrqSave> guard{TimerLock::Get()};

    for (;;) {
        // see if there's an event to process
        timer = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
        if (likely(timer == 0)) {
            break;
        }
        LTRACEF("next item on timer queue %p at %" PRIi64 " now %" PRIi64 " (%p, arg %p)\n",
                timer, timer->scheduled_time, now, timer->callback, timer->arg);
        if (likely(now < timer->scheduled_time)) {
            break;
        }

        // process it
        LTRACEF("timer %p\n", timer);
        DEBUG_ASSERT_MSG(timer && timer->magic == TIMER_MAGIC,
                         "ASSERT: timer failed magic check: timer %p, magic 0x%x\n",
                         timer, (uint)timer->magic);
        list_delete(&timer->node);

        // mark the timer busy
        timer->active_cpu = cpu;
        // Unlocking the spinlock in CallUnlocked acts as a memory barrier.

        // Now that the timer is off of the list, release the spinlock to handle
        // the callback, then re-acquire in case it is requeued.
        guard.CallUnlocked(
            [timer, now]() {
                LTRACEF("dequeued timer %p, scheduled %" PRIi64 "\n", timer, timer->scheduled_time);

                CPU_STATS_INC(timers);
                kcounter_add(timer_fired_counter, 1);

                LTRACEF("timer %p firing callback %p, arg %p\n", timer, timer->callback, timer->arg);
                timer->callback(timer, now, timer->arg);

                DEBUG_ASSERT(arch_ints_disabled());
            });

        // mark it not busy
        timer->active_cpu = -1;
        mb();
    }

    // get the deadline of the event at the head of the queue (if any)
    zx_time_t deadline = ZX_TIME_INFINITE;
    timer = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
    if (timer) {
        deadline = timer->scheduled_time;

        // has to be the case or it would have fired already
        DEBUG_ASSERT(deadline > now);
    }

    // we're done manipulating the timer queue
    guard.Release();

    // set the platform timer to the *soonest* of queue event and preempt timer
    if (percpu[cpu].preempt_timer_deadline < deadline) {
        deadline = percpu[cpu].preempt_timer_deadline;
    }
    update_platform_timer(cpu, deadline);
}

zx_status_t timer_trylock_or_cancel(timer_t* t, spin_lock_t* lock) {
    // Spin on the passed-in spinlock, waiting either to acquire it or for the
    // passed-in timer to be canceled.
    while (unlikely(spin_trylock(lock))) {
        // we failed to grab it, check for cancel
        if (t->cancel) {
            // we were canceled, so bail immediately
            return ZX_ERR_TIMED_OUT;
        }
        // tell the arch to wait
        arch_spinloop_pause();
    }

    return ZX_OK;
}
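
// Typical use of timer_trylock_or_cancel(), as a minimal sketch: a timer
// callback needs a lock that a would-be canceler may hold while it calls
// timer_cancel() on this timer. A plain spin_lock() here could deadlock; this
// helper spins but bails out once the timer is marked canceled. |some_lock|
// and the state it guards are hypothetical names, not part of this subsystem:
//
//   static void my_callback(timer_t* t, zx_time_t now, void* arg) {
//       if (timer_trylock_or_cancel(t, &some_lock) == ZX_ERR_TIMED_OUT) {
//           return;  // canceled; the canceler holds |some_lock|
//       }
//       // ... update state guarded by |some_lock| ...
//       spin_unlock(&some_lock);
//   }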

void timer_transition_off_cpu(uint old_cpu) {
    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};
    uint cpu = arch_curr_cpu_num();

    timer_t* old_head = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);

    timer_t *entry = NULL, *tmp_entry = NULL;
    // Move all timers from old_cpu to this cpu
    list_for_every_entry_safe (&percpu[old_cpu].timer_queue, entry, tmp_entry, timer_t, node) {
        list_delete(&entry->node);
        // We lost the original asymmetric slack information, so when these timers
        // are combined with the other timer queue they are not coalesced again.
        // TODO(cpu): figure out how important this case is.
        insert_timer_in_queue(cpu, entry, entry->scheduled_time, entry->scheduled_time);
        // Note, we do not increment the "created" counter here because we are simply moving these
        // timers from one queue to another and we already counted them when they were first
        // created.
    }

    timer_t* new_head = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
    if (new_head != NULL && new_head != old_head) {
        // we just modified the head of the timer queue
        update_platform_timer(cpu, new_head->scheduled_time);
    }

    // the old cpu has no timers left, so reset the deadlines
    percpu[old_cpu].preempt_timer_deadline = ZX_TIME_INFINITE;
    percpu[old_cpu].next_timer_deadline = ZX_TIME_INFINITE;
}

void timer_thaw_percpu(void) {
    DEBUG_ASSERT(arch_ints_disabled());
    Guard<spin_lock_t, NoIrqSave> guard{TimerLock::Get()};

    uint cpu = arch_curr_cpu_num();

    // reset next_timer_deadline so that update_platform_timer will reconfigure the timer
    percpu[cpu].next_timer_deadline = ZX_TIME_INFINITE;
    zx_time_t deadline = percpu[cpu].preempt_timer_deadline;

    timer_t* t = list_peek_head_type(&percpu[cpu].timer_queue, timer_t, node);
    if (t) {
        if (t->scheduled_time < deadline) {
            deadline = t->scheduled_time;
        }
    }

    guard.Release();

    update_platform_timer(cpu, deadline);
}

void timer_queue_init(void) {
    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
        list_initialize(&percpu[i].timer_queue);
        percpu[i].preempt_timer_deadline = ZX_TIME_INFINITE;
        percpu[i].next_timer_deadline = ZX_TIME_INFINITE;
    }
}

// print a timer queue dump into the passed in buffer
static void dump_timer_queues(char* buf, size_t len) {
    size_t ptr = 0;
    zx_time_t now = current_time();

    Guard<spin_lock_t, IrqSave> guard{TimerLock::Get()};

    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
        if (mp_is_cpu_online(i)) {
            ptr += snprintf(buf + ptr, len - ptr, "cpu %u:\n", i);

            timer_t* t;
            zx_time_t last = now;
            list_for_every_entry (&percpu[i].timer_queue, t, timer_t, node) {
                zx_duration_t delta_now = zx_time_sub_time(t->scheduled_time, now);
                zx_duration_t delta_last = zx_time_sub_time(t->scheduled_time, last);
                ptr += snprintf(buf + ptr, len - ptr,
                                "\ttime %" PRIi64 " delta_now %" PRIi64 " delta_last %" PRIi64 " func %p arg %p\n",
                                t->scheduled_time, delta_now, delta_last, t->callback, t->arg);
                last = t->scheduled_time;
            }
        }
    }
}

#include <lib/console.h>

static int cmd_timers(int argc, const cmd_args* argv, uint32_t flags) {
    const size_t timer_buffer_size = PAGE_SIZE;

    // allocate a buffer to dump the timer queue into to avoid reentrancy issues with the
    // timer spinlock
    char* buf = static_cast<char*>(malloc(timer_buffer_size));
    if (!buf) {
        return ZX_ERR_NO_MEMORY;
    }

    dump_timer_queues(buf, timer_buffer_size);

    printf("%s", buf);

    free(buf);

    return 0;
}

STATIC_COMMAND_START
#if LK_DEBUGLEVEL > 1
STATIC_COMMAND_MASKED("timers", "dump the current kernel timer queues", &cmd_timers, CMD_AVAIL_NORMAL)
#endif
STATIC_COMMAND_END(kernel);