/******************************************************************************
 * timer.c
 *
 * Copyright (c) 2002-2003 Rolf Neugebauer
 * Copyright (c) 2002-2005 K A Fraser
 */

#include <xen/init.h>
#include <xen/types.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <xen/time.h>
#include <xen/softirq.h>
#include <xen/timer.h>
#include <xen/keyhandler.h>
#include <xen/percpu.h>
#include <xen/cpu.h>
#include <xen/rcupdate.h>
#include <xen/symbols.h>
#include <asm/system.h>
#include <asm/atomic.h>

/* We program the time hardware this far behind the closest deadline. */
static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
integer_param("timer_slop", timer_slop);

struct timers {
    spinlock_t     lock;
    struct timer **heap;
    struct timer  *list;
    struct timer  *running;
    struct list_head inactive;
} __cacheline_aligned;

static DEFINE_PER_CPU(struct timers, timers);

/* Protects lock-free access to per-timer cpu field against cpu offlining. */
static DEFINE_RCU_READ_LOCK(timer_cpu_read_lock);

DEFINE_PER_CPU(s_time_t, timer_deadline);

/****************************************************************************
 * HEAP OPERATIONS.
 *
 * Slot 0 of the heap is never a valid timer pointer, and instead holds the
 * heap metadata.
 */

struct heap_metadata {
    uint16_t size, limit;
};

static struct heap_metadata *heap_metadata(struct timer **heap)
{
    /* Check that our type-punning doesn't overflow into heap[1] */
    BUILD_BUG_ON(sizeof(struct heap_metadata) > sizeof(struct timer *));

    return (struct heap_metadata *)&heap[0];
}

/* Sink down element @pos of @heap. */
static void down_heap(struct timer **heap, unsigned int pos)
{
    unsigned int sz = heap_metadata(heap)->size, nxt;
    struct timer *t = heap[pos];

    while ( (nxt = (pos << 1)) <= sz )
    {
        if ( ((nxt+1) <= sz) && (heap[nxt+1]->expires < heap[nxt]->expires) )
            nxt++;
        if ( heap[nxt]->expires > t->expires )
            break;
        heap[pos] = heap[nxt];
        heap[pos]->heap_offset = pos;
        pos = nxt;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}

/* Float element @pos up @heap. */
static void up_heap(struct timer **heap, unsigned int pos)
{
    struct timer *t = heap[pos];

    while ( (pos > 1) && (t->expires < heap[pos>>1]->expires) )
    {
        heap[pos] = heap[pos>>1];
        heap[pos]->heap_offset = pos;
        pos >>= 1;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}


/* Delete @t from @heap. Return TRUE if new top of heap. */
static int remove_from_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;
    unsigned int pos = t->heap_offset;

    if ( unlikely(pos == sz) )
    {
        heap_metadata(heap)->size = sz - 1;
        goto out;
    }

    heap[pos] = heap[sz];
    heap[pos]->heap_offset = pos;

    heap_metadata(heap)->size = --sz;

    if ( (pos > 1) && (heap[pos]->expires < heap[pos>>1]->expires) )
        up_heap(heap, pos);
    else
        down_heap(heap, pos);

 out:
    return (pos == 1);
}


/* Add new entry @t to @heap. Return TRUE if new top of heap. */
static int add_to_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;

    /* Fail if the heap is full. */
    if ( unlikely(sz == heap_metadata(heap)->limit) )
        return 0;

    heap_metadata(heap)->size = ++sz;
    heap[sz] = t;
    t->heap_offset = sz;
    up_heap(heap, sz);

    return (t->heap_offset == 1);
}


/****************************************************************************
 * LINKED LIST OPERATIONS.
 */

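/* Remove @t from the list at @pprev. Return TRUE if @t was the list head. */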
static int remove_from_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( (curr = *_pprev) != t )
        _pprev = &curr->list_next;

    *_pprev = t->list_next;

    return (_pprev == pprev);
}

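/* Add @t to the expiry-ordered list at @pprev. Return TRUE if new head. */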
static int add_to_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( ((curr = *_pprev) != NULL) && (curr->expires <= t->expires) )
        _pprev = &curr->list_next;

    t->list_next = curr;
    *_pprev = t;

    return (_pprev == pprev);
}


/****************************************************************************
 * TIMER OPERATIONS.
 */

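/*
 * Remove @t from its CPU's heap or overflow list. Return TRUE if the
 * earliest pending deadline may have changed.
 */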
static int remove_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    switch ( t->status )
    {
    case TIMER_STATUS_in_heap:
        rc = remove_from_heap(timers->heap, t);
        break;
    case TIMER_STATUS_in_list:
        rc = remove_from_list(&timers->list, t);
        break;
    default:
        rc = 0;
        BUG();
    }

    t->status = TIMER_STATUS_invalid;
    return rc;
}

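/*
 * Queue @t on its CPU, preferring the heap and falling back to the overflow
 * list. Return TRUE if @t is the new earliest deadline.
 */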
static int add_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    ASSERT(t->status == TIMER_STATUS_invalid);

    /* Try to add to heap. t->heap_offset indicates whether we succeed. */
    t->heap_offset = 0;
    t->status = TIMER_STATUS_in_heap;
    rc = add_to_heap(timers->heap, t);
    if ( t->heap_offset != 0 )
        return rc;

    /* Fall back to adding to the slower linked list. */
    t->status = TIMER_STATUS_in_list;
    return add_to_list(&timers->list, t);
}

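/* Queue an inactive timer; kick its CPU if the earliest deadline changed. */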
static inline void activate_timer(struct timer *timer)
{
    ASSERT(timer->status == TIMER_STATUS_inactive);
    timer->status = TIMER_STATUS_invalid;
    list_del(&timer->inactive);

    if ( add_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);
}

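/* Dequeue a pending timer and park it on its CPU's inactive list. */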
static inline void deactivate_timer(struct timer *timer)
{
    if ( remove_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);

    timer->status = TIMER_STATUS_inactive;
    list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
}

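/*
 * Lock the timers structure of the timer's current CPU, re-reading the cpu
 * field in case the timer migrates before the lock is taken. Returns false
 * if the timer has been killed.
 */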
static inline bool timer_lock_unsafe(struct timer *timer)
{
    unsigned int cpu;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        cpu = read_atomic(&timer->cpu);
        if ( unlikely(cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return 0;
        }
        /* Use the speculation unsafe variant, the wrapper has the barrier. */
        _spin_lock(&per_cpu(timers, cpu).lock);
        if ( likely(timer->cpu == cpu) )
            break;
        spin_unlock(&per_cpu(timers, cpu).lock);
    }

    rcu_read_unlock(&timer_cpu_read_lock);
    return 1;
}

#define timer_lock_irqsave(t, flags) ({         \
    bool __x;                                   \
    local_irq_save(flags);                      \
    if ( !(__x = timer_lock_unsafe(t)) )        \
        local_irq_restore(flags);               \
    block_lock_speculation();                   \
    __x;                                        \
})

static inline void timer_unlock(struct timer *timer)
{
    spin_unlock(&per_cpu(timers, timer->cpu).lock);
}

#define timer_unlock_irqrestore(t, flags) ({    \
    timer_unlock(t);                            \
    local_irq_restore(flags);                   \
})


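/* A timer is active when it is queued for expiry (in the heap or list). */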
static bool active_timer(const struct timer *timer)
{
    ASSERT(timer->status >= TIMER_STATUS_inactive);
    return timer_is_active(timer);
}


void init_timer(
    struct timer *timer,
    void        (*function)(void *data),
    void         *data,
    unsigned int  cpu)
{
    unsigned long flags;
    memset(timer, 0, sizeof(*timer));
    timer->function = function;
    timer->data = data;
    write_atomic(&timer->cpu, cpu);
    timer->status = TIMER_STATUS_inactive;
    if ( !timer_lock_irqsave(timer, flags) )
        BUG();
    list_add(&timer->inactive, &per_cpu(timers, cpu).inactive);
    timer_unlock_irqrestore(timer, flags);
}


void set_timer(struct timer *timer, s_time_t expires)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer->expires = expires;

    activate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}


void stop_timer(struct timer *timer)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}

bool timer_expires_before(struct timer *timer, s_time_t t)
{
    unsigned long flags;
    bool ret;

    if ( !timer_lock_irqsave(timer, flags) )
        return false;

    ret = active_timer(timer) && timer->expires <= t;

    timer_unlock_irqrestore(timer, flags);

    return ret;
}

void migrate_timer(struct timer *timer, unsigned int new_cpu)
{
    unsigned int old_cpu;
#if CONFIG_NR_CPUS > 1
    bool active;
    unsigned long flags;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        old_cpu = read_atomic(&timer->cpu);
        if ( (old_cpu == new_cpu) || (old_cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return;
        }

        if ( old_cpu < new_cpu )
        {
            spin_lock_irqsave(&per_cpu(timers, old_cpu).lock, flags);
            spin_lock(&per_cpu(timers, new_cpu).lock);
        }
        else
        {
            spin_lock_irqsave(&per_cpu(timers, new_cpu).lock, flags);
            spin_lock(&per_cpu(timers, old_cpu).lock);
        }

        if ( likely(timer->cpu == old_cpu) )
             break;

        spin_unlock(&per_cpu(timers, old_cpu).lock);
        spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
    }

    rcu_read_unlock(&timer_cpu_read_lock);

    active = active_timer(timer);
    if ( active )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    write_atomic(&timer->cpu, new_cpu);
    list_add(&timer->inactive, &per_cpu(timers, new_cpu).inactive);

    if ( active )
        activate_timer(timer);

    spin_unlock(&per_cpu(timers, old_cpu).lock);
    spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
#else /* CONFIG_NR_CPUS == 1 */
    old_cpu = read_atomic(&timer->cpu);
    if ( old_cpu != TIMER_CPU_status_killed )
        WARN_ON(new_cpu != old_cpu);
#endif /* CONFIG_NR_CPUS */
}


void kill_timer(struct timer *timer)
{
    unsigned int old_cpu, cpu;
    unsigned long flags;

    BUG_ON(this_cpu(timers).running == timer);

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    timer->status = TIMER_STATUS_killed;
    old_cpu = timer->cpu;
    write_atomic(&timer->cpu, TIMER_CPU_status_killed);

    spin_unlock_irqrestore(&per_cpu(timers, old_cpu).lock, flags);

    for_each_online_cpu ( cpu )
        while ( per_cpu(timers, cpu).running == timer )
            cpu_relax();
}


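/*
 * Run one expired timer's callback with the queue lock dropped; ts->running
 * lets kill_timer() synchronise against the callback.
 */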
static void execute_timer(struct timers *ts, struct timer *t)
{
    void (*fn)(void *data) = t->function;
    void *data = t->data;

    t->status = TIMER_STATUS_inactive;
    list_add(&t->inactive, &ts->inactive);

    ts->running = t;
    spin_unlock_irq(&ts->lock);
    (*fn)(data);
    spin_lock_irq(&ts->lock);
    ts->running = NULL;
}


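/*
 * TIMER_SOFTIRQ handler: grow the heap if the overflow list was used, run
 * all expired timers, then reprogram the hardware timer for the next
 * deadline.
 */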
static void cf_check timer_softirq_action(void)
{
    struct timer  *t, **heap, *next;
    struct timers *ts;
    s_time_t       now, deadline;

    ts = &this_cpu(timers);
    heap = ts->heap;

    /* If we overflowed the heap, try to allocate a larger heap. */
    if ( unlikely(ts->list != NULL) )
    {
        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
        unsigned int old_limit = heap_metadata(heap)->limit;
        unsigned int new_limit = ((old_limit + 1) << 4) - 1;
        struct timer **newheap = NULL;

        /* Don't grow the heap beyond what is representable in its metadata. */
        if ( new_limit == (typeof(heap_metadata(heap)->limit))new_limit &&
             new_limit + 1 )
            newheap = xmalloc_array(struct timer *, new_limit + 1);
        else
            printk_once(XENLOG_WARNING "CPU%u: timer heap limit reached\n",
                        smp_processor_id());
        if ( newheap != NULL )
        {
            spin_lock_irq(&ts->lock);
            memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap));
            heap_metadata(newheap)->limit = new_limit;
            ts->heap = newheap;
            spin_unlock_irq(&ts->lock);
            if ( old_limit != 0 )
                xfree(heap);
            heap = newheap;
        }
    }

    spin_lock_irq(&ts->lock);

    now = NOW();

    /* Execute ready heap timers. */
    while ( (heap_metadata(heap)->size != 0) &&
            ((t = heap[1])->expires < now) )
    {
        remove_from_heap(heap, t);
        execute_timer(ts, t);
    }

    /* Execute ready list timers. */
    while ( ((t = ts->list) != NULL) && (t->expires < now) )
    {
        ts->list = t->list_next;
        execute_timer(ts, t);
    }

    /* Try to move timers from linked list to more efficient heap. */
    next = ts->list;
    ts->list = NULL;
    while ( unlikely((t = next) != NULL) )
    {
        next = t->list_next;
        t->status = TIMER_STATUS_invalid;
        add_entry(t);
    }

    /* Find earliest deadline from head of linked list and top of heap. */
    deadline = STIME_MAX;
    if ( heap_metadata(heap)->size != 0 )
        deadline = heap[1]->expires;
    if ( (ts->list != NULL) && (ts->list->expires < deadline) )
        deadline = ts->list->expires;
    now = NOW();
    this_cpu(timer_deadline) =
        (deadline == STIME_MAX) ? 0 : MAX(deadline, now + timer_slop);

    if ( !reprogram_timer(this_cpu(timer_deadline)) )
        raise_softirq(TIMER_SOFTIRQ);

    spin_unlock_irq(&ts->lock);
}

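/*
 * Round @firsttick up to the next multiple of @period; no change if already
 * aligned or @period is zero.
 */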
s_time_t align_timer(s_time_t firsttick, uint64_t period)
{
    if ( !period )
        return firsttick;

    return firsttick + (period - 1) - ((firsttick - 1) % period);
}

static void dump_timer(struct timer *t, s_time_t now)
{
    printk("  ex=%12"PRId64"us timer=%p cb=%ps(%p)\n",
           (t->expires - now) / 1000, t, t->function, t->data);
}

static void cf_check dump_timerq(unsigned char key)
{
    struct timer  *t;
    struct timers *ts;
    unsigned long  flags;
    s_time_t       now = NOW();
    unsigned int   i, j;

    printk("Dumping timer queues:\n");

    for_each_online_cpu( i )
    {
        ts = &per_cpu(timers, i);

        printk("CPU%02d:\n", i);
        spin_lock_irqsave(&ts->lock, flags);
        for ( j = 1; j <= heap_metadata(ts->heap)->size; j++ )
            dump_timer(ts->heap[j], now);
        for ( t = ts->list; t != NULL; t = t->list_next )
            dump_timer(t, now);
        spin_unlock_irqrestore(&ts->lock, flags);
    }
}

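/*
 * Move all of @old_cpu's timers, pending and inactive, to an online CPU as
 * @old_cpu goes offline.
 */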
static void migrate_timers_from_cpu(unsigned int old_cpu)
{
    unsigned int new_cpu = cpumask_any(&cpu_online_map);
    struct timers *old_ts, *new_ts;
    struct timer *t;
    bool notify = false;

    ASSERT(!cpu_online(old_cpu) && cpu_online(new_cpu));

    old_ts = &per_cpu(timers, old_cpu);
    new_ts = &per_cpu(timers, new_cpu);

    if ( old_cpu < new_cpu )
    {
        spin_lock_irq(&old_ts->lock);
        spin_lock(&new_ts->lock);
    }
    else
    {
        spin_lock_irq(&new_ts->lock);
        spin_lock(&old_ts->lock);
    }

    while ( (t = heap_metadata(old_ts->heap)->size
             ? old_ts->heap[1] : old_ts->list) != NULL )
    {
        remove_entry(t);
        write_atomic(&t->cpu, new_cpu);
        notify |= add_entry(t);
    }

    while ( !list_empty(&old_ts->inactive) )
    {
        t = list_entry(old_ts->inactive.next, struct timer, inactive);
        list_del(&t->inactive);
        write_atomic(&t->cpu, new_cpu);
        list_add(&t->inactive, &new_ts->inactive);
    }

    spin_unlock(&old_ts->lock);
    spin_unlock_irq(&new_ts->lock);

    if ( notify )
        cpu_raise_softirq(new_cpu, TIMER_SOFTIRQ);
}

/*
 * All CPUs initially share an empty dummy heap. Only those CPUs that
 * are brought online will be dynamically allocated their own heap.
 * The size/limit metadata are both 0 by being in .bss
 */
static struct timer *dummy_heap[1];

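/* Release a CPU's dynamically allocated heap; fall back to the dummy heap. */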
static void free_percpu_timers(unsigned int cpu)
{
    struct timers *ts = &per_cpu(timers, cpu);

    ASSERT(heap_metadata(ts->heap)->size == 0);
    if ( heap_metadata(ts->heap)->limit )
    {
        xfree(ts->heap);
        ts->heap = dummy_heap;
    }
    else
        ASSERT(ts->heap == dummy_heap);
}

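/*
 * CPU notifier: initialise per-CPU state on bring-up; evacuate timers and
 * optionally free the heap on tear-down.
 */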
static int cf_check cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct timers *ts = &per_cpu(timers, cpu);

    switch ( action )
    {
    case CPU_UP_PREPARE:
        /* Only initialise ts once. */
        if ( !ts->heap )
        {
            INIT_LIST_HEAD(&ts->inactive);
            spin_lock_init(&ts->lock);
            ts->heap = dummy_heap;
        }
        break;

    case CPU_UP_CANCELED:
    case CPU_DEAD:
    case CPU_RESUME_FAILED:
        migrate_timers_from_cpu(cpu);

        if ( !park_offline_cpus && system_state != SYS_STATE_suspend )
            free_percpu_timers(cpu);
        break;

    case CPU_REMOVE:
        if ( park_offline_cpus )
            free_percpu_timers(cpu);
        break;

    default:
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback,
    .priority = 99
};

void __init timer_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();

    open_softirq(TIMER_SOFTIRQ, timer_softirq_action);

    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);

    register_keyhandler('a', dump_timerq, "dump timer queues", 1);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */