/******************************************************************************
 * timer.c
 *
 * Copyright (c) 2002-2003 Rolf Neugebauer
 * Copyright (c) 2002-2005 K A Fraser
 */

#include <xen/init.h>
#include <xen/types.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/param.h>
#include <xen/sections.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <xen/time.h>
#include <xen/softirq.h>
#include <xen/timer.h>
#include <xen/keyhandler.h>
#include <xen/percpu.h>
#include <xen/cpu.h>
#include <xen/rcupdate.h>
#include <xen/symbols.h>
#include <asm/system.h>
#include <asm/atomic.h>

/* We program the time hardware this far behind the closest deadline. */
static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
integer_param("timer_slop", timer_slop);
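
/*
 * integer_param() additionally exposes the slop as a hypervisor command line
 * option, so it can be tuned at boot without rebuilding, e.g.
 * "timer_slop=100000" for 100us (the value is in nanoseconds).
 */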

struct timers {
    spinlock_t     lock;
    struct timer **heap;
    struct timer  *list;
    struct timer  *running;
    struct list_head inactive;
} __cacheline_aligned;

static DEFINE_PER_CPU(struct timers, timers);

/* Protects lock-free access to per-timer cpu field against cpu offlining. */
static DEFINE_RCU_READ_LOCK(timer_cpu_read_lock);

DEFINE_PER_CPU(s_time_t, timer_deadline);

/****************************************************************************
 * HEAP OPERATIONS.
 *
 * Slot 0 of the heap is never a valid timer pointer, and instead holds the
 * heap metadata.
 */

struct heap_metadata {
    uint16_t size, limit;
};

static struct heap_metadata *heap_metadata(struct timer **heap)
{
    /* Check that our type-punning doesn't overflow into heap[1] */
    BUILD_BUG_ON(sizeof(struct heap_metadata) > sizeof(struct timer *));

    return (struct heap_metadata *)&heap[0];
}
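
/*
 * The heap is a conventional 1-based binary min-heap keyed on ->expires:
 * heap[1] is the earliest-expiring timer, and the children of heap[pos]
 * live at heap[pos*2] and heap[pos*2+1].  Packing {size, limit} into the
 * otherwise unused slot 0 keeps the metadata in the same allocation as the
 * heap itself.
 */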

/* Sink down element @pos of @heap. */
static void down_heap(struct timer **heap, unsigned int pos)
{
    unsigned int sz = heap_metadata(heap)->size, nxt;
    struct timer *t = heap[pos];

    while ( (nxt = (pos << 1)) <= sz )
    {
        if ( ((nxt+1) <= sz) && (heap[nxt+1]->expires < heap[nxt]->expires) )
            nxt++;
        if ( heap[nxt]->expires > t->expires )
            break;
        heap[pos] = heap[nxt];
        heap[pos]->heap_offset = pos;
        pos = nxt;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}

/* Float element @pos up @heap. */
static void up_heap(struct timer **heap, unsigned int pos)
{
    struct timer *t = heap[pos];

    while ( (pos > 1) && (t->expires < heap[pos>>1]->expires) )
    {
        heap[pos] = heap[pos>>1];
        heap[pos]->heap_offset = pos;
        pos >>= 1;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}


/* Delete @t from @heap. Return TRUE if new top of heap. */
static int remove_from_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;
    unsigned int pos = t->heap_offset;

    if ( unlikely(pos == sz) )
    {
        heap_metadata(heap)->size = sz - 1;
        goto out;
    }

    heap[pos] = heap[sz];
    heap[pos]->heap_offset = pos;

    heap_metadata(heap)->size = --sz;

    if ( (pos > 1) && (heap[pos]->expires < heap[pos>>1]->expires) )
        up_heap(heap, pos);
    else
        down_heap(heap, pos);

 out:
    return (pos == 1);
}


/* Add new entry @t to @heap. Return TRUE if new top of heap. */
static int add_to_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;

    /* Fail if the heap is full. */
    if ( unlikely(sz == heap_metadata(heap)->limit) )
        return 0;

    heap_metadata(heap)->size = ++sz;
    heap[sz] = t;
    t->heap_offset = sz;
    up_heap(heap, sz);

    return (t->heap_offset == 1);
}


/****************************************************************************
 * LINKED LIST OPERATIONS.
 */

static int remove_from_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( (curr = *_pprev) != t )
        _pprev = &curr->list_next;

    *_pprev = t->list_next;

    return (_pprev == pprev);
}

static int add_to_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( ((curr = *_pprev) != NULL) && (curr->expires <= t->expires) )
        _pprev = &curr->list_next;

    t->list_next = curr;
    *_pprev = t;

    return (_pprev == pprev);
}


/****************************************************************************
 * TIMER OPERATIONS.
 */

static int remove_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    switch ( t->status )
    {
    case TIMER_STATUS_in_heap:
        rc = remove_from_heap(timers->heap, t);
        break;
    case TIMER_STATUS_in_list:
        rc = remove_from_list(&timers->list, t);
        break;
    default:
        rc = 0;
        BUG();
    }

    t->status = TIMER_STATUS_invalid;
    return rc;
}

static int add_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    ASSERT(t->status == TIMER_STATUS_invalid);

    /* Try to add to heap. t->heap_offset indicates whether we succeed. */
    t->heap_offset = 0;
    t->status = TIMER_STATUS_in_heap;
    rc = add_to_heap(timers->heap, t);
    if ( t->heap_offset != 0 )
        return rc;

    /* Fall back to adding to the slower linked list. */
    t->status = TIMER_STATUS_in_list;
    return add_to_list(&timers->list, t);
}
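
/*
 * The overflow list maintained by add_to_list() stays sorted by ->expires,
 * so it degrades gracefully (O(n) insertion) when the heap is full; the
 * timer softirq handler later tries to grow the heap and re-absorb any
 * timers queued on the list.
 */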

static inline void activate_timer(struct timer *timer)
{
    ASSERT(timer->status == TIMER_STATUS_inactive);
    timer->status = TIMER_STATUS_invalid;
    list_del(&timer->inactive);

    if ( add_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);
}

static inline void deactivate_timer(struct timer *timer)
{
    if ( remove_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);

    timer->status = TIMER_STATUS_inactive;
    list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
}

static inline bool timer_lock_unsafe(struct timer *timer)
{
    unsigned int cpu;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        cpu = read_atomic(&timer->cpu);
        if ( unlikely(cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return 0;
        }
        /* Use the speculation unsafe variant, the wrapper has the barrier. */
        _spin_lock(&per_cpu(timers, cpu).lock);
        if ( likely(timer->cpu == cpu) )
            break;
        spin_unlock(&per_cpu(timers, cpu).lock);
    }

    rcu_read_unlock(&timer_cpu_read_lock);
    return 1;
}
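
/*
 * Lock acquisition follows a read/lock/re-check pattern: sample timer->cpu,
 * take that CPU's queue lock, then verify the timer was not migrated in the
 * meantime, retrying if it was.  The RCU read section covers the lock-free
 * read of timer->cpu (see timer_cpu_read_lock above); a cpu value of
 * TIMER_CPU_status_killed means the timer is being destroyed, in which case
 * the lock attempt fails.
 */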

#define timer_lock_irqsave(t, flags) ({         \
    bool __x;                                   \
    local_irq_save(flags);                      \
    if ( !(__x = timer_lock_unsafe(t)) )        \
        local_irq_restore(flags);               \
    block_lock_speculation();                   \
    __x;                                        \
})

static inline void timer_unlock(struct timer *timer)
{
    spin_unlock(&per_cpu(timers, timer->cpu).lock);
}

#define timer_unlock_irqrestore(t, flags) ({    \
    timer_unlock(t);                            \
    local_irq_restore(flags);                   \
})


static bool active_timer(const struct timer *timer)
{
    ASSERT(timer->status >= TIMER_STATUS_inactive);
    return timer_is_active(timer);
}


void init_timer(
    struct timer *timer,
    void        (*function)(void *data),
    void         *data,
    unsigned int  cpu)
{
    unsigned long flags;
    memset(timer, 0, sizeof(*timer));
    timer->function = function;
    timer->data = data;
    write_atomic(&timer->cpu, cpu);
    timer->status = TIMER_STATUS_inactive;
    if ( !timer_lock_irqsave(timer, flags) )
        BUG();
    list_add(&timer->inactive, &per_cpu(timers, cpu).inactive);
    timer_unlock_irqrestore(timer, flags);
}


void set_timer(struct timer *timer, s_time_t expires)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer->expires = expires;

    activate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}


void stop_timer(struct timer *timer)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}

bool timer_expires_before(struct timer *timer, s_time_t t)
{
    unsigned long flags;
    bool ret;

    if ( !timer_lock_irqsave(timer, flags) )
        return false;

    ret = active_timer(timer) && timer->expires <= t;

    timer_unlock_irqrestore(timer, flags);

    return ret;
}

void migrate_timer(struct timer *timer, unsigned int new_cpu)
{
    unsigned int old_cpu;
#if CONFIG_NR_CPUS > 1
    bool active;
    unsigned long flags;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        old_cpu = read_atomic(&timer->cpu);
        if ( (old_cpu == new_cpu) || (old_cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return;
        }

        if ( old_cpu < new_cpu )
        {
            spin_lock_irqsave(&per_cpu(timers, old_cpu).lock, flags);
            spin_lock(&per_cpu(timers, new_cpu).lock);
        }
        else
        {
            spin_lock_irqsave(&per_cpu(timers, new_cpu).lock, flags);
            spin_lock(&per_cpu(timers, old_cpu).lock);
        }

        if ( likely(timer->cpu == old_cpu) )
             break;

        spin_unlock(&per_cpu(timers, old_cpu).lock);
        spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
    }

    rcu_read_unlock(&timer_cpu_read_lock);

    active = active_timer(timer);
    if ( active )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    write_atomic(&timer->cpu, new_cpu);
    list_add(&timer->inactive, &per_cpu(timers, new_cpu).inactive);

    if ( active )
        activate_timer(timer);

    spin_unlock(&per_cpu(timers, old_cpu).lock);
    spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
#else /* CONFIG_NR_CPUS == 1 */
    old_cpu = read_atomic(&timer->cpu);
    if ( old_cpu != TIMER_CPU_status_killed )
        WARN_ON(new_cpu != old_cpu);
#endif /* CONFIG_NR_CPUS */
}
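
/*
 * Note the lock ordering above: whenever two per-CPU queue locks must be
 * held, the lower-numbered CPU's lock is taken first.
 * migrate_timers_from_cpu() follows the same rule, so concurrent migrations
 * cannot deadlock on each other's locks.
 */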


void kill_timer(struct timer *timer)
{
    unsigned int old_cpu, cpu;
    unsigned long flags;

    BUG_ON(this_cpu(timers).running == timer);

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    timer->status = TIMER_STATUS_killed;
    old_cpu = timer->cpu;
    write_atomic(&timer->cpu, TIMER_CPU_status_killed);

    spin_unlock_irqrestore(&per_cpu(timers, old_cpu).lock, flags);

    for_each_online_cpu ( cpu )
        while ( per_cpu(timers, cpu).running == timer )
            cpu_relax();
}
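
/*
 * Typical lifecycle, for illustration only (my_timer/my_callback/my_data are
 * hypothetical names):
 *
 *     init_timer(&my_timer, my_callback, my_data, smp_processor_id());
 *     set_timer(&my_timer, NOW() + MILLISECS(10));   - arm (or re-arm)
 *     stop_timer(&my_timer);                         - disarm, still reusable
 *     kill_timer(&my_timer);                         - final teardown
 *
 * kill_timer() additionally waits for any in-flight callback on another CPU
 * to complete, hence the BUG_ON() against killing a timer from within its
 * own handler.
 */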


static void execute_timer(struct timers *ts, struct timer *t)
{
    void (*fn)(void *data) = t->function;
    void *data = t->data;

    t->status = TIMER_STATUS_inactive;
    list_add(&t->inactive, &ts->inactive);

    ts->running = t;
    spin_unlock_irq(&ts->lock);
    (*fn)(data);
    spin_lock_irq(&ts->lock);
    ts->running = NULL;
}
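
/*
 * The queue lock is dropped around the callback invocation above, so a
 * handler can re-arm or stop timers (including its own) without deadlocking
 * on the queue lock.  ts->running is what kill_timer() polls to make sure a
 * handler has finished before the timer is torn down.
 */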


static void cf_check timer_softirq_action(void)
{
    struct timer  *t, **heap, *next;
    struct timers *ts;
    s_time_t       now, deadline;

    ts = &this_cpu(timers);
    heap = ts->heap;

    /* If we overflowed the heap, try to allocate a larger heap. */
    if ( unlikely(ts->list != NULL) )
    {
        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
        unsigned int old_limit = heap_metadata(heap)->limit;
        unsigned int new_limit = ((old_limit + 1) << 4) - 1;
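        /* i.e. the limit grows 0 -> 15 -> 255 -> 4095 -> 65535. */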
        struct timer **newheap = NULL;

        /* Don't grow the heap beyond what is representable in its metadata. */
        if ( new_limit == (typeof(heap_metadata(heap)->limit))new_limit &&
             new_limit + 1 )
            newheap = xmalloc_array(struct timer *, new_limit + 1);
        else
            printk_once(XENLOG_WARNING "CPU%u: timer heap limit reached\n",
                        smp_processor_id());
        if ( newheap != NULL )
        {
            spin_lock_irq(&ts->lock);
            memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap));
            heap_metadata(newheap)->limit = new_limit;
            ts->heap = newheap;
            spin_unlock_irq(&ts->lock);
            if ( old_limit != 0 )
                xfree(heap);
            heap = newheap;
        }
    }

    spin_lock_irq(&ts->lock);

    now = NOW();

    /* Execute ready heap timers. */
    while ( (heap_metadata(heap)->size != 0) &&
            ((t = heap[1])->expires < now) )
    {
        remove_from_heap(heap, t);
        execute_timer(ts, t);
    }

    /* Execute ready list timers. */
    while ( ((t = ts->list) != NULL) && (t->expires < now) )
    {
        ts->list = t->list_next;
        execute_timer(ts, t);
    }

    /* Try to move timers from linked list to more efficient heap. */
    next = ts->list;
    ts->list = NULL;
    while ( unlikely((t = next) != NULL) )
    {
        next = t->list_next;
        t->status = TIMER_STATUS_invalid;
        add_entry(t);
    }

    /* Find earliest deadline from head of linked list and top of heap. */
    deadline = STIME_MAX;
    if ( heap_metadata(heap)->size != 0 )
        deadline = heap[1]->expires;
    if ( (ts->list != NULL) && (ts->list->expires < deadline) )
        deadline = ts->list->expires;
    now = NOW();
    this_cpu(timer_deadline) =
        (deadline == STIME_MAX) ? 0 : MAX(deadline, now + timer_slop);

    if ( !reprogram_timer(this_cpu(timer_deadline)) )
        raise_softirq(TIMER_SOFTIRQ);

    spin_unlock_irq(&ts->lock);
}

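/*
 * Rounds firsttick up to the next multiple of period (exact multiples are
 * returned unchanged), e.g. align_timer(105, 10) == 110 and
 * align_timer(110, 10) == 110.  A zero period means no alignment.
 */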
s_time_t align_timer(s_time_t firsttick, uint64_t period)
{
    if ( !period )
        return firsttick;

    return firsttick + (period - 1) - ((firsttick - 1) % period);
}

static void dump_timer(struct timer *t, s_time_t now)
{
    printk("  ex=%12"PRId64"us timer=%p cb=%ps(%p)\n",
           (t->expires - now) / 1000, t, t->function, t->data);
}

static void cf_check dump_timerq(unsigned char key)
{
    struct timer  *t;
    struct timers *ts;
    unsigned long  flags;
    s_time_t       now = NOW();
    unsigned int   i, j;

    printk("Dumping timer queues:\n");

    for_each_online_cpu( i )
    {
        ts = &per_cpu(timers, i);

        printk("CPU%02d:\n", i);
        spin_lock_irqsave(&ts->lock, flags);
        for ( j = 1; j <= heap_metadata(ts->heap)->size; j++ )
            dump_timer(ts->heap[j], now);
        for ( t = ts->list; t != NULL; t = t->list_next )
            dump_timer(t, now);
        spin_unlock_irqrestore(&ts->lock, flags);
    }
}

static void migrate_timers_from_cpu(unsigned int old_cpu)
{
    unsigned int new_cpu = cpumask_any(&cpu_online_map);
    struct timers *old_ts, *new_ts;
    struct timer *t;
    bool notify = false;

    ASSERT(!cpu_online(old_cpu) && cpu_online(new_cpu));

    old_ts = &per_cpu(timers, old_cpu);
    new_ts = &per_cpu(timers, new_cpu);

    if ( old_cpu < new_cpu )
    {
        spin_lock_irq(&old_ts->lock);
        spin_lock(&new_ts->lock);
    }
    else
    {
        spin_lock_irq(&new_ts->lock);
        spin_lock(&old_ts->lock);
    }

    while ( (t = heap_metadata(old_ts->heap)->size
             ? old_ts->heap[1] : old_ts->list) != NULL )
    {
        remove_entry(t);
        write_atomic(&t->cpu, new_cpu);
        notify |= add_entry(t);
    }

    while ( !list_empty(&old_ts->inactive) )
    {
        t = list_entry(old_ts->inactive.next, struct timer, inactive);
        list_del(&t->inactive);
        write_atomic(&t->cpu, new_cpu);
        list_add(&t->inactive, &new_ts->inactive);
    }

    spin_unlock(&old_ts->lock);
    spin_unlock_irq(&new_ts->lock);

    if ( notify )
        cpu_raise_softirq(new_cpu, TIMER_SOFTIRQ);
}

/*
 * All CPUs initially share an empty dummy heap. Only those CPUs that
 * are brought online will be dynamically allocated their own heap.
 * The size/limit metadata are both 0 by being in .bss
 */
static struct timer *dummy_heap[1];

static void free_percpu_timers(unsigned int cpu)
{
    struct timers *ts = &per_cpu(timers, cpu);

    ASSERT(heap_metadata(ts->heap)->size == 0);
    if ( heap_metadata(ts->heap)->limit )
    {
        xfree(ts->heap);
        ts->heap = dummy_heap;
    }
    else
        ASSERT(ts->heap == dummy_heap);
}

static int cf_check cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct timers *ts = &per_cpu(timers, cpu);

    switch ( action )
    {
    case CPU_UP_PREPARE:
        /* Only initialise ts once. */
        if ( !ts->heap )
        {
            INIT_LIST_HEAD(&ts->inactive);
            spin_lock_init(&ts->lock);
            ts->heap = dummy_heap;
        }
        break;

    case CPU_UP_CANCELED:
    case CPU_DEAD:
    case CPU_RESUME_FAILED:
        migrate_timers_from_cpu(cpu);

        if ( !park_offline_cpus && system_state != SYS_STATE_suspend )
            free_percpu_timers(cpu);
        break;

    case CPU_REMOVE:
        if ( park_offline_cpus )
            free_percpu_timers(cpu);
        break;

    default:
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback,
    .priority = 99
};

void __init timer_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();

    open_softirq(TIMER_SOFTIRQ, timer_softirq_action);

    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);

    register_keyhandler('a', dump_timerq, "dump timer queues", 1);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */