/******************************************************************************
 * timer.c
 *
 * Copyright (c) 2002-2003 Rolf Neugebauer
 * Copyright (c) 2002-2005 K A Fraser
 */

#include <xen/init.h>
#include <xen/types.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/param.h>
#include <xen/sections.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <xen/time.h>
#include <xen/softirq.h>
#include <xen/timer.h>
#include <xen/keyhandler.h>
#include <xen/percpu.h>
#include <xen/cpu.h>
#include <xen/rcupdate.h>
#include <xen/symbols.h>
#include <asm/system.h>
#include <asm/atomic.h>

/* We program the time hardware this far behind the closest deadline. */
static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
integer_param("timer_slop", timer_slop);

struct timers {
    spinlock_t lock;
    struct timer **heap;
    struct timer *list;
    struct timer *running;
    struct list_head inactive;
} __cacheline_aligned;

static DEFINE_PER_CPU(struct timers, timers);

/* Protects lock-free access to per-timer cpu field against cpu offlining. */
static DEFINE_RCU_READ_LOCK(timer_cpu_read_lock);

DEFINE_PER_CPU(s_time_t, timer_deadline);

/****************************************************************************
 * HEAP OPERATIONS.
 *
 * Slot 0 of the heap is never a valid timer pointer, and instead holds the
 * heap metadata.
 */

struct heap_metadata {
    uint16_t size, limit;
};

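/* Accessor for the heap metadata type-punned into heap[0]. */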
static struct heap_metadata *heap_metadata(struct timer **heap)
{
    /* Check that our type-punning doesn't overflow into heap[1] */
    BUILD_BUG_ON(sizeof(struct heap_metadata) > sizeof(struct timer *));

    return (struct heap_metadata *)&heap[0];
}

/* Sink down element @pos of @heap. */
static void down_heap(struct timer **heap, unsigned int pos)
{
    unsigned int sz = heap_metadata(heap)->size, nxt;
    struct timer *t = heap[pos];

    while ( (nxt = (pos << 1)) <= sz )
    {
        if ( ((nxt+1) <= sz) && (heap[nxt+1]->expires < heap[nxt]->expires) )
            nxt++;
        if ( heap[nxt]->expires > t->expires )
            break;
        heap[pos] = heap[nxt];
        heap[pos]->heap_offset = pos;
        pos = nxt;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}

/* Float element @pos up @heap. */
static void up_heap(struct timer **heap, unsigned int pos)
{
    struct timer *t = heap[pos];

    while ( (pos > 1) && (t->expires < heap[pos>>1]->expires) )
    {
        heap[pos] = heap[pos>>1];
        heap[pos]->heap_offset = pos;
        pos >>= 1;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}


/* Delete @t from @heap. Return TRUE if new top of heap. */
static int remove_from_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;
    unsigned int pos = t->heap_offset;

    if ( unlikely(pos == sz) )
    {
        heap_metadata(heap)->size = sz - 1;
        goto out;
    }

    heap[pos] = heap[sz];
    heap[pos]->heap_offset = pos;

    heap_metadata(heap)->size = --sz;

    if ( (pos > 1) && (heap[pos]->expires < heap[pos>>1]->expires) )
        up_heap(heap, pos);
    else
        down_heap(heap, pos);

 out:
    return (pos == 1);
}


/* Add new entry @t to @heap. Return TRUE if new top of heap. */
static int add_to_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;

    /* Fail if the heap is full. */
    if ( unlikely(sz == heap_metadata(heap)->limit) )
        return 0;

    heap_metadata(heap)->size = ++sz;
    heap[sz] = t;
    t->heap_offset = sz;
    up_heap(heap, sz);

    return (t->heap_offset == 1);
}


/****************************************************************************
 * LINKED LIST OPERATIONS.
 */

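/* Remove @t from the list at @pprev. Return TRUE if @t was the list head. */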
static int remove_from_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( (curr = *_pprev) != t )
        _pprev = &curr->list_next;

    *_pprev = t->list_next;

    return (_pprev == pprev);
}

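/* Insert @t into the expiry-ordered list at @pprev. Return TRUE if new head. */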
static int add_to_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( ((curr = *_pprev) != NULL) && (curr->expires <= t->expires) )
        _pprev = &curr->list_next;

    t->list_next = curr;
    *_pprev = t;

    return (_pprev == pprev);
}


/****************************************************************************
 * TIMER OPERATIONS.
 */

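/*
 * Unlink @t from its CPU's heap or overflow list. Return TRUE if @t was the
 * earliest pending timer on that CPU.
 */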
static int remove_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    switch ( t->status )
    {
    case TIMER_STATUS_in_heap:
        rc = remove_from_heap(timers->heap, t);
        break;
    case TIMER_STATUS_in_list:
        rc = remove_from_list(&timers->list, t);
        break;
    default:
        rc = 0;
        BUG();
    }

    t->status = TIMER_STATUS_invalid;
    return rc;
}

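/*
 * Queue @t on its CPU's heap, or on the overflow list if the heap is full.
 * Return TRUE if @t is now the earliest pending timer on that CPU.
 */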
static int add_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    ASSERT(t->status == TIMER_STATUS_invalid);

    /* Try to add to heap. t->heap_offset indicates whether we succeed. */
    t->heap_offset = 0;
    t->status = TIMER_STATUS_in_heap;
    rc = add_to_heap(timers->heap, t);
    if ( t->heap_offset != 0 )
        return rc;

    /* Fall back to adding to the slower linked list. */
    t->status = TIMER_STATUS_in_list;
    return add_to_list(&timers->list, t);
}

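/*
 * Move @timer from the inactive list onto its CPU's pending queue, kicking
 * TIMER_SOFTIRQ if the CPU's earliest deadline changed.
 */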
static inline void activate_timer(struct timer *timer)
{
    ASSERT(timer->status == TIMER_STATUS_inactive);
    timer->status = TIMER_STATUS_invalid;
    list_del(&timer->inactive);

    if ( add_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);
}

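/*
 * Remove @timer from its CPU's pending queue and park it on the inactive
 * list, kicking TIMER_SOFTIRQ if the earliest deadline changed.
 */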
static inline void deactivate_timer(struct timer *timer)
{
    if ( remove_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);

    timer->status = TIMER_STATUS_inactive;
    list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
}

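/*
 * Lock the timer queue of the CPU currently owning @timer. The owning CPU
 * may change concurrently, so re-check ownership after taking the lock and
 * retry if the timer moved. Returns false if the timer has been killed.
 */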
static inline bool timer_lock_unsafe(struct timer *timer)
{
    unsigned int cpu;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        cpu = read_atomic(&timer->cpu);
        if ( unlikely(cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return 0;
        }
        /* Use the speculation unsafe variant, the wrapper has the barrier. */
        _spin_lock(&per_cpu(timers, cpu).lock);
        if ( likely(timer->cpu == cpu) )
            break;
        spin_unlock(&per_cpu(timers, cpu).lock);
    }

    rcu_read_unlock(&timer_cpu_read_lock);
    return 1;
}

#define timer_lock_irqsave(t, flags) ({         \
    bool __x;                                   \
    local_irq_save(flags);                      \
    if ( !(__x = timer_lock_unsafe(t)) )        \
        local_irq_restore(flags);               \
    block_lock_speculation();                   \
    __x;                                        \
})

static inline void timer_unlock(struct timer *timer)
{
    spin_unlock(&per_cpu(timers, timer->cpu).lock);
}

#define timer_unlock_irqrestore(t, flags) ({    \
    timer_unlock(t);                            \
    local_irq_restore(flags);                   \
})


static bool active_timer(const struct timer *timer)
{
    ASSERT(timer->status >= TIMER_STATUS_inactive);
    return timer_is_active(timer);
}


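/*
 * Initialise @timer to call @function(@data) and bind it to @cpu's queue.
 * The timer starts out inactive.
 */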
void init_timer(
    struct timer *timer,
    void (*function)(void *data),
    void *data,
    unsigned int cpu)
{
    unsigned long flags;
    memset(timer, 0, sizeof(*timer));
    timer->function = function;
    timer->data = data;
    write_atomic(&timer->cpu, cpu);
    timer->status = TIMER_STATUS_inactive;
    if ( !timer_lock_irqsave(timer, flags) )
        BUG();
    list_add(&timer->inactive, &per_cpu(timers, cpu).inactive);
    timer_unlock_irqrestore(timer, flags);
}


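/* (Re)arm @timer to fire at @expires, activating it if currently inactive. */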
void set_timer(struct timer *timer, s_time_t expires)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer->expires = expires;

    activate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}


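/* Deactivate @timer if it is pending; a no-op if it is already inactive. */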
void stop_timer(struct timer *timer)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}

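/* Return true if @timer is active and due to fire no later than @t. */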
bool timer_expires_before(struct timer *timer, s_time_t t)
{
    unsigned long flags;
    bool ret;

    if ( !timer_lock_irqsave(timer, flags) )
        return false;

    ret = active_timer(timer) && timer->expires <= t;

    timer_unlock_irqrestore(timer, flags);

    return ret;
}

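/*
 * Move @timer to @new_cpu's queue, preserving its active/inactive state.
 * Both queue locks are taken in a fixed (lower CPU number first) order to
 * avoid deadlock against concurrent migrations.
 */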
void migrate_timer(struct timer *timer, unsigned int new_cpu)
{
    unsigned int old_cpu;
#if CONFIG_NR_CPUS > 1
    bool active;
    unsigned long flags;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        old_cpu = read_atomic(&timer->cpu);
        if ( (old_cpu == new_cpu) || (old_cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return;
        }

        if ( old_cpu < new_cpu )
        {
            spin_lock_irqsave(&per_cpu(timers, old_cpu).lock, flags);
            spin_lock(&per_cpu(timers, new_cpu).lock);
        }
        else
        {
            spin_lock_irqsave(&per_cpu(timers, new_cpu).lock, flags);
            spin_lock(&per_cpu(timers, old_cpu).lock);
        }

        if ( likely(timer->cpu == old_cpu) )
            break;

        spin_unlock(&per_cpu(timers, old_cpu).lock);
        spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
    }

    rcu_read_unlock(&timer_cpu_read_lock);

    active = active_timer(timer);
    if ( active )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    write_atomic(&timer->cpu, new_cpu);
    list_add(&timer->inactive, &per_cpu(timers, new_cpu).inactive);

    if ( active )
        activate_timer(timer);

    spin_unlock(&per_cpu(timers, old_cpu).lock);
    spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
#else /* CONFIG_NR_CPUS == 1 */
    old_cpu = read_atomic(&timer->cpu);
    if ( old_cpu != TIMER_CPU_status_killed )
        WARN_ON(new_cpu != old_cpu);
#endif /* CONFIG_NR_CPUS */
}


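/*
 * Deactivate @timer, mark it killed so no further operations can lock it,
 * and wait for any callback already running on another CPU to complete.
 * Must not be called from @timer's own callback.
 */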
void kill_timer(struct timer *timer)
{
    unsigned int old_cpu, cpu;
    unsigned long flags;

    BUG_ON(this_cpu(timers).running == timer);

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    timer->status = TIMER_STATUS_killed;
    old_cpu = timer->cpu;
    write_atomic(&timer->cpu, TIMER_CPU_status_killed);

    spin_unlock_irqrestore(&per_cpu(timers, old_cpu).lock, flags);

    for_each_online_cpu ( cpu )
        while ( per_cpu(timers, cpu).running == timer )
            cpu_relax();
}


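/*
 * Run @t's callback with @ts->lock temporarily dropped. @ts->running is set
 * for the duration so that kill_timer() can wait for the callback to finish.
 */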
static void execute_timer(struct timers *ts, struct timer *t)
{
    void (*fn)(void *data) = t->function;
    void *data = t->data;

    t->status = TIMER_STATUS_inactive;
    list_add(&t->inactive, &ts->inactive);

    ts->running = t;
    spin_unlock_irq(&ts->lock);
    (*fn)(data);
    spin_lock_irq(&ts->lock);
    ts->running = NULL;
}


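/*
 * TIMER_SOFTIRQ handler: grow the heap if timers have overflowed onto the
 * linked list, execute all expired timers, then program the hardware for the
 * next deadline (no closer than timer_slop from now).
 */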
static void cf_check timer_softirq_action(void)
{
    struct timer *t, **heap, *next;
    struct timers *ts;
    s_time_t now, deadline;

    ts = &this_cpu(timers);
    heap = ts->heap;

    /* If we overflowed the heap, try to allocate a larger heap. */
    if ( unlikely(ts->list != NULL) )
    {
        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
        unsigned int old_limit = heap_metadata(heap)->limit;
        unsigned int new_limit = ((old_limit + 1) << 4) - 1;
        struct timer **newheap = NULL;

        /* Don't grow the heap beyond what is representable in its metadata. */
        if ( new_limit == (typeof(heap_metadata(heap)->limit))new_limit &&
             new_limit + 1 )
            newheap = xmalloc_array(struct timer *, new_limit + 1);
        else
            printk_once(XENLOG_WARNING "CPU%u: timer heap limit reached\n",
                        smp_processor_id());
        if ( newheap != NULL )
        {
            spin_lock_irq(&ts->lock);
            memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap));
            heap_metadata(newheap)->limit = new_limit;
            ts->heap = newheap;
            spin_unlock_irq(&ts->lock);
            if ( old_limit != 0 )
                xfree(heap);
            heap = newheap;
        }
    }

    spin_lock_irq(&ts->lock);

    now = NOW();

    /* Execute ready heap timers. */
    while ( (heap_metadata(heap)->size != 0) &&
            ((t = heap[1])->expires < now) )
    {
        remove_from_heap(heap, t);
        execute_timer(ts, t);
    }

    /* Execute ready list timers. */
    while ( ((t = ts->list) != NULL) && (t->expires < now) )
    {
        ts->list = t->list_next;
        execute_timer(ts, t);
    }

    /* Try to move timers from linked list to more efficient heap. */
    next = ts->list;
    ts->list = NULL;
    while ( unlikely((t = next) != NULL) )
    {
        next = t->list_next;
        t->status = TIMER_STATUS_invalid;
        add_entry(t);
    }

    /* Find earliest deadline from head of linked list and top of heap. */
    deadline = STIME_MAX;
    if ( heap_metadata(heap)->size != 0 )
        deadline = heap[1]->expires;
    if ( (ts->list != NULL) && (ts->list->expires < deadline) )
        deadline = ts->list->expires;
    now = NOW();
    this_cpu(timer_deadline) =
        (deadline == STIME_MAX) ? 0 : MAX(deadline, now + timer_slop);

    if ( !reprogram_timer(this_cpu(timer_deadline)) )
        raise_softirq(TIMER_SOFTIRQ);

    spin_unlock_irq(&ts->lock);
}

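/*
 * Round @firsttick up to the next multiple of @period; a multiple of @period
 * is returned unchanged, as is any value when @period is zero.
 */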
s_time_t align_timer(s_time_t firsttick, uint64_t period)
{
    if ( !period )
        return firsttick;

    return firsttick + (period - 1) - ((firsttick - 1) % period);
}

static void dump_timer(struct timer *t, s_time_t now)
{
    printk(" ex=%12"PRId64"us timer=%p cb=%ps(%p)\n",
           (t->expires - now) / 1000, t, t->function, t->data);
}

static void cf_check dump_timerq(unsigned char key)
{
    struct timer *t;
    struct timers *ts;
    unsigned long flags;
    s_time_t now = NOW();
    unsigned int i, j;

    printk("Dumping timer queues:\n");

    for_each_online_cpu( i )
    {
        ts = &per_cpu(timers, i);

        printk("CPU%02d:\n", i);
        spin_lock_irqsave(&ts->lock, flags);
        for ( j = 1; j <= heap_metadata(ts->heap)->size; j++ )
            dump_timer(ts->heap[j], now);
        for ( t = ts->list; t != NULL; t = t->list_next )
            dump_timer(t, now);
        spin_unlock_irqrestore(&ts->lock, flags);
    }
}

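/*
 * Move every timer (pending and inactive) owned by the now-offline @old_cpu
 * onto an online CPU's queues.
 */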
static void migrate_timers_from_cpu(unsigned int old_cpu)
{
    unsigned int new_cpu = cpumask_any(&cpu_online_map);
    struct timers *old_ts, *new_ts;
    struct timer *t;
    bool notify = false;

    ASSERT(!cpu_online(old_cpu) && cpu_online(new_cpu));

    old_ts = &per_cpu(timers, old_cpu);
    new_ts = &per_cpu(timers, new_cpu);

    if ( old_cpu < new_cpu )
    {
        spin_lock_irq(&old_ts->lock);
        spin_lock(&new_ts->lock);
    }
    else
    {
        spin_lock_irq(&new_ts->lock);
        spin_lock(&old_ts->lock);
    }

    while ( (t = heap_metadata(old_ts->heap)->size
             ? old_ts->heap[1] : old_ts->list) != NULL )
    {
        remove_entry(t);
        write_atomic(&t->cpu, new_cpu);
        notify |= add_entry(t);
    }

    while ( !list_empty(&old_ts->inactive) )
    {
        t = list_entry(old_ts->inactive.next, struct timer, inactive);
        list_del(&t->inactive);
        write_atomic(&t->cpu, new_cpu);
        list_add(&t->inactive, &new_ts->inactive);
    }

    spin_unlock(&old_ts->lock);
    spin_unlock_irq(&new_ts->lock);

    if ( notify )
        cpu_raise_softirq(new_cpu, TIMER_SOFTIRQ);
}

/*
 * All CPUs initially share an empty dummy heap. Only those CPUs that
 * are brought online will be dynamically allocated their own heap.
 * The size/limit metadata are both 0 because the array resides in .bss.
 */
static struct timer *dummy_heap[1];

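/* Release @cpu's dynamically allocated heap (if any), reverting to the dummy heap. */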
static void free_percpu_timers(unsigned int cpu)
{
    struct timers *ts = &per_cpu(timers, cpu);

    ASSERT(heap_metadata(ts->heap)->size == 0);
    if ( heap_metadata(ts->heap)->limit )
    {
        xfree(ts->heap);
        ts->heap = dummy_heap;
    }
    else
        ASSERT(ts->heap == dummy_heap);
}

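/*
 * CPU hotplug notifier: initialise a CPU's timer queue as it comes up, and
 * migrate (and possibly free) its timers when it goes away.
 */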
static int cf_check cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct timers *ts = &per_cpu(timers, cpu);

    switch ( action )
    {
    case CPU_UP_PREPARE:
        /* Only initialise ts once. */
        if ( !ts->heap )
        {
            INIT_LIST_HEAD(&ts->inactive);
            spin_lock_init(&ts->lock);
            ts->heap = dummy_heap;
        }
        break;

    case CPU_UP_CANCELED:
    case CPU_DEAD:
    case CPU_RESUME_FAILED:
        migrate_timers_from_cpu(cpu);

        if ( !park_offline_cpus && system_state != SYS_STATE_suspend )
            free_percpu_timers(cpu);
        break;

    case CPU_REMOVE:
        if ( park_offline_cpus )
            free_percpu_timers(cpu);
        break;

    default:
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback,
    .priority = 99
};

void __init timer_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();

    open_softirq(TIMER_SOFTIRQ, timer_softirq_action);

    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);

    register_keyhandler('a', dump_timerq, "dump timer queues", 1);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */