/******************************************************************************
 * timer.c
 *
 * Copyright (c) 2002-2003 Rolf Neugebauer
 * Copyright (c) 2002-2005 K A Fraser
 */

#include <xen/init.h>
#include <xen/types.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <xen/time.h>
#include <xen/softirq.h>
#include <xen/timer.h>
#include <xen/keyhandler.h>
#include <xen/percpu.h>
#include <xen/cpu.h>
#include <xen/rcupdate.h>
#include <xen/symbols.h>
#include <asm/system.h>
#include <asm/atomic.h>

/* We program the timer hardware this far behind the closest deadline. */
static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
integer_param("timer_slop", timer_slop);

struct timers {
    spinlock_t lock;
    struct timer **heap;
    struct timer *list;
    struct timer *running;
    struct list_head inactive;
} __cacheline_aligned;

static DEFINE_PER_CPU(struct timers, timers);

/* Protects lock-free access to per-timer cpu field against cpu offlining. */
static DEFINE_RCU_READ_LOCK(timer_cpu_read_lock);

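/*
 * Deadline most recently handed to reprogram_timer() on this CPU, or 0 when
 * no timer is pending.  Also consulted by other subsystems (e.g. the cpuidle
 * code) that want to know how long this CPU may sleep.
 */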
DEFINE_PER_CPU(s_time_t, timer_deadline);

/****************************************************************************
 * HEAP OPERATIONS.
 *
 * Slot 0 of the heap is never a valid timer pointer, and instead holds the
 * heap metadata.
 */

struct heap_metadata {
    uint16_t size, limit;
};

static struct heap_metadata *heap_metadata(struct timer **heap)
{
    /* Check that our type-punning doesn't overflow into heap[1] */
    BUILD_BUG_ON(sizeof(struct heap_metadata) > sizeof(struct timer *));

    return (struct heap_metadata *)&heap[0];
}
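
/*
 * Elements 1..size form a conventional 1-based binary min-heap keyed on
 * ->expires: the parent of @pos is pos >> 1 and its children are pos << 1
 * and (pos << 1) + 1, so heap[1] is always the earliest deadline.  Each
 * timer caches its slot in ->heap_offset to make removal O(log n).
 */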

/* Sink down element @pos of @heap. */
static void down_heap(struct timer **heap, unsigned int pos)
{
    unsigned int sz = heap_metadata(heap)->size, nxt;
    struct timer *t = heap[pos];

    while ( (nxt = (pos << 1)) <= sz )
    {
        if ( ((nxt+1) <= sz) && (heap[nxt+1]->expires < heap[nxt]->expires) )
            nxt++;
        if ( heap[nxt]->expires > t->expires )
            break;
        heap[pos] = heap[nxt];
        heap[pos]->heap_offset = pos;
        pos = nxt;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}

/* Float element @pos up @heap. */
static void up_heap(struct timer **heap, unsigned int pos)
{
    struct timer *t = heap[pos];

    while ( (pos > 1) && (t->expires < heap[pos>>1]->expires) )
    {
        heap[pos] = heap[pos>>1];
        heap[pos]->heap_offset = pos;
        pos >>= 1;
    }

    heap[pos] = t;
    t->heap_offset = pos;
}


/* Delete @t from @heap. Return TRUE if new top of heap. */
static int remove_from_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;
    unsigned int pos = t->heap_offset;

    if ( unlikely(pos == sz) )
    {
        heap_metadata(heap)->size = sz - 1;
        goto out;
    }

    heap[pos] = heap[sz];
    heap[pos]->heap_offset = pos;

    heap_metadata(heap)->size = --sz;

    if ( (pos > 1) && (heap[pos]->expires < heap[pos>>1]->expires) )
        up_heap(heap, pos);
    else
        down_heap(heap, pos);

 out:
    return (pos == 1);
}


/* Add new entry @t to @heap. Return TRUE if new top of heap. */
static int add_to_heap(struct timer **heap, struct timer *t)
{
    unsigned int sz = heap_metadata(heap)->size;

    /* Fail if the heap is full. */
    if ( unlikely(sz == heap_metadata(heap)->limit) )
        return 0;

    heap_metadata(heap)->size = ++sz;
    heap[sz] = t;
    t->heap_offset = sz;
    up_heap(heap, sz);

    return (t->heap_offset == 1);
}


/****************************************************************************
 * LINKED LIST OPERATIONS.
 */

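/*
 * These mirror the heap helpers above: the fallback list is kept sorted by
 * ->expires, and both functions return TRUE when the head of the list (the
 * earliest deadline on the list) changed.
 */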
static int remove_from_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( (curr = *_pprev) != t )
        _pprev = &curr->list_next;

    *_pprev = t->list_next;

    return (_pprev == pprev);
}

static int add_to_list(struct timer **pprev, struct timer *t)
{
    struct timer *curr, **_pprev = pprev;

    while ( ((curr = *_pprev) != NULL) && (curr->expires <= t->expires) )
        _pprev = &curr->list_next;

    t->list_next = curr;
    *_pprev = t;

    return (_pprev == pprev);
}


/****************************************************************************
 * TIMER OPERATIONS.
 */

static int remove_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    switch ( t->status )
    {
    case TIMER_STATUS_in_heap:
        rc = remove_from_heap(timers->heap, t);
        break;
    case TIMER_STATUS_in_list:
        rc = remove_from_list(&timers->list, t);
        break;
    default:
        rc = 0;
        BUG();
    }

    t->status = TIMER_STATUS_invalid;
    return rc;
}

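/*
 * (Re-)insert timer @t on its CPU's queues.  The heap is preferred; if it
 * is full (add_to_heap() fails, leaving heap_offset at 0) the timer falls
 * back to the sorted list, from where the softirq handler will move it once
 * a larger heap has been allocated.  Returns TRUE when the earliest pending
 * deadline may have changed, i.e. when the caller should kick TIMER_SOFTIRQ.
 */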
static int add_entry(struct timer *t)
{
    struct timers *timers = &per_cpu(timers, t->cpu);
    int rc;

    ASSERT(t->status == TIMER_STATUS_invalid);

    /* Try to add to heap. t->heap_offset indicates whether we succeed. */
    t->heap_offset = 0;
    t->status = TIMER_STATUS_in_heap;
    rc = add_to_heap(timers->heap, t);
    if ( t->heap_offset != 0 )
        return rc;

    /* Fall back to adding to the slower linked list. */
    t->status = TIMER_STATUS_in_list;
    return add_to_list(&timers->list, t);
}

static inline void activate_timer(struct timer *timer)
{
    ASSERT(timer->status == TIMER_STATUS_inactive);
    timer->status = TIMER_STATUS_invalid;
    list_del(&timer->inactive);

    if ( add_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);
}

static inline void deactivate_timer(struct timer *timer)
{
    if ( remove_entry(timer) )
        cpu_raise_softirq(timer->cpu, TIMER_SOFTIRQ);

    timer->status = TIMER_STATUS_inactive;
    list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
}

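/*
 * Lock the queue @timer currently belongs to.  timer->cpu may change under
 * our feet (migrate_timer()), so lock the presumed CPU's queue and re-check
 * the field afterwards, retrying on a mismatch; the RCU read lock guards the
 * lock-free read of timer->cpu against concurrent CPU offlining (see
 * timer_cpu_read_lock above).  Returns false, with no lock held, if the
 * timer has been killed.
 */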
static inline bool timer_lock_unsafe(struct timer *timer)
{
    unsigned int cpu;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        cpu = read_atomic(&timer->cpu);
        if ( unlikely(cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return 0;
        }
        /* Use the speculation unsafe variant, the wrapper has the barrier. */
        _spin_lock(&per_cpu(timers, cpu).lock);
        if ( likely(timer->cpu == cpu) )
            break;
        spin_unlock(&per_cpu(timers, cpu).lock);
    }

    rcu_read_unlock(&timer_cpu_read_lock);
    return 1;
}

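/*
 * Wrapper used by the public functions below: disables IRQs, then takes the
 * queue lock.  Evaluates to false (with IRQs already restored) if the timer
 * was killed, in which case the caller must bail out.  The
 * block_lock_speculation() call provides the speculation barrier omitted by
 * the unsafe _spin_lock() above.
 */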
#define timer_lock_irqsave(t, flags) ({         \
    bool __x;                                   \
    local_irq_save(flags);                      \
    if ( !(__x = timer_lock_unsafe(t)) )        \
        local_irq_restore(flags);               \
    block_lock_speculation();                   \
    __x;                                        \
})

static inline void timer_unlock(struct timer *timer)
{
    spin_unlock(&per_cpu(timers, timer->cpu).lock);
}

#define timer_unlock_irqrestore(t, flags) ({    \
    timer_unlock(t);                            \
    local_irq_restore(flags);                   \
})


static bool active_timer(const struct timer *timer)
{
    ASSERT(timer->status >= TIMER_STATUS_inactive);
    return timer_is_active(timer);
}


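/*
 * Public interface.  A typical caller embeds a struct timer in its own state
 * and follows the (purely illustrative) pattern below; the callback runs
 * from the TIMER_SOFTIRQ handler on the timer's chosen CPU.
 *
 *   init_timer(&foo->timer, foo_timer_fn, foo, smp_processor_id());
 *   set_timer(&foo->timer, NOW() + MILLISECS(10));
 *   ...
 *   kill_timer(&foo->timer);   (required before freeing the structure)
 */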
void init_timer(
    struct timer *timer,
    void (*function)(void *data),
    void *data,
    unsigned int cpu)
{
    unsigned long flags;
    memset(timer, 0, sizeof(*timer));
    timer->function = function;
    timer->data = data;
    write_atomic(&timer->cpu, cpu);
    timer->status = TIMER_STATUS_inactive;
    if ( !timer_lock_irqsave(timer, flags) )
        BUG();
    list_add(&timer->inactive, &per_cpu(timers, cpu).inactive);
    timer_unlock_irqrestore(timer, flags);
}


void set_timer(struct timer *timer, s_time_t expires)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer->expires = expires;

    activate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}


void stop_timer(struct timer *timer)
{
    unsigned long flags;

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    timer_unlock_irqrestore(timer, flags);
}

bool timer_expires_before(struct timer *timer, s_time_t t)
{
    unsigned long flags;
    bool ret;

    if ( !timer_lock_irqsave(timer, flags) )
        return false;

    ret = active_timer(timer) && timer->expires <= t;

    timer_unlock_irqrestore(timer, flags);

    return ret;
}

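/*
 * Move a timer to @new_cpu's queue.  Both queue locks are needed; they are
 * always taken in ascending CPU-index order so that concurrent migrations in
 * opposite directions cannot deadlock.  As in timer_lock_unsafe(),
 * timer->cpu is re-checked after locking and the operation retried if it
 * changed under us.
 */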
void migrate_timer(struct timer *timer, unsigned int new_cpu)
{
    unsigned int old_cpu;
#if CONFIG_NR_CPUS > 1
    bool active;
    unsigned long flags;

    rcu_read_lock(&timer_cpu_read_lock);

    for ( ; ; )
    {
        old_cpu = read_atomic(&timer->cpu);
        if ( (old_cpu == new_cpu) || (old_cpu == TIMER_CPU_status_killed) )
        {
            rcu_read_unlock(&timer_cpu_read_lock);
            return;
        }

        if ( old_cpu < new_cpu )
        {
            spin_lock_irqsave(&per_cpu(timers, old_cpu).lock, flags);
            spin_lock(&per_cpu(timers, new_cpu).lock);
        }
        else
        {
            spin_lock_irqsave(&per_cpu(timers, new_cpu).lock, flags);
            spin_lock(&per_cpu(timers, old_cpu).lock);
        }

        if ( likely(timer->cpu == old_cpu) )
            break;

        spin_unlock(&per_cpu(timers, old_cpu).lock);
        spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
    }

    rcu_read_unlock(&timer_cpu_read_lock);

    active = active_timer(timer);
    if ( active )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    write_atomic(&timer->cpu, new_cpu);
    list_add(&timer->inactive, &per_cpu(timers, new_cpu).inactive);

    if ( active )
        activate_timer(timer);

    spin_unlock(&per_cpu(timers, old_cpu).lock);
    spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags);
#else /* CONFIG_NR_CPUS == 1 */
    old_cpu = read_atomic(&timer->cpu);
    if ( old_cpu != TIMER_CPU_status_killed )
        WARN_ON(new_cpu != old_cpu);
#endif /* CONFIG_NR_CPUS */
}


void kill_timer(struct timer *timer)
{
    unsigned int old_cpu, cpu;
    unsigned long flags;

    BUG_ON(this_cpu(timers).running == timer);

    if ( !timer_lock_irqsave(timer, flags) )
        return;

    if ( active_timer(timer) )
        deactivate_timer(timer);

    list_del(&timer->inactive);
    timer->status = TIMER_STATUS_killed;
    old_cpu = timer->cpu;
    write_atomic(&timer->cpu, TIMER_CPU_status_killed);

    spin_unlock_irqrestore(&per_cpu(timers, old_cpu).lock, flags);

    for_each_online_cpu ( cpu )
        while ( per_cpu(timers, cpu).running == timer )
            cpu_relax();
}


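/*
 * Run a timer's callback.  The queue lock is dropped across the call so the
 * handler may itself use the timer interface; ts->running marks the timer as
 * in flight so kill_timer() can spin until the handler has finished.
 */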
static void execute_timer(struct timers *ts, struct timer *t)
{
    void (*fn)(void *data) = t->function;
    void *data = t->data;

    t->status = TIMER_STATUS_inactive;
    list_add(&t->inactive, &ts->inactive);

    ts->running = t;
    spin_unlock_irq(&ts->lock);
    (*fn)(data);
    spin_lock_irq(&ts->lock);
    ts->running = NULL;
}


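/*
 * TIMER_SOFTIRQ handler.  In order: grow the heap (by a factor of 16) if
 * timers have overflowed onto the fallback list, run all expired heap and
 * list timers, migrate remaining list entries back into the heap, then
 * program the hardware with the earliest outstanding deadline (deferred to
 * at least now + timer_slop).  If reprogramming fails the softirq is
 * re-raised so we come straight back here.
 */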
static void cf_check timer_softirq_action(void)
{
    struct timer *t, **heap, *next;
    struct timers *ts;
    s_time_t now, deadline;

    ts = &this_cpu(timers);
    heap = ts->heap;

    /* If we overflowed the heap, try to allocate a larger heap. */
    if ( unlikely(ts->list != NULL) )
    {
        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
        unsigned int old_limit = heap_metadata(heap)->limit;
        unsigned int new_limit = ((old_limit + 1) << 4) - 1;
        struct timer **newheap = NULL;

        /* Don't grow the heap beyond what is representable in its metadata. */
        if ( new_limit == (typeof(heap_metadata(heap)->limit))new_limit &&
             new_limit + 1 )
            newheap = xmalloc_array(struct timer *, new_limit + 1);
        else
            printk_once(XENLOG_WARNING "CPU%u: timer heap limit reached\n",
                        smp_processor_id());
        if ( newheap != NULL )
        {
            spin_lock_irq(&ts->lock);
            memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap));
            heap_metadata(newheap)->limit = new_limit;
            ts->heap = newheap;
            spin_unlock_irq(&ts->lock);
            if ( old_limit != 0 )
                xfree(heap);
            heap = newheap;
        }
    }

    spin_lock_irq(&ts->lock);

    now = NOW();

    /* Execute ready heap timers. */
    while ( (heap_metadata(heap)->size != 0) &&
            ((t = heap[1])->expires < now) )
    {
        remove_from_heap(heap, t);
        execute_timer(ts, t);
    }

    /* Execute ready list timers. */
    while ( ((t = ts->list) != NULL) && (t->expires < now) )
    {
        ts->list = t->list_next;
        execute_timer(ts, t);
    }

    /* Try to move timers from linked list to more efficient heap. */
    next = ts->list;
    ts->list = NULL;
    while ( unlikely((t = next) != NULL) )
    {
        next = t->list_next;
        t->status = TIMER_STATUS_invalid;
        add_entry(t);
    }

    /* Find earliest deadline from head of linked list and top of heap. */
    deadline = STIME_MAX;
    if ( heap_metadata(heap)->size != 0 )
        deadline = heap[1]->expires;
    if ( (ts->list != NULL) && (ts->list->expires < deadline) )
        deadline = ts->list->expires;
    now = NOW();
    this_cpu(timer_deadline) =
        (deadline == STIME_MAX) ? 0 : MAX(deadline, now + timer_slop);

    if ( !reprogram_timer(this_cpu(timer_deadline)) )
        raise_softirq(TIMER_SOFTIRQ);

    spin_unlock_irq(&ts->lock);
}

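/*
 * Round @firsttick up to the next multiple of @period (exact multiples are
 * left unchanged), e.g. align_timer(105, 10) == 110 while
 * align_timer(100, 10) == 100.  A zero @period means "no alignment".
 */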
s_time_t align_timer(s_time_t firsttick, uint64_t period)
{
    if ( !period )
        return firsttick;

    return firsttick + (period - 1) - ((firsttick - 1) % period);
}

static void dump_timer(struct timer *t, s_time_t now)
{
    printk("  ex=%12"PRId64"us timer=%p cb=%ps(%p)\n",
           (t->expires - now) / 1000, t, t->function, t->data);
}

static void cf_check dump_timerq(unsigned char key)
{
    struct timer *t;
    struct timers *ts;
    unsigned long flags;
    s_time_t now = NOW();
    unsigned int i, j;

    printk("Dumping timer queues:\n");

    for_each_online_cpu( i )
    {
        ts = &per_cpu(timers, i);

        printk("CPU%02d:\n", i);
        spin_lock_irqsave(&ts->lock, flags);
        for ( j = 1; j <= heap_metadata(ts->heap)->size; j++ )
            dump_timer(ts->heap[j], now);
        for ( t = ts->list; t != NULL; t = t->list_next )
            dump_timer(t, now);
        spin_unlock_irqrestore(&ts->lock, flags);
    }
}

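/*
 * CPU hotplug: move every timer (active or inactive) belonging to the now
 * offline @old_cpu onto some online CPU, kicking that CPU's TIMER_SOFTIRQ if
 * its earliest deadline changed.  The same ascending-CPU lock ordering as in
 * migrate_timer() is used.
 */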
static void migrate_timers_from_cpu(unsigned int old_cpu)
{
    unsigned int new_cpu = cpumask_any(&cpu_online_map);
    struct timers *old_ts, *new_ts;
    struct timer *t;
    bool notify = false;

    ASSERT(!cpu_online(old_cpu) && cpu_online(new_cpu));

    old_ts = &per_cpu(timers, old_cpu);
    new_ts = &per_cpu(timers, new_cpu);

    if ( old_cpu < new_cpu )
    {
        spin_lock_irq(&old_ts->lock);
        spin_lock(&new_ts->lock);
    }
    else
    {
        spin_lock_irq(&new_ts->lock);
        spin_lock(&old_ts->lock);
    }

    while ( (t = heap_metadata(old_ts->heap)->size
                 ? old_ts->heap[1] : old_ts->list) != NULL )
    {
        remove_entry(t);
        write_atomic(&t->cpu, new_cpu);
        notify |= add_entry(t);
    }

    while ( !list_empty(&old_ts->inactive) )
    {
        t = list_entry(old_ts->inactive.next, struct timer, inactive);
        list_del(&t->inactive);
        write_atomic(&t->cpu, new_cpu);
        list_add(&t->inactive, &new_ts->inactive);
    }

    spin_unlock(&old_ts->lock);
    spin_unlock_irq(&new_ts->lock);

    if ( notify )
        cpu_raise_softirq(new_cpu, TIMER_SOFTIRQ);
}

/*
 * All CPUs initially share an empty dummy heap. Only those CPUs that
 * are brought online will be dynamically allocated their own heap.
 * The size/limit metadata are both 0 by being in .bss
 */
static struct timer *dummy_heap[1];

static void free_percpu_timers(unsigned int cpu)
{
    struct timers *ts = &per_cpu(timers, cpu);

    ASSERT(heap_metadata(ts->heap)->size == 0);
    if ( heap_metadata(ts->heap)->limit )
    {
        xfree(ts->heap);
        ts->heap = dummy_heap;
    }
    else
        ASSERT(ts->heap == dummy_heap);
}

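/*
 * CPU notifier: CPU_UP_PREPARE gives a CPU its (initially dummy) timer
 * state; when a CPU goes away its timers are migrated off it, and its
 * dynamically allocated heap is freed either immediately or, with
 * park_offline_cpus, only on CPU_REMOVE.
 */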
static int cf_check cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct timers *ts = &per_cpu(timers, cpu);

    switch ( action )
    {
    case CPU_UP_PREPARE:
        /* Only initialise ts once. */
        if ( !ts->heap )
        {
            INIT_LIST_HEAD(&ts->inactive);
            spin_lock_init(&ts->lock);
            ts->heap = dummy_heap;
        }
        break;

    case CPU_UP_CANCELED:
    case CPU_DEAD:
    case CPU_RESUME_FAILED:
        migrate_timers_from_cpu(cpu);

        if ( !park_offline_cpus && system_state != SYS_STATE_suspend )
            free_percpu_timers(cpu);
        break;

    case CPU_REMOVE:
        if ( park_offline_cpus )
            free_percpu_timers(cpu);
        break;

    default:
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback,
    .priority = 99
};

void __init timer_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();

    open_softirq(TIMER_SOFTIRQ, timer_softirq_action);

    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);

    register_keyhandler('a', dump_timerq, "dump timer queues", 1);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */