#include <xen/lib.h>
#include <xen/irq.h>
#include <xen/smp.h>
#include <xen/time.h>
#include <xen/spinlock.h>
#include <xen/guest_access.h>
#include <xen/preempt.h>
#include <public/sysctl.h>
#include <asm/processor.h>
#include <asm/atomic.h>

#ifndef NDEBUG

static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);

static void check_lock(struct lock_debug *debug)
{
    int irq_safe = !local_irq_is_enabled();

    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /* A few places take liberties with this. */
    /* BUG_ON(in_irq() && !irq_safe); */

    /*
     * We partition locks into IRQ-safe (always held with IRQs disabled) and
     * IRQ-unsafe (always held with IRQs enabled) types. The convention for
     * every lock must be consistently observed else we can deadlock in
     * IRQ-context rendezvous functions (a rendezvous which gets every CPU
     * into IRQ context before any CPU is released from the rendezvous).
     *
     * If we can mix IRQ-disabled and IRQ-enabled callers, the following can
     * happen:
     *  * Lock is held by CPU A, with IRQs enabled
     *  * CPU B is spinning on same lock, with IRQs disabled
     *  * Rendezvous starts -- CPU A takes interrupt and enters rendezvous spin
     *  * DEADLOCK -- CPU B will never enter rendezvous, CPU A will never exit
     *                the rendezvous, and will hence never release the lock.
     *
     * To guard against this subtle bug we latch the IRQ safety of every
     * spinlock in the system, on first use.
     */
    if ( unlikely(debug->irq_safe != irq_safe) )
    {
        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);

        if ( seen == !irq_safe )
        {
            printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe: %d\n",
                   seen, irq_safe);
            BUG();
        }
    }
}

static void check_barrier(struct lock_debug *debug)
{
    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /*
     * For a barrier, we have a relaxed IRQ-safety-consistency check.
     *
     * It is always safe to spin at the barrier with IRQs enabled -- that does
     * not prevent us from entering an IRQ-context rendezvous, and nor are
     * we preventing anyone else from doing so (since we do not actually
     * acquire the lock during a barrier operation).
     *
     * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that
     * is clearly wrong, for the same reason outlined in check_lock() above.
     */
    BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0));
}

void spin_debug_enable(void)
{
    atomic_inc(&spin_debug);
}

void spin_debug_disable(void)
{
    atomic_dec(&spin_debug);
}

#else /* defined(NDEBUG) */

#define check_lock(l) ((void)0)
#define check_barrier(l) ((void)0)

#endif

#ifdef CONFIG_LOCK_PROFILE

#define LOCK_PROFILE_REL                                                     \
    if (lock->profile)                                                       \
    {                                                                        \
        lock->profile->time_hold += NOW() - lock->profile->time_locked;      \
        lock->profile->lock_cnt++;                                           \
    }
#define LOCK_PROFILE_VAR    s_time_t block = 0
#define LOCK_PROFILE_BLOCK  block = block ? : NOW();
#define LOCK_PROFILE_GOT                                                     \
    if (lock->profile)                                                       \
    {                                                                        \
        lock->profile->time_locked = NOW();                                  \
        if (block)                                                           \
        {                                                                    \
            lock->profile->time_block += lock->profile->time_locked - block; \
            lock->profile->block_cnt++;                                      \
        }                                                                    \
    }

#else

#define LOCK_PROFILE_REL
#define LOCK_PROFILE_VAR
#define LOCK_PROFILE_BLOCK
#define LOCK_PROFILE_GOT

#endif

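/*
 * The ticket lock state is a pair of 16-bit counters packed into head_tail:
 * "tail" is the next ticket to hand out (bumped via SPINLOCK_TICKET_INC on
 * acquire) and "head" is the ticket currently being served (bumped by one on
 * release).  The lock is free exactly when head == tail.  The helpers below
 * snapshot that state with the required read ordering.
 */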
static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
{
    spinlock_tickets_t v;

    smp_rmb();
    v.head_tail = read_atomic(&t->head_tail);
    return v;
}

static always_inline u16 observe_head(spinlock_tickets_t *t)
{
    smp_rmb();
    return read_atomic(&t->head);
}

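/*
 * _spin_lock_cb() is the workhorse acquire path: it takes a ticket, then
 * spins until that ticket is served.  The optional callback is invoked on
 * every spin iteration, letting the caller do useful work while it waits;
 * _spin_lock() simply passes NULL for both callback and data.
 */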
void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
{
    spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
    LOCK_PROFILE_VAR;

    check_lock(&lock->debug);
    tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
                                           tickets.head_tail);
    while ( tickets.tail != observe_head(&lock->tickets) )
    {
        LOCK_PROFILE_BLOCK;
        if ( unlikely(cb) )
            cb(data);
        arch_lock_relax();
    }
    LOCK_PROFILE_GOT;
    preempt_disable();
    arch_lock_acquire_barrier();
}

void _spin_lock(spinlock_t *lock)
{
    _spin_lock_cb(lock, NULL, NULL);
}

void _spin_lock_irq(spinlock_t *lock)
{
    ASSERT(local_irq_is_enabled());
    local_irq_disable();
    _spin_lock(lock);
}

unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
    unsigned long flags;

    local_irq_save(flags);
    _spin_lock(lock);
    return flags;
}

void _spin_unlock(spinlock_t *lock)
{
    arch_lock_release_barrier();
    preempt_enable();
    LOCK_PROFILE_REL;
    add_sized(&lock->tickets.head, 1);
    arch_lock_signal();
}

void _spin_unlock_irq(spinlock_t *lock)
{
    _spin_unlock(lock);
    local_irq_enable();
}

void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
    _spin_unlock(lock);
    local_irq_restore(flags);
}
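
/*
 * A minimal usage sketch for the IRQ-disabling variants above, assuming the
 * usual spin_lock_irqsave()/spin_unlock_irqrestore() wrappers from the
 * spinlock header ('state_lock' and 'state' are hypothetical):
 *
 *     unsigned long flags;
 *
 *     spin_lock_irqsave(&state_lock, flags);
 *     state++;                  (critical section runs with IRQs disabled)
 *     spin_unlock_irqrestore(&state_lock, flags);
 *
 * Per check_lock(), a lock acquired this way must then be taken with IRQs
 * disabled everywhere, or debug builds will latch the mismatch and BUG().
 */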

int _spin_is_locked(spinlock_t *lock)
{
    check_lock(&lock->debug);

    /*
     * Recursive locks may be locked by another CPU, yet we return
     * "false" here, making this function suitable only for use in
     * ASSERT()s and alike.
     */
    return lock->recurse_cpu == SPINLOCK_NO_CPU
           ? lock->tickets.head != lock->tickets.tail
           : lock->recurse_cpu == smp_processor_id();
}

int _spin_trylock(spinlock_t *lock)
{
    spinlock_tickets_t old, new;

    check_lock(&lock->debug);
    old = observe_lock(&lock->tickets);
    if ( old.head != old.tail )
        return 0;
    new = old;
    new.tail++;
    if ( cmpxchg(&lock->tickets.head_tail,
                 old.head_tail, new.head_tail) != old.head_tail )
        return 0;
#ifdef CONFIG_LOCK_PROFILE
    if (lock->profile)
        lock->profile->time_locked = NOW();
#endif
    preempt_disable();
    /*
     * cmpxchg() is a full barrier so no need for an
     * arch_lock_acquire_barrier().
     */
    return 1;
}

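/*
 * _spin_barrier() does not acquire the lock: it merely waits until any
 * holder present at the time of the call has dropped it.  If the lock is
 * observed free (head == tail) it returns at once; otherwise it waits for
 * head to move past the sampled value, i.e. for the current owner to
 * release.  The full barriers order the caller's prior and subsequent
 * accesses against that critical section.
 */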
void _spin_barrier(spinlock_t *lock)
{
    spinlock_tickets_t sample;
#ifdef CONFIG_LOCK_PROFILE
    s_time_t block = NOW();
#endif

    check_barrier(&lock->debug);
    smp_mb();
    sample = observe_lock(&lock->tickets);
    if ( sample.head != sample.tail )
    {
        while ( observe_head(&lock->tickets) == sample.head )
            arch_lock_relax();
#ifdef CONFIG_LOCK_PROFILE
        if ( lock->profile )
        {
            lock->profile->time_block += NOW() - block;
            lock->profile->block_cnt++;
        }
#endif
    }
    smp_mb();
}

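/*
 * Recursive variants: the owning CPU may re-acquire a lock it already holds.
 * recurse_cpu records the owner and recurse_cnt the nesting depth; the
 * underlying ticket lock is only touched on the outermost acquire/release.
 * A sketch of the intended pairing, assuming the spin_lock_recursive()/
 * spin_unlock_recursive() wrappers ('d->lock' is a hypothetical lock):
 *
 *     spin_lock_recursive(&d->lock);
 *     ... possibly call a helper that also takes d->lock recursively ...
 *     spin_unlock_recursive(&d->lock);
 *
 * Every acquire, including nested ones, must be paired with exactly one
 * matching release.
 */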
int _spin_trylock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    /* Don't allow overflow of recurse_cpu field. */
    BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);

    check_lock(&lock->debug);

    if ( likely(lock->recurse_cpu != cpu) )
    {
        if ( !spin_trylock(lock) )
            return 0;
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;

    return 1;
}

void _spin_lock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    if ( likely(lock->recurse_cpu != cpu) )
    {
        _spin_lock(lock);
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;
}

void _spin_unlock_recursive(spinlock_t *lock)
{
    if ( likely(--lock->recurse_cnt == 0) )
    {
        lock->recurse_cpu = SPINLOCK_NO_CPU;
        spin_unlock(lock);
    }
}

#ifdef CONFIG_LOCK_PROFILE

struct lock_profile_anc {
    struct lock_profile_qhead *head_q;   /* first head of this type */
    char                      *name;     /* descriptive string for print */
};

typedef void lock_profile_subfunc(
    struct lock_profile *, int32_t, int32_t, void *);

extern struct lock_profile *__lock_profile_start;
extern struct lock_profile *__lock_profile_end;

static s_time_t lock_profile_start;
static struct lock_profile_anc lock_profile_ancs[LOCKPROF_TYPE_N];
static struct lock_profile_qhead lock_profile_glb_q;
static spinlock_t lock_profile_lock = SPIN_LOCK_UNLOCKED;

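/*
 * The profiling records form a two-level structure: lock_profile_ancs[] has
 * one anchor per LOCKPROF_TYPE_*, each anchor chains the registered
 * lock_profile_qhead instances of that type via head_q, and every qhead in
 * turn chains its locks' lock_profile records via elem_q/next.
 * spinlock_profile_iterate() walks all three levels under lock_profile_lock.
 */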
static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par)
{
    int i;
    struct lock_profile_qhead *hq;
    struct lock_profile *eq;

    spin_lock(&lock_profile_lock);
    for ( i = 0; i < LOCKPROF_TYPE_N; i++ )
        for ( hq = lock_profile_ancs[i].head_q; hq; hq = hq->head_q )
            for ( eq = hq->elem_q; eq; eq = eq->next )
                sub(eq, i, hq->idx, par);
    spin_unlock(&lock_profile_lock);
}

static void spinlock_profile_print_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    if ( type == LOCKPROF_TYPE_GLOBAL )
        printk("%s %s:\n", lock_profile_ancs[type].name, data->name);
    else
        printk("%s %d %s:\n", lock_profile_ancs[type].name, idx, data->name);
    printk("  lock:%12"PRId64"(%08X:%08X), block:%12"PRId64"(%08X:%08X)\n",
           data->lock_cnt, (u32)(data->time_hold >> 32), (u32)data->time_hold,
           data->block_cnt, (u32)(data->time_block >> 32),
           (u32)data->time_block);
}

void spinlock_profile_printall(unsigned char key)
{
    s_time_t now = NOW();
    s_time_t diff;

    diff = now - lock_profile_start;
    printk("Xen lock profile info SHOW  (now = %08X:%08X, "
        "total = %08X:%08X)\n", (u32)(now>>32), (u32)now,
        (u32)(diff>>32), (u32)diff);
    spinlock_profile_iterate(spinlock_profile_print_elem, NULL);
}

static void spinlock_profile_reset_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    data->lock_cnt = 0;
    data->block_cnt = 0;
    data->time_hold = 0;
    data->time_block = 0;
}

void spinlock_profile_reset(unsigned char key)
{
    s_time_t now = NOW();

    if ( key != '\0' )
        printk("Xen lock profile info RESET (now = %08X:%08X)\n",
            (u32)(now>>32), (u32)now);
    lock_profile_start = now;
    spinlock_profile_iterate(spinlock_profile_reset_elem, NULL);
}

typedef struct {
    struct xen_sysctl_lockprof_op *pc;
    int                      rc;
} spinlock_profile_ucopy_t;

static void spinlock_profile_ucopy_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    spinlock_profile_ucopy_t *p = par;
    struct xen_sysctl_lockprof_data elem;

    if ( p->rc )
        return;

    if ( p->pc->nr_elem < p->pc->max_elem )
    {
        safe_strcpy(elem.name, data->name);
        elem.type = type;
        elem.idx = idx;
        elem.lock_cnt = data->lock_cnt;
        elem.block_cnt = data->block_cnt;
        elem.lock_time = data->time_hold;
        elem.block_time = data->time_block;
        if ( copy_to_guest_offset(p->pc->data, p->pc->nr_elem, &elem, 1) )
            p->rc = -EFAULT;
    }

    if ( !p->rc )
        p->pc->nr_elem++;
}

/* Dom0 control of lock profiling */
int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
{
    int rc = 0;
    spinlock_profile_ucopy_t par;

    switch ( pc->cmd )
    {
    case XEN_SYSCTL_LOCKPROF_reset:
        spinlock_profile_reset('\0');
        break;
    case XEN_SYSCTL_LOCKPROF_query:
        pc->nr_elem = 0;
        par.rc = 0;
        par.pc = pc;
        spinlock_profile_iterate(spinlock_profile_ucopy_elem, &par);
        pc->time = NOW() - lock_profile_start;
        rc = par.rc;
        break;
    default:
        rc = -EINVAL;
        break;
    }

    return rc;
}

void _lock_profile_register_struct(
    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
{
    qhead->idx = idx;
    spin_lock(&lock_profile_lock);
    qhead->head_q = lock_profile_ancs[type].head_q;
    lock_profile_ancs[type].head_q = qhead;
    lock_profile_ancs[type].name = name;
    spin_unlock(&lock_profile_lock);
}

void _lock_profile_deregister_struct(
    int32_t type, struct lock_profile_qhead *qhead)
{
    struct lock_profile_qhead **q;

    spin_lock(&lock_profile_lock);
    for ( q = &lock_profile_ancs[type].head_q; *q; q = &(*q)->head_q )
    {
        if ( *q == qhead )
        {
            *q = qhead->head_q;
            break;
        }
    }
    spin_unlock(&lock_profile_lock);
}

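/*
 * __lock_profile_start/__lock_profile_end bound a linker-assembled array of
 * pointers to the lock_profile records of statically defined locks (emitted
 * by the lock profiling macros in the spinlock header).  lock_prof_init()
 * strings them onto the global queue and registers it as LOCKPROF_TYPE_GLOBAL.
 */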
static int __init lock_prof_init(void)
{
    struct lock_profile **q;

    for ( q = &__lock_profile_start; q < &__lock_profile_end; q++ )
    {
        (*q)->next = lock_profile_glb_q.elem_q;
        lock_profile_glb_q.elem_q = *q;
        (*q)->lock->profile = *q;
    }

    _lock_profile_register_struct(
        LOCKPROF_TYPE_GLOBAL, &lock_profile_glb_q,
        0, "Global lock");

    return 0;
}
__initcall(lock_prof_init);

#endif /* LOCK_PROFILE */