#include <xen/lib.h>
#include <xen/irq.h>
#include <xen/smp.h>
#include <xen/time.h>
#include <xen/spinlock.h>
#include <xen/guest_access.h>
#include <xen/preempt.h>
#include <public/sysctl.h>
#include <asm/processor.h>
#include <asm/atomic.h>

#ifndef NDEBUG

static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);

static void check_lock(struct lock_debug *debug)
{
    int irq_safe = !local_irq_is_enabled();

    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /* A few places take liberties with this. */
    /* BUG_ON(in_irq() && !irq_safe); */

    /*
     * We partition locks into IRQ-safe (always held with IRQs disabled) and
     * IRQ-unsafe (always held with IRQs enabled) types. The convention for
     * every lock must be consistently observed else we can deadlock in
     * IRQ-context rendezvous functions (a rendezvous which gets every CPU
     * into IRQ context before any CPU is released from the rendezvous).
     *
     * If we can mix IRQ-disabled and IRQ-enabled callers, the following can
     * happen:
     *  * Lock is held by CPU A, with IRQs enabled
     *  * CPU B is spinning on same lock, with IRQs disabled
     *  * Rendezvous starts -- CPU A takes interrupt and enters rendezvous spin
     *  * DEADLOCK -- CPU B will never enter rendezvous, CPU A will never exit
     *    the rendezvous, and will hence never release the lock.
     *
     * To guard against this subtle bug we latch the IRQ safety of every
     * spinlock in the system, on first use.
     */
    if ( unlikely(debug->irq_safe != irq_safe) )
    {
        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);

        if ( seen == !irq_safe )
        {
            printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
                   seen, irq_safe);
            BUG();
        }
    }
}

static void check_barrier(struct lock_debug *debug)
{
    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /*
     * For a barrier, we have a relaxed IRQ-safety-consistency check.
     *
     * It is always safe to spin at the barrier with IRQs enabled -- that does
     * not prevent us from entering an IRQ-context rendezvous, and nor are
     * we preventing anyone else from doing so (since we do not actually
     * acquire the lock during a barrier operation).
     *
     * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that
     * is clearly wrong, for the same reason outlined in check_lock() above.
     */
    BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0));
}

void spin_debug_enable(void)
{
    atomic_inc(&spin_debug);
}

void spin_debug_disable(void)
{
    atomic_dec(&spin_debug);
}

#else /* defined(NDEBUG) */

#define check_lock(l) ((void)0)
#define check_barrier(l) ((void)0)

#endif

#ifdef CONFIG_LOCK_PROFILE

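/*
 * Helper macros used by the lock/unlock paths when lock profiling is
 * enabled: LOCK_PROFILE_VAR declares a local timestamp for time spent
 * blocking, LOCK_PROFILE_BLOCK latches it on the first spin iteration,
 * LOCK_PROFILE_GOT accounts the blocked time once the lock is acquired,
 * and LOCK_PROFILE_REL accounts the hold time on release.
 */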
#define LOCK_PROFILE_REL                                                     \
    if (lock->profile)                                                       \
    {                                                                        \
        lock->profile->time_hold += NOW() - lock->profile->time_locked;      \
        lock->profile->lock_cnt++;                                           \
    }
#define LOCK_PROFILE_VAR    s_time_t block = 0
#define LOCK_PROFILE_BLOCK  block = block ? : NOW();
#define LOCK_PROFILE_GOT                                                     \
    if (lock->profile)                                                       \
    {                                                                        \
        lock->profile->time_locked = NOW();                                  \
        if (block)                                                           \
        {                                                                    \
            lock->profile->time_block += lock->profile->time_locked - block; \
            lock->profile->block_cnt++;                                      \
        }                                                                    \
    }

#else

#define LOCK_PROFILE_REL
#define LOCK_PROFILE_VAR
#define LOCK_PROFILE_BLOCK
#define LOCK_PROFILE_GOT

#endif

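/*
 * observe_lock() and observe_head() take a snapshot of the ticket state
 * (the whole head/tail pair, or just the head), preceded by a read barrier
 * so the observation cannot be reordered before earlier reads.
 */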
static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
{
    spinlock_tickets_t v;

    smp_rmb();
    v.head_tail = read_atomic(&t->head_tail);
    return v;
}

static always_inline u16 observe_head(spinlock_tickets_t *t)
{
    smp_rmb();
    return read_atomic(&t->head);
}

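/*
 * Ticket-lock acquire: atomically take the next ticket (the old tail) with
 * a fetch-and-add, then spin until the lock's head catches up with our
 * ticket.  The optional callback is invoked on every spin iteration, for
 * callers that need to poll for other work while waiting.
 */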
void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
{
    spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
    LOCK_PROFILE_VAR;

    check_lock(&lock->debug);
    tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
                                           tickets.head_tail);
    while ( tickets.tail != observe_head(&lock->tickets) )
    {
        LOCK_PROFILE_BLOCK;
        if ( unlikely(cb) )
            cb(data);
        arch_lock_relax();
    }
    LOCK_PROFILE_GOT;
    preempt_disable();
    arch_lock_acquire_barrier();
}

void _spin_lock(spinlock_t *lock)
{
    _spin_lock_cb(lock, NULL, NULL);
}

void _spin_lock_irq(spinlock_t *lock)
{
    ASSERT(local_irq_is_enabled());
    local_irq_disable();
    _spin_lock(lock);
}

unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
    unsigned long flags;

    local_irq_save(flags);
    _spin_lock(lock);
    return flags;
}

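/*
 * Release: issue the release barrier, re-enable preemption, then advance
 * the head so the next ticket holder (if any) can proceed.
 */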
void _spin_unlock(spinlock_t *lock)
{
    arch_lock_release_barrier();
    preempt_enable();
    LOCK_PROFILE_REL;
    add_sized(&lock->tickets.head, 1);
    arch_lock_signal();
}

void _spin_unlock_irq(spinlock_t *lock)
{
    _spin_unlock(lock);
    local_irq_enable();
}

void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
    _spin_unlock(lock);
    local_irq_restore(flags);
}

int _spin_is_locked(spinlock_t *lock)
{
    check_lock(&lock->debug);

    /*
     * Recursive locks may be locked by another CPU, yet we return
     * "false" here, making this function suitable only for use in
     * ASSERT()s and alike.
     */
    return lock->recurse_cpu == SPINLOCK_NO_CPU
           ? lock->tickets.head != lock->tickets.tail
           : lock->recurse_cpu == smp_processor_id();
}

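/*
 * Trylock: only succeed if no ticket is outstanding (head == tail), in
 * which case a single cmpxchg claims the next tail.  Any intervening
 * acquisition makes the cmpxchg fail and we give up immediately.
 */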
int _spin_trylock(spinlock_t *lock)
{
    spinlock_tickets_t old, new;

    check_lock(&lock->debug);
    old = observe_lock(&lock->tickets);
    if ( old.head != old.tail )
        return 0;
    new = old;
    new.tail++;
    if ( cmpxchg(&lock->tickets.head_tail,
                 old.head_tail, new.head_tail) != old.head_tail )
        return 0;
#ifdef CONFIG_LOCK_PROFILE
    if (lock->profile)
        lock->profile->time_locked = NOW();
#endif
    preempt_disable();
    /*
     * cmpxchg() is a full barrier so no need for an
     * arch_lock_acquire_barrier().
     */
    return 1;
}

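/*
 * Barrier: wait for any critical section currently in progress to finish,
 * without acquiring the lock ourselves.  Sample head/tail once; if the
 * lock is held, spin until the head moves on from the sampled value,
 * i.e. until the holder we observed has released.
 */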
void _spin_barrier(spinlock_t *lock)
{
    spinlock_tickets_t sample;
#ifdef CONFIG_LOCK_PROFILE
    s_time_t block = NOW();
#endif

    check_barrier(&lock->debug);
    smp_mb();
    sample = observe_lock(&lock->tickets);
    if ( sample.head != sample.tail )
    {
        while ( observe_head(&lock->tickets) == sample.head )
            arch_lock_relax();
#ifdef CONFIG_LOCK_PROFILE
        if ( lock->profile )
        {
            lock->profile->time_block += NOW() - block;
            lock->profile->block_cnt++;
        }
#endif
    }
    smp_mb();
}

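/*
 * Recursive locking: the owning CPU is recorded in recurse_cpu and the
 * nesting depth in recurse_cnt.  Only the outermost acquisition touches
 * the underlying ticket lock; nested acquisitions on the same CPU merely
 * bump the count.
 */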
int _spin_trylock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    /* Don't allow overflow of recurse_cpu field. */
    BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);

    check_lock(&lock->debug);

    if ( likely(lock->recurse_cpu != cpu) )
    {
        if ( !spin_trylock(lock) )
            return 0;
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;

    return 1;
}

void _spin_lock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    if ( likely(lock->recurse_cpu != cpu) )
    {
        _spin_lock(lock);
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;
}

void _spin_unlock_recursive(spinlock_t *lock)
{
    if ( likely(--lock->recurse_cnt == 0) )
    {
        lock->recurse_cpu = SPINLOCK_NO_CPU;
        spin_unlock(lock);
    }
}

#ifdef CONFIG_LOCK_PROFILE

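/*
 * Lock profiling bookkeeping: lock_profile_ancs[] anchors, per profile
 * type, a list of lock_profile_qhead structures, each of which in turn
 * holds the individual lock_profile elements belonging to one instance
 * of that type.
 */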
struct lock_profile_anc {
    struct lock_profile_qhead *head_q;   /* first head of this type */
    char                      *name;     /* descriptive string for print */
};

typedef void lock_profile_subfunc(
    struct lock_profile *, int32_t, int32_t, void *);

extern struct lock_profile *__lock_profile_start;
extern struct lock_profile *__lock_profile_end;

static s_time_t lock_profile_start;
static struct lock_profile_anc lock_profile_ancs[LOCKPROF_TYPE_N];
static struct lock_profile_qhead lock_profile_glb_q;
static spinlock_t lock_profile_lock = SPIN_LOCK_UNLOCKED;

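/*
 * Walk every registered lock_profile element and apply sub() to it,
 * serialised by lock_profile_lock.
 */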
static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par)
{
    int i;
    struct lock_profile_qhead *hq;
    struct lock_profile *eq;

    spin_lock(&lock_profile_lock);
    for ( i = 0; i < LOCKPROF_TYPE_N; i++ )
        for ( hq = lock_profile_ancs[i].head_q; hq; hq = hq->head_q )
            for ( eq = hq->elem_q; eq; eq = eq->next )
                sub(eq, i, hq->idx, par);
    spin_unlock(&lock_profile_lock);
}

static void spinlock_profile_print_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    if ( type == LOCKPROF_TYPE_GLOBAL )
        printk("%s %s:\n", lock_profile_ancs[type].name, data->name);
    else
        printk("%s %d %s:\n", lock_profile_ancs[type].name, idx, data->name);
    printk(" lock:%12"PRId64"(%08X:%08X), block:%12"PRId64"(%08X:%08X)\n",
           data->lock_cnt, (u32)(data->time_hold >> 32), (u32)data->time_hold,
           data->block_cnt, (u32)(data->time_block >> 32),
           (u32)data->time_block);
}

void spinlock_profile_printall(unsigned char key)
{
    s_time_t now = NOW();
    s_time_t diff;

    diff = now - lock_profile_start;
    printk("Xen lock profile info SHOW (now = %08X:%08X, "
           "total = %08X:%08X)\n", (u32)(now>>32), (u32)now,
           (u32)(diff>>32), (u32)diff);
    spinlock_profile_iterate(spinlock_profile_print_elem, NULL);
}

static void spinlock_profile_reset_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    data->lock_cnt = 0;
    data->block_cnt = 0;
    data->time_hold = 0;
    data->time_block = 0;
}

void spinlock_profile_reset(unsigned char key)
{
    s_time_t now = NOW();

    if ( key != '\0' )
        printk("Xen lock profile info RESET (now = %08X:%08X)\n",
               (u32)(now>>32), (u32)now);
    lock_profile_start = now;
    spinlock_profile_iterate(spinlock_profile_reset_elem, NULL);
}

typedef struct {
    struct xen_sysctl_lockprof_op *pc;
    int rc;
} spinlock_profile_ucopy_t;

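/*
 * Copy one profile element out to the guest buffer.  All elements are
 * counted in nr_elem even when the buffer is full, so the caller can
 * learn the required size.
 */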
static void spinlock_profile_ucopy_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    spinlock_profile_ucopy_t *p = par;
    struct xen_sysctl_lockprof_data elem;

    if ( p->rc )
        return;

    if ( p->pc->nr_elem < p->pc->max_elem )
    {
        safe_strcpy(elem.name, data->name);
        elem.type = type;
        elem.idx = idx;
        elem.lock_cnt = data->lock_cnt;
        elem.block_cnt = data->block_cnt;
        elem.lock_time = data->time_hold;
        elem.block_time = data->time_block;
        if ( copy_to_guest_offset(p->pc->data, p->pc->nr_elem, &elem, 1) )
            p->rc = -EFAULT;
    }

    if ( !p->rc )
        p->pc->nr_elem++;
}

/* Dom0 control of lock profiling */
int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
{
    int rc = 0;
    spinlock_profile_ucopy_t par;

    switch ( pc->cmd )
    {
    case XEN_SYSCTL_LOCKPROF_reset:
        spinlock_profile_reset('\0');
        break;
    case XEN_SYSCTL_LOCKPROF_query:
        pc->nr_elem = 0;
        par.rc = 0;
        par.pc = pc;
        spinlock_profile_iterate(spinlock_profile_ucopy_elem, &par);
        pc->time = NOW() - lock_profile_start;
        rc = par.rc;
        break;
    default:
        rc = -EINVAL;
        break;
    }

    return rc;
}

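/*
 * Register/deregister a queue head of profiled locks with the list for
 * its profile type; registration links it at the front of that list and
 * records the type's display name.
 */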
void _lock_profile_register_struct(
    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
{
    qhead->idx = idx;
    spin_lock(&lock_profile_lock);
    qhead->head_q = lock_profile_ancs[type].head_q;
    lock_profile_ancs[type].head_q = qhead;
    lock_profile_ancs[type].name = name;
    spin_unlock(&lock_profile_lock);
}

void _lock_profile_deregister_struct(
    int32_t type, struct lock_profile_qhead *qhead)
{
    struct lock_profile_qhead **q;

    spin_lock(&lock_profile_lock);
    for ( q = &lock_profile_ancs[type].head_q; *q; q = &(*q)->head_q )
    {
        if ( *q == qhead )
        {
            *q = qhead->head_q;
            break;
        }
    }
    spin_unlock(&lock_profile_lock);
}

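/*
 * At boot, chain every profile entry placed between __lock_profile_start
 * and __lock_profile_end (i.e. the statically defined global locks) onto
 * lock_profile_glb_q, point each lock back at its profile entry, and
 * register the queue under the "Global lock" type.
 */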
static int __init lock_prof_init(void)
{
    struct lock_profile **q;

    for ( q = &__lock_profile_start; q < &__lock_profile_end; q++ )
    {
        (*q)->next = lock_profile_glb_q.elem_q;
        lock_profile_glb_q.elem_q = *q;
        (*q)->lock->profile = *q;
    }

    _lock_profile_register_struct(
        LOCKPROF_TYPE_GLOBAL, &lock_profile_glb_q,
        0, "Global lock");

    return 0;
}
__initcall(lock_prof_init);

#endif /* LOCK_PROFILE */