/******************************************************************************
 * common/trace.c
 *
 * Xen Trace Buffer
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 *
 * Authors: Mark Williamson, mark.a.williamson@intel.com
 *          Rob Gardner, rob.gardner@hp.com
 * Date:    October 2005
 *
 * Copyright (C) 2005 Bin Ren
 *
 * The trace buffer code is designed to allow debugging traces of Xen to be
 * generated on UP / SMP machines.  Each trace entry is timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 */

#include <asm/types.h>
#include <asm/io.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/trace.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/tasklet.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/percpu.h>
#include <xen/pfn.h>
#include <xen/cpu.h>
#include <asm/atomic.h>
#include <public/sysctl.h>

#ifdef CONFIG_COMPAT
#include <compat/trace.h>
#define xen_t_buf t_buf
CHECK_t_buf;
#undef xen_t_buf
#else
#define compat_t_rec t_rec
#endif

/* opt_tbuf_size: trace buffer size (in pages) for each cpu */
static unsigned int opt_tbuf_size;
static unsigned int opt_tevt_mask;
integer_param("tbuf_size", opt_tbuf_size);
integer_param("tevt_mask", opt_tevt_mask);

/* Pointers to the meta-data objects for all system trace buffers */
static struct t_info *t_info;
static unsigned int t_info_pages;

static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static u32 data_size __read_mostly;

/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
static u32 t_buf_highwater;

/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);

/* a flag recording whether initialization has been done */
/* or more properly, if the tbuf subsystem is enabled right now */
int tb_init_done __read_mostly;

/* which CPUs tracing is enabled on */
static cpumask_t tb_cpu_mask;

/* which tracing events are enabled */
static u32 tb_event_mask = TRC_ALL;

/* Return the number of elements of type _type necessary to store at least
 * _x bytes of data, i.e. sizeof(_type) * ans >= _x. */
#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))

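/*
 * For illustration: fit_to_type() is a round-up division, e.g.
 * fit_to_type(uint32_t, 10) == 3, since 3 * sizeof(uint32_t) == 12 >= 10
 * whereas 2 * sizeof(uint32_t) == 8 would be too small.
 */
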
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;

    if ( action == CPU_UP_PREPARE )
        spin_lock_init(&per_cpu(t_lock, cpu));

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

static uint32_t calc_tinfo_first_offset(void)
{
    int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
    return fit_to_type(uint32_t, offset_in_bytes);
}

/**
 * calculate_tbuf_size - check that the proposed size will fit in the
 * currently sized struct t_info and will allow prod and cons to reach
 * double that value without overflow.
 * The t_info layout is fixed and can't be changed without breaking xentrace.
 * Initialize t_info_pages based on the number of trace pages.
 */
static int calculate_tbuf_size(unsigned int pages, uint16_t t_info_first_offset)
{
    struct t_buf dummy_size;
    typeof(dummy_size.prod) max_size;
    struct t_info dummy_pages;
    typeof(dummy_pages.tbuf_size) max_pages;
    typeof(dummy_pages.mfn_offset[0]) max_mfn_offset;
    unsigned int max_cpus = num_online_cpus();
    unsigned int t_info_words;

    /* force maximum value for an unsigned type */
    max_size = -1;
    max_pages = -1;
    max_mfn_offset = -1;

    /* max size holds up to n pages */
    max_size /= PAGE_SIZE;

    if ( max_size < max_pages )
        max_pages = max_size;

    /*
     * max mfn_offset holds up to n pages per cpu
     * The array of mfns for the highest cpu can start at the maximum value
     * mfn_offset can hold. So reduce the number of cpus and also the mfn_offset.
     */
    max_mfn_offset -= t_info_first_offset;
    max_cpus--;
    if ( max_cpus )
        max_mfn_offset /= max_cpus;
    if ( max_mfn_offset < max_pages )
        max_pages = max_mfn_offset;

    if ( pages > max_pages )
    {
        printk(XENLOG_INFO "xentrace: requested number of %u pages "
               "reduced to %u\n",
               pages, max_pages);
        pages = max_pages;
    }

    /*
     * NB this calculation is correct, because t_info_first_offset is
     * in words, not bytes.
     */
    t_info_words = num_online_cpus() * pages + t_info_first_offset;
    t_info_pages = PFN_UP(t_info_words * sizeof(uint32_t));
    printk(XENLOG_INFO "xentrace: requesting %u t_info pages "
           "for %u trace pages on %u cpus\n",
           t_info_pages, pages, num_online_cpus());
    return pages;
}
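/*
 * Sizing sketch with hypothetical numbers: with 4 online CPUs, pages = 32
 * and a first offset of 3 uint32_t words, t_info_words = 4 * 32 + 3 = 131,
 * so t_info_pages = PFN_UP(131 * sizeof(uint32_t)) = PFN_UP(524) = 1 page
 * (assuming 4KiB pages).  t_info then holds the small header followed by
 * one MFN list of 32 entries per CPU.
 */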

/**
 * alloc_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers.  The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 *
 * This function may also be called later when enabling trace buffers
 * via the SET_SIZE hypercall.
 */
static int alloc_trace_bufs(unsigned int pages)
{
    int i, cpu;
    /* Start after a fixed-size array of NR_CPUS */
    uint32_t *t_info_mfn_list;
    uint16_t t_info_first_offset;
    uint16_t offset;

    if ( t_info )
        return -EBUSY;

    if ( pages == 0 )
        return -EINVAL;

    /* Calculate offset in units of u32 of first mfn */
    t_info_first_offset = calc_tinfo_first_offset();

    pages = calculate_tbuf_size(pages, t_info_first_offset);

    t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
    if ( t_info == NULL )
        goto out_fail;

    memset(t_info, 0, t_info_pages*PAGE_SIZE);

    t_info_mfn_list = (uint32_t *)t_info;

    t_info->tbuf_size = pages;

    /*
     * Allocate buffers for all of the cpus.
     * If any fails, deallocate what you have so far and exit.
     */
    for_each_online_cpu(cpu)
    {
        offset = t_info_first_offset + (cpu * pages);
        t_info->mfn_offset[cpu] = offset;

        for ( i = 0; i < pages; i++ )
        {
            void *p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
            if ( !p )
            {
                printk(XENLOG_INFO "xentrace: memory allocation failed "
                       "on cpu %d after %d pages\n", cpu, i);
                t_info_mfn_list[offset + i] = 0;
                goto out_dealloc;
            }
            t_info_mfn_list[offset + i] = virt_to_mfn(p);
        }
    }

    /*
     * Initialize buffers for all of the cpus.
     */
    for_each_online_cpu(cpu)
    {
        struct t_buf *buf;
        struct page_info *pg;

        spin_lock_init(&per_cpu(t_lock, cpu));

        offset = t_info->mfn_offset[cpu];

        /* Initialize the buffer metadata */
        per_cpu(t_bufs, cpu) = buf = mfn_to_virt(t_info_mfn_list[offset]);
        buf->cons = buf->prod = 0;

        printk(XENLOG_INFO "xentrace: p%d mfn %x offset %u\n",
                   cpu, t_info_mfn_list[offset], offset);

        /* Now share the trace pages */
        for ( i = 0; i < pages; i++ )
        {
            pg = mfn_to_page(t_info_mfn_list[offset + i]);
            share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
        }
    }

    /* Finally, share the t_info page */
    for(i = 0; i < t_info_pages; i++)
        share_xen_page_with_privileged_guests(
            virt_to_page(t_info) + i, XENSHARE_readonly);

    data_size  = (pages * PAGE_SIZE - sizeof(struct t_buf));
    t_buf_highwater = data_size >> 1; /* 50% high water */
    opt_tbuf_size = pages;

    printk("xentrace: initialised\n");
    smp_wmb(); /* above must be visible before tb_init_done flag set */
    tb_init_done = 1;

    return 0;

out_dealloc:
    for_each_online_cpu(cpu)
    {
        offset = t_info->mfn_offset[cpu];
        if ( !offset )
            continue;
        for ( i = 0; i < pages; i++ )
        {
            uint32_t mfn = t_info_mfn_list[offset + i];
            if ( !mfn )
                break;
            ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated));
            free_xenheap_pages(mfn_to_virt(mfn), 0);
        }
    }
    free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
    t_info = NULL;
out_fail:
    printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
    return -ENOMEM;
}


/**
 * tb_set_size - handle the logic involved with dynamically allocating tbufs
 *
 * This function is called when the SET_SIZE hypercall is done.
 */
static int tb_set_size(unsigned int pages)
{
    /*
     * Setting size is a one-shot operation. It can be done either at
     * boot time or via control tools, but not by both. Once buffers
     * are created they cannot be destroyed.
     */
    if ( opt_tbuf_size && pages != opt_tbuf_size )
    {
        printk(XENLOG_INFO "xentrace: tb_set_size from %d to %d "
               "not implemented\n",
               opt_tbuf_size, pages);
        return -EINVAL;
    }

    return alloc_trace_bufs(pages);
}

int trace_will_trace_event(u32 event)
{
    if ( !tb_init_done )
        return 0;

    /*
     * Copied from __trace_var()
     */
    if ( (tb_event_mask & event) == 0 )
        return 0;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return 0;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return 0;

    if ( !cpumask_test_cpu(smp_processor_id(), &tb_cpu_mask) )
        return 0;

    return 1;
}
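/*
 * Example (sketch, assuming the usual TRC_* encoding in public/trace.h):
 * with tb_event_mask = TRC_SCHED, a scheduler event such as TRC_SCHED_SWITCH
 * passes the class and subclass checks above and is traced, whereas an HVM
 * event fails the class check and is filtered out.
 */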

/**
 * init_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers.  The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 */
void __init init_trace_bufs(void)
{
    cpumask_setall(&tb_cpu_mask);
    register_cpu_notifier(&cpu_nfb);

    if ( opt_tbuf_size )
    {
        if ( alloc_trace_bufs(opt_tbuf_size) )
        {
            printk("xentrace: allocation size %d failed, disabling\n",
                   opt_tbuf_size);
            opt_tbuf_size = 0;
        }
        else if ( opt_tevt_mask )
        {
            printk("xentrace: Starting tracing, enabling mask %x\n",
                   opt_tevt_mask);
            tb_event_mask = opt_tevt_mask;
            tb_init_done=1;
        }
    }
}

/**
 * tb_control - sysctl operations on trace buffers.
 * @tbc: a pointer to a struct xen_sysctl_tbuf_op to be filled out
 */
int tb_control(struct xen_sysctl_tbuf_op *tbc)
{
    static DEFINE_SPINLOCK(lock);
    int rc = 0;

    spin_lock(&lock);

    switch ( tbc->cmd )
    {
    case XEN_SYSCTL_TBUFOP_get_info:
        tbc->evt_mask   = tb_event_mask;
        tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
        tbc->size = t_info_pages * PAGE_SIZE;
        break;
    case XEN_SYSCTL_TBUFOP_set_cpu_mask:
    {
        cpumask_var_t mask;

        rc = xenctl_bitmap_to_cpumask(&mask, &tbc->cpu_mask);
        if ( !rc )
        {
            cpumask_copy(&tb_cpu_mask, mask);
            free_cpumask_var(mask);
        }
    }
        break;
    case XEN_SYSCTL_TBUFOP_set_evt_mask:
        tb_event_mask = tbc->evt_mask;
        break;
    case XEN_SYSCTL_TBUFOP_set_size:
        rc = tb_set_size(tbc->size);
        break;
    case XEN_SYSCTL_TBUFOP_enable:
        /* Enable trace buffers. Check buffers are already allocated. */
        if ( opt_tbuf_size == 0 )
            rc = -EINVAL;
        else
            tb_init_done = 1;
        break;
    case XEN_SYSCTL_TBUFOP_disable:
    {
        /*
         * Disable trace buffers. Just stops new records from being written,
         * does not deallocate any memory.
         */
        int i;

        tb_init_done = 0;
        smp_wmb();
        /* Clear any lost-record info so we don't get phantom lost records next time we
         * start tracing.  Grab the lock to make sure we're not racing anyone.  After this
         * hypercall returns, no more records should be placed into the buffers. */
        for_each_online_cpu(i)
        {
            unsigned long flags;
            spin_lock_irqsave(&per_cpu(t_lock, i), flags);
            per_cpu(lost_records, i)=0;
            spin_unlock_irqrestore(&per_cpu(t_lock, i), flags);
        }
    }
        break;
    default:
        rc = -EINVAL;
        break;
    }

    spin_unlock(&lock);

    return rc;
}

static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
{
    unsigned int rec_size = 4;

    if ( cycles )
        rec_size += 8;
    rec_size += extra;
    return rec_size;
}

static inline bool_t bogus(u32 prod, u32 cons)
{
    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
    {
        tb_init_done = 0;
        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
               smp_processor_id(), prod, cons);
        return 1;
    }
    return 0;
}

static inline u32 calc_unconsumed_bytes(const struct t_buf *buf)
{
    u32 prod = buf->prod, cons = buf->cons;
    s32 x;

    barrier(); /* must read buf->prod and buf->cons only once */
    if ( bogus(prod, cons) )
        return data_size;

    x = prod - cons;
    if ( x < 0 )
        x += 2*data_size;

    ASSERT(x >= 0);
    ASSERT(x <= data_size);

    return x;
}
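/*
 * Worked example (hypothetical values): prod and cons each run over
 * [0, 2*data_size) so that a full buffer (data_size unconsumed bytes) can
 * be told apart from an empty one (0 bytes).  With data_size = 4096,
 * prod = 100 and cons = 8100, x = 100 - 8100 = -8000, corrected to
 * -8000 + 8192 = 192 unconsumed bytes.
 */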

static inline u32 calc_bytes_to_wrap(const struct t_buf *buf)
{
    u32 prod = buf->prod, cons = buf->cons;
    s32 x;

    barrier(); /* must read buf->prod and buf->cons only once */
    if ( bogus(prod, cons) )
        return 0;

    x = data_size - prod;
    if ( x <= 0 )
        x += data_size;

    ASSERT(x > 0);
    ASSERT(x <= data_size);

    return x;
}

static inline u32 calc_bytes_avail(const struct t_buf *buf)
{
    return data_size - calc_unconsumed_bytes(buf);
}

static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
                                 unsigned char **next_page,
                                 uint32_t *offset_in_page)
{
    u32 x = buf->prod, cons = buf->cons;
    uint16_t per_cpu_mfn_offset;
    uint32_t per_cpu_mfn_nr;
    uint32_t *mfn_list;
    uint32_t mfn;
    unsigned char *this_page;

    barrier(); /* must read buf->prod and buf->cons only once */
    *next = x;
    if ( !tb_init_done || bogus(x, cons) )
        return NULL;

    if ( x >= data_size )
        x -= data_size;

    ASSERT(x < data_size);

    /* add leading header to get total offset of next record */
    x += sizeof(struct t_buf);
    *offset_in_page = x & ~PAGE_MASK;

    /* offset into array of mfns */
    per_cpu_mfn_nr = x >> PAGE_SHIFT;
    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
    mfn_list = (uint32_t *)t_info;
    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
    this_page = mfn_to_virt(mfn);
    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
    {
        /* reached end of buffer? */
        *next_page = NULL;
    }
    else
    {
        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
        *next_page = mfn_to_virt(mfn);
    }
    return this_page;
}
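/*
 * Illustration (hypothetical values, 4KiB pages): after adding the
 * sizeof(struct t_buf) header, a logical offset of x = 5000 maps to
 * per_cpu_mfn_nr = 5000 >> PAGE_SHIFT = 1 (the cpu's second trace page,
 * looked up via the MFN list in t_info) and offset_in_page =
 * 5000 & ~PAGE_MASK = 904, which is where the next record is written.
 */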

static inline void __insert_record(struct t_buf *buf,
                                   unsigned long event,
                                   unsigned int extra,
                                   bool_t cycles,
                                   unsigned int rec_size,
                                   const void *extra_data)
{
    struct t_rec split_rec, *rec;
    uint32_t *dst;
    unsigned char *this_page, *next_page;
    unsigned int extra_word = extra / sizeof(u32);
    unsigned int local_rec_size = calc_rec_size(cycles, extra);
    uint32_t next;
    uint32_t offset;
    uint32_t remaining;

    BUG_ON(local_rec_size != rec_size);
    BUG_ON(extra & 3);

    this_page = next_record(buf, &next, &next_page, &offset);
    if ( !this_page )
        return;

    remaining = PAGE_SIZE - offset;

    if ( unlikely(rec_size > remaining) )
    {
        if ( next_page == NULL )
        {
            /* access beyond end of buffer */
            printk(XENLOG_WARNING
                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
                   __func__, data_size, next, buf->cons, rec_size, remaining);
            return;
        }
        rec = &split_rec;
    } else {
        rec = (struct t_rec*)(this_page + offset);
    }

    rec->event = event;
    rec->extra_u32 = extra_word;
    dst = rec->u.nocycles.extra_u32;
    if ( (rec->cycles_included = cycles) != 0 )
    {
        u64 tsc = (u64)get_cycles();
        rec->u.cycles.cycles_lo = (uint32_t)tsc;
        rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
        dst = rec->u.cycles.extra_u32;
    }

    if ( extra_data && extra )
        memcpy(dst, extra_data, extra);

    if ( unlikely(rec_size > remaining) )
    {
        memcpy(this_page + offset, rec, remaining);
        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
    }

    smp_wmb();

    next += rec_size;
    if ( next >= 2*data_size )
        next -= 2*data_size;
    ASSERT(next < 2*data_size);
    buf->prod = next;
}

static inline void insert_wrap_record(struct t_buf *buf,
                                      unsigned int size)
{
    u32 space_left = calc_bytes_to_wrap(buf);
    unsigned int extra_space = space_left - sizeof(u32);
    bool_t cycles = 0;

    BUG_ON(space_left > size);

    /* We may need to add cycles to take up enough space... */
    if ( (extra_space/sizeof(u32)) > TRACE_EXTRA_MAX )
    {
        cycles = 1;
        extra_space -= sizeof(u64);
        ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
    }

    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
                    space_left, NULL);
}

#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
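/*
 * Sanity check on the arithmetic above: 4 bytes of t_rec header, 8 bytes of
 * tsc and a 16-byte payload (u32 + 2 * u16 + u64, packed) give 28 bytes,
 * i.e. LOST_REC_SIZE == calc_rec_size(1, sizeof(ed)) for the struct below.
 */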

static inline void insert_lost_records(struct t_buf *buf)
{
    struct __packed {
        u32 lost_records;
        u16 did, vid;
        u64 first_tsc;
    } ed;

    ed.vid = current->vcpu_id;
    ed.did = current->domain->domain_id;
    ed.lost_records = this_cpu(lost_records);
    ed.first_tsc = this_cpu(lost_records_first_tsc);

    this_cpu(lost_records) = 0;

    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
                    LOST_REC_SIZE, &ed);
}

/*
 * Notification is performed in a tasklet to avoid deadlocks with contexts
 * which __trace_var() may be called from (e.g., scheduler critical regions).
 */
static void trace_notify_dom0(unsigned long unused)
{
    send_global_virq(VIRQ_TBUF);
}
static DECLARE_SOFTIRQ_TASKLET(trace_notify_dom0_tasklet,
                               trace_notify_dom0, 0);

/**
 * __trace_var - Enters a trace tuple into the trace buffer for the current CPU.
 * @event: the event type being logged
 * @cycles: include tsc timestamp into trace record
 * @extra: size of additional trace data in bytes
 * @extra_data: pointer to additional trace data
 *
 * Logs a trace record into the appropriate buffer.
 */
void __trace_var(u32 event, bool_t cycles, unsigned int extra,
                 const void *extra_data)
{
    struct t_buf *buf;
    unsigned long flags;
    u32 bytes_to_tail, bytes_to_wrap;
    unsigned int rec_size, total_size;
    unsigned int extra_word;
    bool_t started_below_highwater;

    if( !tb_init_done )
        return;

    /* Convert byte count into word count, rounding up */
    extra_word = (extra / sizeof(u32));
    if ( (extra % sizeof(u32)) != 0 )
        extra_word++;

    ASSERT(extra_word <= TRACE_EXTRA_MAX);
    extra_word = min_t(int, extra_word, TRACE_EXTRA_MAX);

    /* Round size up to nearest word */
    extra = extra_word * sizeof(u32);

    if ( (tb_event_mask & event) == 0 )
        return;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return;

    if ( !cpumask_test_cpu(smp_processor_id(), &tb_cpu_mask) )
        return;

    /* Read tb_init_done /before/ t_bufs. */
    smp_rmb();

    spin_lock_irqsave(&this_cpu(t_lock), flags);

    buf = this_cpu(t_bufs);

    if ( unlikely(!buf) )
    {
        /* Make gcc happy */
        started_below_highwater = 0;
        goto unlock;
    }

    started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);

    /* Calculate the record size */
    rec_size = calc_rec_size(cycles, extra);

    /* How many bytes are available in the buffer? */
    bytes_to_tail = calc_bytes_avail(buf);

    /* How many bytes until the next wrap-around? */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    /*
     * Calculate expected total size to commit this record by
     * doing a dry-run.
     */
    total_size = 0;

    /* First, check to see if we need to include a lost_record.
     */
    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            total_size += bytes_to_wrap;
            bytes_to_wrap = data_size;
        }
        total_size += LOST_REC_SIZE;
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
    {
        total_size += bytes_to_wrap;
    }
    total_size += rec_size;

    /* Do we have enough space for everything? */
    if ( total_size > bytes_to_tail )
    {
        if ( ++this_cpu(lost_records) == 1 )
            this_cpu(lost_records_first_tsc)=(u64)get_cycles();
        started_below_highwater = 0;
        goto unlock;
    }

    /*
     * Now, actually write information
     */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            insert_wrap_record(buf, LOST_REC_SIZE);
            bytes_to_wrap = data_size;
        }
        insert_lost_records(buf);
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
        insert_wrap_record(buf, rec_size);

    /* Write the original record */
    __insert_record(buf, event, extra, cycles, rec_size, extra_data);

unlock:
    spin_unlock_irqrestore(&this_cpu(t_lock), flags);

    /* Notify trace buffer consumer that we've crossed the high water mark. */
    if ( likely(buf!=NULL)
         && started_below_highwater
         && (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
        tasklet_schedule(&trace_notify_dom0_tasklet);
}
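/*
 * Usage sketch: callers normally go through the TRACE_xD() wrappers in
 * <xen/trace.h>; the pattern below (mirroring __trace_hypercall() further
 * down) shows a direct call with a packed payload.  TRC_MY_EVENT and the
 * struct are purely illustrative, with cycles enabled:
 *
 *     struct __packed { uint32_t old, new; } d = { 1, 2 };
 *     __trace_var(TRC_MY_EVENT, 1, sizeof(d), &d);
 */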

void __trace_hypercall(uint32_t event, unsigned long op,
                       const xen_ulong_t *args)
{
    struct __packed {
        uint32_t op;
        uint32_t args[6];
    } d;
    uint32_t *a = d.args;

#define APPEND_ARG32(i)                         \
    do {                                        \
        unsigned i_ = (i);                      \
        *a++ = args[(i_)];                      \
        d.op |= TRC_PV_HYPERCALL_V2_ARG_32(i_); \
    } while( 0 )

    /*
     * This shouldn't happen as @op should be small enough but just in
     * case, warn if the argument bits in the trace record would
     * clobber the hypercall op.
     */
    WARN_ON(op & TRC_PV_HYPERCALL_V2_ARG_MASK);

    d.op = op;

    switch ( op )
    {
    case __HYPERVISOR_mmu_update:
        APPEND_ARG32(1); /* count */
        break;
    case __HYPERVISOR_multicall:
        APPEND_ARG32(1); /* count */
        break;
    case __HYPERVISOR_grant_table_op:
        APPEND_ARG32(0); /* cmd */
        APPEND_ARG32(2); /* count */
        break;
    case __HYPERVISOR_vcpu_op:
        APPEND_ARG32(0); /* cmd */
        APPEND_ARG32(1); /* vcpuid */
        break;
    case __HYPERVISOR_mmuext_op:
        APPEND_ARG32(1); /* count */
        break;
    case __HYPERVISOR_sched_op:
        APPEND_ARG32(0); /* cmd */
        break;
    }

    __trace_var(event, 1, sizeof(uint32_t) * (1 + (a - d.args)), &d);
}
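/*
 * For example, tracing a __HYPERVISOR_grant_table_op call yields a record
 * whose payload is d.op = op | TRC_PV_HYPERCALL_V2_ARG_32(0) |
 * TRC_PV_HYPERCALL_V2_ARG_32(2), d.args[0] = args[0] (cmd) and
 * d.args[1] = args[2] (count), i.e. sizeof(uint32_t) * 3 = 12 bytes of
 * extra data plus the cycles stamp.
 */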

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */