/******************************************************************************
 * common/trace.c
 *
 * Xen Trace Buffer
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 *
 * Authors: Mark Williamson, mark.a.williamson@intel.com
 *          Rob Gardner, rob.gardner@hp.com
 * Date:    October 2005
 *
 * Copyright (C) 2005 Bin Ren
 *
 * The trace buffer code is designed to allow debugging traces of Xen to be
 * generated on UP / SMP machines.  Each trace entry is timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 */

#include <asm/io.h>
#include <xen/lib.h>
#include <xen/param.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/trace.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/tasklet.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/percpu.h>
#include <xen/pfn.h>
#include <xen/sections.h>
#include <xen/cpu.h>
#include <asm/atomic.h>
#include <public/sysctl.h>

#ifdef CONFIG_COMPAT
#include <compat/trace.h>
#define xen_t_buf t_buf
CHECK_t_buf;
#undef xen_t_buf
#else
#define compat_t_rec t_rec
#endif

/* opt_tbuf_size: trace buffer size (in pages) for each cpu */
static unsigned int opt_tbuf_size;
static unsigned int opt_tevt_mask;
integer_param("tbuf_size", opt_tbuf_size);
integer_param("tevt_mask", opt_tevt_mask);

/* Pointers to the meta-data objects for all system trace buffers */
static struct t_info *t_info;
static unsigned int t_info_pages;

static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static u32 data_size __read_mostly;

/*
 * High water mark for trace buffers: send a virtual interrupt when the
 * buffer fill level reaches this point.
 */
static u32 t_buf_highwater;
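
/*
 * Note: when trace() pushes the amount of unconsumed data in a per-cpu
 * buffer from below t_buf_highwater to at or above it, it schedules
 * trace_notify_dom0_tasklet (see below), which sends VIRQ_TBUF to the
 * tracing domain so that the consumer can drain the buffers.
 */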

/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);

/*
 * A flag recording whether initialization has been done or, more properly,
 * whether the tbuf subsystem is enabled right now.
 */
bool __read_mostly tb_init_done;

/* which CPUs tracing is enabled on */
static cpumask_t tb_cpu_mask;

/* which tracing events are enabled */
static u32 tb_event_mask = TRC_ALL;

static int cf_check cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;

    if ( action == CPU_UP_PREPARE )
        spin_lock_init(&per_cpu(t_lock, cpu));

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

static uint32_t calc_tinfo_first_offset(void)
{
    return DIV_ROUND_UP(offsetof(struct t_info, mfn_offset[NR_CPUS]),
                        sizeof(uint32_t));
}
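
/*
 * Rough sketch of the t_info layout this file relies on (all offsets in
 * uint32_t words, as returned above):
 *
 *   [ struct t_info: tbuf_size, mfn_offset[NR_CPUS] ]
 *   [ cpu0's MFN list: "pages" entries              ]  <- t_info_first_offset
 *   [ cpu1's MFN list: "pages" entries              ]
 *   [ ...                                           ]
 *
 * t_info->mfn_offset[cpu] records where each cpu's MFN list starts, so a
 * consumer only needs the (read-only) t_info pages to locate and map every
 * per-cpu trace buffer.
 */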

/**
 * calculate_tbuf_size - check that the proposed size will fit in the
 * currently sized struct t_info and still allows prod and cons to reach
 * double that value without overflow.
 * The t_info layout is fixed and can't be changed without breaking xentrace.
 * Initializes t_info_pages based on the number of trace pages.
 */
static int calculate_tbuf_size(unsigned int pages, uint16_t t_info_first_offset)
{
    struct t_buf dummy_size;
    typeof(dummy_size.prod) max_size;
    struct t_info dummy_pages;
    typeof(dummy_pages.tbuf_size) max_pages;
    typeof(dummy_pages.mfn_offset[0]) max_mfn_offset;
    unsigned int max_cpus = nr_cpu_ids;
    unsigned int t_info_words;

    /* force maximum value for an unsigned type */
    max_size = -1;
    max_pages = -1;
    max_mfn_offset = -1;

    /* max size holds up to n pages */
    max_size /= PAGE_SIZE;

    if ( max_size < max_pages )
        max_pages = max_size;

    /*
     * max mfn_offset holds up to n pages per cpu
     * The array of mfns for the highest cpu can start at the maximum value
     * mfn_offset can hold, so reduce the number of cpus and also the
     * mfn_offset.
     */
    max_mfn_offset -= t_info_first_offset;
    max_cpus--;
    if ( max_cpus )
        max_mfn_offset /= max_cpus;
    if ( max_mfn_offset < max_pages )
        max_pages = max_mfn_offset;

    if ( pages > max_pages )
    {
        printk(XENLOG_INFO "xentrace: requested number of %u pages "
               "reduced to %u\n",
               pages, max_pages);
        pages = max_pages;
    }

    /*
     * NB this calculation is correct, because t_info_first_offset is
     * in words, not bytes
     */
    t_info_words = nr_cpu_ids * pages + t_info_first_offset;
    t_info_pages = PFN_UP(t_info_words * sizeof(uint32_t));
    printk(XENLOG_INFO "xentrace: requesting %u t_info pages "
           "for %u trace pages on %u cpus\n",
           t_info_pages, pages, nr_cpu_ids);
    return pages;
}
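
/*
 * Worked example for the sizing above (illustrative figures only): with 4K
 * pages, nr_cpu_ids == 4, a first offset of 64 words and a request of
 * pages == 32, t_info_words = 4 * 32 + 64 = 192 words = 768 bytes, hence
 * t_info_pages = PFN_UP(768) = 1.  The clamping against max_pages merely
 * ensures that neither prod/cons (which run up to 2 * data_size) nor the
 * per-cpu mfn_offset values can overflow their types.
 */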

/**
 * alloc_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers.  The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 *
 * This function may also be called later when enabling trace buffers
 * via the SET_SIZE hypercall.
 */
static int alloc_trace_bufs(unsigned int pages)
{
    int i, cpu;
    /* Start after a fixed-size array of NR_CPUS */
    uint32_t *t_info_mfn_list;
    uint16_t t_info_first_offset;
    uint16_t offset;

    if ( t_info )
        return -EBUSY;

    if ( pages == 0 )
        return -EINVAL;

    /* Calculate offset in units of u32 of first mfn */
    t_info_first_offset = calc_tinfo_first_offset();

    pages = calculate_tbuf_size(pages, t_info_first_offset);

    t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
    if ( t_info == NULL )
        goto out_fail;

    memset(t_info, 0, t_info_pages * PAGE_SIZE);

    t_info_mfn_list = (uint32_t *)t_info;

    t_info->tbuf_size = pages;

    /*
     * Allocate buffers for all of the cpus.
     * If any allocation fails, deallocate whatever was allocated so far
     * and bail out.
     */
    for_each_online_cpu(cpu)
    {
        offset = t_info_first_offset + (cpu * pages);
        t_info->mfn_offset[cpu] = offset;

        for ( i = 0; i < pages; i++ )
        {
            void *p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
            if ( !p )
            {
                printk(XENLOG_INFO "xentrace: memory allocation failed "
                       "on cpu %d after %d pages\n", cpu, i);
                t_info_mfn_list[offset + i] = 0;
                goto out_dealloc;
            }
            t_info_mfn_list[offset + i] = virt_to_mfn(p);
        }
    }
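
    /*
     * Note: the MEMF_bits(32 + PAGE_SHIFT) restriction used above keeps each
     * trace page below the 2^(32 + PAGE_SHIFT) byte boundary, i.e. it
     * guarantees that the MFN fits into the 32-bit slots of t_info_mfn_list,
     * which lives in the t_info pages shared with the tracing domain.
     */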

    /*
     * Initialize buffers for all of the cpus.
     */
    for_each_online_cpu(cpu)
    {
        struct t_buf *buf;

        spin_lock_init(&per_cpu(t_lock, cpu));

        offset = t_info->mfn_offset[cpu];

        /* Initialize the buffer metadata */
        per_cpu(t_bufs, cpu) = buf = mfn_to_virt(t_info_mfn_list[offset]);
        buf->cons = buf->prod = 0;

        printk(XENLOG_INFO "xentrace: p%d mfn %x offset %u\n",
               cpu, t_info_mfn_list[offset], offset);

        /* Now share the trace pages */
        for ( i = 0; i < pages; i++ )
            share_xen_page_with_privileged_guests(
                mfn_to_page(_mfn(t_info_mfn_list[offset + i])), SHARE_rw);
    }

    /* Finally, share the t_info page */
    for ( i = 0; i < t_info_pages; i++ )
        share_xen_page_with_privileged_guests(
            virt_to_page(t_info) + i, SHARE_ro);

    data_size = (pages * PAGE_SIZE - sizeof(struct t_buf));
    t_buf_highwater = data_size >> 1; /* 50% high water */
    opt_tbuf_size = pages;

    printk("xentrace: initialised\n");
    smp_wmb(); /* above must be visible before tb_init_done flag set */
    tb_init_done = true;

    return 0;

out_dealloc:
    for_each_online_cpu(cpu)
    {
        offset = t_info->mfn_offset[cpu];
        if ( !offset )
            continue;
        for ( i = 0; i < pages; i++ )
        {
            uint32_t mfn = t_info_mfn_list[offset + i];
            if ( !mfn )
                break;
            ASSERT(!(mfn_to_page(_mfn(mfn))->count_info & PGC_allocated));
            free_xenheap_pages(mfn_to_virt(mfn), 0);
        }
    }
    free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
    t_info = NULL;
out_fail:
    printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
    return -ENOMEM;
}


/**
 * tb_set_size - handle the logic involved with dynamically allocating tbufs
 *
 * This function is called when the SET_SIZE hypercall is done.
 */
static int tb_set_size(unsigned int pages)
{
    /*
     * Setting size is a one-shot operation. It can be done either at
     * boot time or via control tools, but not by both. Once buffers
     * are created they cannot be destroyed.
     */
    if ( opt_tbuf_size && pages != opt_tbuf_size )
    {
        printk(XENLOG_INFO "xentrace: tb_set_size from %u to %u "
               "not implemented\n",
               opt_tbuf_size, pages);
        return -EINVAL;
    }

    return alloc_trace_bufs(pages);
}

int trace_will_trace_event(u32 event)
{
    if ( !tb_init_done )
        return 0;

    /*
     * Copied from __trace_var()
     */
    if ( (tb_event_mask & event) == 0 )
        return 0;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return 0;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return 0;

    if ( !cpumask_test_cpu(smp_processor_id(), &tb_cpu_mask) )
        return 0;

    return 1;
}
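
/*
 * Event filtering, as used above and in trace(): an event is accepted only
 * if (a) tb_event_mask and the event code share at least one bit, (b) their
 * class bits (>> TRC_CLS_SHIFT) overlap, and (c) their 4-bit subclass fields
 * (>> TRC_SUBCLS_SHIFT) overlap.  For instance, setting tb_event_mask to a
 * single class constant such as TRC_SCHED lets every scheduler event through
 * while events of other classes are rejected by the class check.
 */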

/**
 * init_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers.  The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 *
 * TODO: Try and make this a presmp_initcall() to improve alloc_trace_bufs().
 */
static void __init __constructor init_trace_bufs(void)
{
    cpumask_setall(&tb_cpu_mask);
    register_cpu_notifier(&cpu_nfb);

    if ( opt_tbuf_size )
    {
        if ( alloc_trace_bufs(opt_tbuf_size) )
        {
            printk("xentrace: allocation size %u failed, disabling\n",
                   opt_tbuf_size);
            opt_tbuf_size = 0;
        }
        else if ( opt_tevt_mask )
        {
            printk("xentrace: Starting tracing, enabling mask %x\n",
                   opt_tevt_mask);
            tb_event_mask = opt_tevt_mask;
            tb_init_done = true;
        }
    }
}

/**
 * tb_control - sysctl operations on trace buffers.
 * @tbc: a pointer to a struct xen_sysctl_tbuf_op to be filled out
 */
int tb_control(struct xen_sysctl_tbuf_op *tbc)
{
    static DEFINE_SPINLOCK(lock);
    int rc = 0;

    spin_lock(&lock);

    switch ( tbc->cmd )
    {
    case XEN_SYSCTL_TBUFOP_get_info:
        tbc->evt_mask   = tb_event_mask;
        tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
        tbc->size = t_info_pages * PAGE_SIZE;
        break;
    case XEN_SYSCTL_TBUFOP_set_cpu_mask:
    {
        cpumask_var_t mask;

        rc = xenctl_bitmap_to_cpumask(&mask, &tbc->cpu_mask);
        if ( !rc )
        {
            cpumask_copy(&tb_cpu_mask, mask);
            free_cpumask_var(mask);
        }
    }
        break;
    case XEN_SYSCTL_TBUFOP_set_evt_mask:
        tb_event_mask = tbc->evt_mask;
        break;
    case XEN_SYSCTL_TBUFOP_set_size:
        rc = tb_set_size(tbc->size);
        break;
    case XEN_SYSCTL_TBUFOP_enable:
        /* Enable trace buffers. Check buffers are already allocated. */
        if ( opt_tbuf_size == 0 )
            rc = -EINVAL;
        else
            tb_init_done = true;
        break;
    case XEN_SYSCTL_TBUFOP_disable:
    {
        /*
         * Disable trace buffers. Just stops new records from being written;
         * does not deallocate any memory.
         */
        int i;

        tb_init_done = false;
        smp_wmb();
        /*
         * Clear any lost-record info so we don't get phantom lost records
         * next time we start tracing.  Grab the lock to make sure we're not
         * racing anyone.  After this hypercall returns, no more records
         * should be placed into the buffers.
         */
        for_each_online_cpu(i)
        {
            unsigned long flags;

            spin_lock_irqsave(&per_cpu(t_lock, i), flags);
            per_cpu(lost_records, i) = 0;
            spin_unlock_irqrestore(&per_cpu(t_lock, i), flags);
        }
    }
        break;
    default:
        rc = -EINVAL;
        break;
    }

    spin_unlock(&lock);

    return rc;
}

static inline unsigned int calc_rec_size(bool cycles, unsigned int extra)
{
    unsigned int rec_size = 4;

    if ( cycles )
        rec_size += 8;
    rec_size += extra;
    return rec_size;
}

static inline bool bogus(u32 prod, u32 cons)
{
    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
    {
        tb_init_done = false;
        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
               smp_processor_id(), prod, cons);
        return true;
    }
    return false;
}

static inline u32 calc_unconsumed_bytes(const struct t_buf *buf)
{
    uint32_t prod = buf->prod, cons = buf->cons;
    int32_t x;

    barrier(); /* must read buf->prod and buf->cons only once */
    if ( bogus(prod, cons) )
        return data_size;

    x = prod - cons;
    if ( x < 0 )
        x += 2 * data_size;

    ASSERT(x >= 0);
    ASSERT(x <= data_size);

    return x;
}
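
/*
 * prod and cons run over [0, 2 * data_size) rather than [0, data_size) so
 * that an empty buffer (prod == cons) can be told apart from a completely
 * full one.  Illustration of the arithmetic above: with data_size == 8192,
 * prod == 100 and cons == 16300, x = 100 - 16300 + 16384 = 184 unconsumed
 * bytes.
 */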

static inline u32 calc_bytes_to_wrap(const struct t_buf *buf)
{
    uint32_t prod = buf->prod, cons = buf->cons;
    int32_t x;

    barrier(); /* must read buf->prod and buf->cons only once */
    if ( bogus(prod, cons) )
        return 0;

    x = data_size - prod;
    if ( x <= 0 )
        x += data_size;

    ASSERT(x > 0);
    ASSERT(x <= data_size);

    return x;
}

static inline u32 calc_bytes_avail(const struct t_buf *buf)
{
    return data_size - calc_unconsumed_bytes(buf);
}

static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
                                  unsigned char **next_page,
                                  uint32_t *offset_in_page)
{
    u32 x = buf->prod, cons = buf->cons;
    uint16_t per_cpu_mfn_offset;
    uint32_t per_cpu_mfn_nr;
    uint32_t *mfn_list;
    uint32_t mfn;
    unsigned char *this_page;

    barrier(); /* must read buf->prod and buf->cons only once */
    *next = x;
    if ( !tb_init_done || bogus(x, cons) )
        return NULL;

    if ( x >= data_size )
        x -= data_size;

    ASSERT(x < data_size);

    /* add leading header to get total offset of next record */
    x += sizeof(struct t_buf);
    *offset_in_page = x & ~PAGE_MASK;

    /* offset into array of mfns */
    per_cpu_mfn_nr = x >> PAGE_SHIFT;
    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
    mfn_list = (uint32_t *)t_info;
    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
    this_page = mfn_to_virt(mfn);
    if ( per_cpu_mfn_nr + 1 >= opt_tbuf_size )
    {
        /* reached end of buffer? */
        *next_page = NULL;
    }
    else
    {
        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
        *next_page = mfn_to_virt(mfn);
    }
    return this_page;
}
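
/*
 * In other words: the producer offset is reduced modulo data_size, the
 * struct t_buf header is added back on, and the result is split into a page
 * number plus an offset within that page.  The page number indexes this
 * cpu's slice of the MFN list in t_info, yielding the virtual address of the
 * page that will hold the next record (and of the following page, if any,
 * for records straddling a page boundary).
 */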

static inline void __insert_record(struct t_buf *buf,
                                   unsigned long event,
                                   unsigned int extra,
                                   bool cycles,
                                   unsigned int rec_size,
                                   const void *extra_data)
{
    struct t_rec split_rec, *rec;
    uint32_t *dst;
    unsigned char *this_page, *next_page;
    unsigned int extra_word = extra / sizeof(u32);
    unsigned int local_rec_size = calc_rec_size(cycles, extra);
    uint32_t next;
    uint32_t offset;
    uint32_t remaining;

    BUG_ON(local_rec_size != rec_size);
    BUG_ON(extra & 3);

    this_page = next_record(buf, &next, &next_page, &offset);
    if ( !this_page )
        return;

    remaining = PAGE_SIZE - offset;

    if ( unlikely(rec_size > remaining) )
    {
        if ( next_page == NULL )
        {
            /* access beyond end of buffer */
            printk(XENLOG_WARNING
                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
                   __func__, data_size, next, buf->cons, rec_size, remaining);
            return;
        }
        rec = &split_rec;
    }
    else
    {
        rec = (struct t_rec *)(this_page + offset);
    }

    rec->event = event;
    rec->extra_u32 = extra_word;
    dst = rec->u.nocycles.extra_u32;
    if ( (rec->cycles_included = cycles) != 0 )
    {
        u64 tsc = (u64)get_cycles();
        rec->u.cycles.cycles_lo = (uint32_t)tsc;
        rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
        dst = rec->u.cycles.extra_u32;
    }

    if ( extra_data && extra )
        memcpy(dst, extra_data, extra);

    if ( unlikely(rec_size > remaining) )
    {
        memcpy(this_page + offset, rec, remaining);
        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
    }

    smp_wmb();

    next += rec_size;
    if ( next >= 2 * data_size )
        next -= 2 * data_size;
    ASSERT(next < 2 * data_size);
    buf->prod = next;
}
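
/*
 * Records that would straddle a page boundary are assembled in split_rec on
 * the stack and then copied out in two pieces.  The smp_wmb() above makes
 * the record contents visible before the new producer index is published in
 * buf->prod, so a consumer that observes the updated prod never reads a
 * partially written record.
 */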

static inline void insert_wrap_record(struct t_buf *buf,
                                      unsigned int size)
{
    u32 space_left = calc_bytes_to_wrap(buf);
    unsigned int extra_space = space_left - sizeof(u32);
    bool cycles = false;

    BUG_ON(space_left > size);

    /* We may need to add cycles to take up enough space... */
    if ( (extra_space / sizeof(u32)) > TRACE_EXTRA_MAX )
    {
        cycles = true;
        extra_space -= sizeof(u64);
        ASSERT((extra_space / sizeof(u32)) <= TRACE_EXTRA_MAX);
    }

    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
                    space_left, NULL);
}
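
/*
 * A TRC_TRACE_WRAP_BUFFER record is pure padding: it exactly fills the space
 * left before the wrap point (calc_bytes_to_wrap()), so that a consumer can
 * skip straight to the start of the data area rather than having to
 * reassemble a real record split across the wrap.
 */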

#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */

static inline void insert_lost_records(struct t_buf *buf)
{
    struct __packed {
        u32 lost_records;
        u16 did, vid;
        u64 first_tsc;
    } ed;

    ed.vid = current->vcpu_id;
    ed.did = current->domain->domain_id;
    ed.lost_records = this_cpu(lost_records);
    ed.first_tsc = this_cpu(lost_records_first_tsc);

    this_cpu(lost_records) = 0;

    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
                    LOST_REC_SIZE, &ed);
}
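
/*
 * Sanity check on LOST_REC_SIZE: 4 bytes of record header plus 8 bytes of
 * TSC (cycles are always included for this record) plus 16 bytes for the
 * struct ed above (u32 + 2 * u16 + u64), matching the
 * "header + tsc + sizeof(struct ed)" breakdown in the #define.
 */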

/*
 * Notification is performed in qtasklet to avoid deadlocks with contexts
 * which __trace_var() may be called from (e.g., scheduler critical regions).
 */
static void cf_check trace_notify_dom0(void *unused)
{
    send_global_virq(VIRQ_TBUF);
}
static DECLARE_SOFTIRQ_TASKLET(trace_notify_dom0_tasklet,
                               trace_notify_dom0, NULL);

/**
 * trace - Enters a trace tuple into the trace buffer for the current CPU.
 * @event: the event type being logged
 * @extra: size of additional trace data in bytes
 * @extra_data: pointer to additional trace data
 *
 * Logs a trace record into the appropriate buffer.
 */
void trace(uint32_t event, unsigned int extra, const void *extra_data)
{
    struct t_buf *buf;
    unsigned long flags;
    u32 bytes_to_tail, bytes_to_wrap;
    unsigned int rec_size, total_size;
    bool started_below_highwater;
    bool cycles = event & TRC_HD_CYCLE_FLAG;

    if ( !tb_init_done )
        return;

    /*
     * extra data needs to be an exact multiple of uint32_t to prevent the
     * later logic over-reading the object.  Reject out-of-spec records.  Any
     * failure here is an error in the caller.
     */
    if ( extra % sizeof(uint32_t) ||
         extra / sizeof(uint32_t) > TRACE_EXTRA_MAX )
        return printk_once(XENLOG_WARNING
                           "Trace event %#x bad size %u, discarding\n",
                           event, extra);

    if ( (tb_event_mask & event) == 0 )
        return;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return;

    if ( !cpumask_test_cpu(smp_processor_id(), &tb_cpu_mask) )
        return;

    spin_lock_irqsave(&this_cpu(t_lock), flags);

    buf = this_cpu(t_bufs);

    if ( unlikely(!buf) )
    {
        /* Make gcc happy */
        started_below_highwater = false;
        goto unlock;
    }

    started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);

    /* Calculate the record size */
    rec_size = calc_rec_size(cycles, extra);

    /* How many bytes are available in the buffer? */
    bytes_to_tail = calc_bytes_avail(buf);

    /* How many bytes until the next wrap-around? */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    /*
     * Calculate expected total size to commit this record by
     * doing a dry-run.
     */
    total_size = 0;

    /* First, check to see if we need to include a lost_record. */
    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            total_size += bytes_to_wrap;
            bytes_to_wrap = data_size;
        }
        total_size += LOST_REC_SIZE;
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
    {
        total_size += bytes_to_wrap;
    }
    total_size += rec_size;

    /* Do we have enough space for everything? */
    if ( total_size > bytes_to_tail )
    {
        if ( ++this_cpu(lost_records) == 1 )
            this_cpu(lost_records_first_tsc) = (u64)get_cycles();
        started_below_highwater = false;
        goto unlock;
    }

    /*
     * Now, actually write information
     */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            insert_wrap_record(buf, LOST_REC_SIZE);
            bytes_to_wrap = data_size;
        }
        insert_lost_records(buf);
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
        insert_wrap_record(buf, rec_size);

    /* Write the original record */
    __insert_record(buf, event, extra, cycles, rec_size, extra_data);

unlock:
    spin_unlock_irqrestore(&this_cpu(t_lock), flags);

    /* Notify trace buffer consumer that we've crossed the high water mark. */
    if ( likely(buf != NULL)
         && started_below_highwater
         && (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
        tasklet_schedule(&trace_notify_dom0_tasklet);
}
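
/*
 * Illustrative call site (the event name and payload are placeholders, not
 * part of this file; __trace_hypercall() below is a real example): callers
 * pack their extra data into uint32_t-sized fields and pass the total size:
 *
 *     struct { uint32_t val1, val2; } d = { .val1 = a, .val2 = b };
 *     trace(SOME_TRC_EVENT, sizeof(d), &d);
 *
 * The payload must be a whole number of uint32_t words and no more than
 * TRACE_EXTRA_MAX of them, or the record is discarded with a one-time
 * warning above.
 */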

void __trace_hypercall(uint32_t event, unsigned long op,
                       const xen_ulong_t *args)
{
    struct {
        uint32_t op;
        uint32_t args[5];
    } d;
    uint32_t *a = d.args;

    /*
     * In lieu of using __packed above, which gcc9 legitimately doesn't
     * like in combination with the address of d.args[] taken.
     */
    BUILD_BUG_ON(offsetof(typeof(d), args) != sizeof(d.op));

#define APPEND_ARG32(i)                         \
    do {                                        \
        unsigned int i_ = (i);                  \
        *a++ = args[(i_)];                      \
        d.op |= TRC_PV_HYPERCALL_V2_ARG_32(i_); \
    } while ( 0 )

    /*
     * This shouldn't happen as @op should be small enough but just in
     * case, warn if the argument bits in the trace record would
     * clobber the hypercall op.
     */
    WARN_ON(op & TRC_PV_HYPERCALL_V2_ARG_MASK);

    d.op = op;

    switch ( op )
    {
    case __HYPERVISOR_mmu_update:
        APPEND_ARG32(1); /* count */
        break;
    case __HYPERVISOR_multicall:
        APPEND_ARG32(1); /* count */
        break;
    case __HYPERVISOR_grant_table_op:
        APPEND_ARG32(0); /* cmd */
        APPEND_ARG32(2); /* count */
        break;
    case __HYPERVISOR_vcpu_op:
        APPEND_ARG32(0); /* cmd */
        APPEND_ARG32(1); /* vcpuid */
        break;
    case __HYPERVISOR_mmuext_op:
        APPEND_ARG32(1); /* count */
        break;
    case __HYPERVISOR_sched_op:
        APPEND_ARG32(0); /* cmd */
        break;
    }

    trace_time(event, sizeof(uint32_t) * (1 + (a - d.args)), &d);
}
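
/*
 * Example of the resulting record (for illustration): for
 * __HYPERVISOR_vcpu_op, args[0] (cmd) and args[1] (vcpuid) are appended, so
 * d.op carries the hypercall number plus two TRC_PV_HYPERCALL_V2_ARG_32()
 * markers, and the trace_time() call above emits 4 * (1 + 2) = 12 bytes of
 * payload.
 */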

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */