1 #ifndef _TOOLS_LINUX_RING_BUFFER_H_
2 #define _TOOLS_LINUX_RING_BUFFER_H_
3
4 #include <asm/barrier.h>
5 #include <linux/perf_event.h>
6
7 /*
8 * Contract with kernel for walking the perf ring buffer from
9 * user space requires the following barrier pairing (quote
10 * from kernel/events/ring_buffer.c):
11 *
12 * Since the mmap() consumer (userspace) can run on a
13 * different CPU:
14 *
 *   kernel                             user
 *
 *   if (LOAD ->data_tail) {            LOAD ->data_head
 *                      (A)             smp_rmb()       (C)
 *      STORE $data                     LOAD $data
 *      smp_wmb()       (B)             smp_mb()        (D)
 *      STORE ->data_head               STORE ->data_tail
 *   }
23 *
24 * Where A pairs with D, and B pairs with C.
25 *
 * In our case A is a control dependency that separates the
 * load of ->data_tail from the stores of $data. If
 * ->data_tail indicates that there is no room in the buffer,
 * we do not store $data.
30 *
31 * D needs to be a full barrier since it separates the data
32 * READ from the tail WRITE.
33 *
34 * For B a WMB is sufficient since it separates two WRITEs,
35 * and for C an RMB is sufficient since it separates two READs.
36 *
37 * Note, instead of B, C, D we could also use smp_store_release()
38 * in B and D as well as smp_load_acquire() in C.
39 *
40 * However, this optimization does not make sense for all kernel
41 * supported architectures since for a fair number it would
42 * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
43 * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
44 *
 * Thus, on those architectures, smp_wmb() in B and smp_rmb() in C
 * would still be less expensive. For D, smp_store_release() has
 * either the same cost as smp_mb() or is less expensive; for
 * example, because of TSO, x86 can avoid the CPU barrier entirely.
49 */
50
ring_buffer_read_head(struct perf_event_mmap_page * base)51 static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
52 {
53 /*
54 * Architectures where smp_load_acquire() does not fallback to
55 * READ_ONCE() + smp_mb() pair.
56 */
57 #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
58 defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
59 return smp_load_acquire(&base->data_head);
60 #else
61 u64 head = READ_ONCE(base->data_head);
62
63 smp_rmb();
64 return head;
65 #endif
66 }
67
/*
 * Publish a new consumer position by storing @tail to ->data_tail.
 *
 * The store uses smp_store_release(), so it takes the place of the
 * smp_mb() + STORE ->data_tail sequence (D) in the barrier pairing
 * described at the top of this file.
 *
 * @base: the perf_event_mmap_page control page of the ring buffer.
 * @tail: the value to write to base->data_tail.
 */
static inline void
ring_buffer_write_tail(struct perf_event_mmap_page *base, u64 tail)
{
	smp_store_release(&base->data_tail, tail);
}
73
74 #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
75