1 /* Copyright (c) 2016 Facebook
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include <uapi/linux/bpf.h>
8 #include <uapi/linux/ptrace.h>
9 #include <uapi/linux/perf_event.h>
10 #include <linux/version.h>
11 #include <linux/sched.h>
12 #include <bpf/bpf_helpers.h>
13 #include <bpf/bpf_tracing.h>
14 
/*
 * _(P) - fetch the value of the kernel lvalue P via bpf_probe_read_kernel().
 *
 * Evaluates to a copy of P with the same type as P.  The return code of
 * bpf_probe_read_kernel() is not checked.
 */
#define _(P)                                                                   \
	({                                                                     \
		typeof(P) tmp;                                                 \
		bpf_probe_read_kernel(&tmp, sizeof(tmp), &(P));                \
		tmp;                                                           \
	})
21 
/* Intervals shorter than this many microseconds are dropped by oncpu(). */
#define MINBLOCK_US	1
/* Capacity (max_entries) shared by every map declared in this file. */
#define MAX_ENTRIES	10000
24 
25 struct key_t {
26 	char waker[TASK_COMM_LEN];
27 	char target[TASK_COMM_LEN];
28 	u32 wret;
29 	u32 tret;
30 };
31 
32 struct {
33 	__uint(type, BPF_MAP_TYPE_HASH);
34 	__type(key, struct key_t);
35 	__type(value, u64);
36 	__uint(max_entries, MAX_ENTRIES);
37 } counts SEC(".maps");
38 
39 struct {
40 	__uint(type, BPF_MAP_TYPE_HASH);
41 	__type(key, u32);
42 	__type(value, u64);
43 	__uint(max_entries, MAX_ENTRIES);
44 } start SEC(".maps");
45 
46 struct wokeby_t {
47 	char name[TASK_COMM_LEN];
48 	u32 ret;
49 };
50 
51 struct {
52 	__uint(type, BPF_MAP_TYPE_HASH);
53 	__type(key, u32);
54 	__type(value, struct wokeby_t);
55 	__uint(max_entries, MAX_ENTRIES);
56 } wokeby SEC(".maps");
57 
58 struct {
59 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
60 	__uint(key_size, sizeof(u32));
61 	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
62 	__uint(max_entries, MAX_ENTRIES);
63 } stackmap SEC(".maps");
64 
/* Flags passed to every bpf_get_stackid() call in this file. */
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
66 
67 SEC("kprobe/try_to_wake_up")
waker(struct pt_regs * ctx)68 int waker(struct pt_regs *ctx)
69 {
70 	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
71 	struct wokeby_t woke;
72 	u32 pid;
73 
74 	pid = _(p->pid);
75 
76 	bpf_get_current_comm(&woke.name, sizeof(woke.name));
77 	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
78 
79 	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
80 	return 0;
81 }
82 
/*
 * update_counts() - add @delta to the counts entry for the current task and
 * whoever woke it.
 * @ctx:   program context, forwarded to bpf_get_stackid().
 * @pid:   key used to find (and consume) the matching wokeby entry.
 * @delta: amount to add; oncpu() passes a duration in microseconds.
 *
 * The key is built from the current comm and stack id plus, when a wokeby
 * entry exists for @pid, the waker's comm and stack id.  Without such an
 * entry the waker half of the key stays zeroed.
 *
 * Always returns 0; if the counts entry can neither be found nor created
 * the sample is silently dropped.
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	/* every byte of the key must be initialised before it is hashed */
	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		/* a wakeup record is consumed by the first interval it explains */
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		/*
		 * BPF_NOEXIST: if another CPU inserted the key first, this
		 * update fails harmlessly and the lookup below finds its entry.
		 */
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}

	/*
	 * The value lives in a hash map shared by all CPUs, so a plain
	 * "*val += delta" can lose increments when two CPUs hit the same key.
	 * Use an atomic add instead.
	 */
	__sync_fetch_and_add(val, delta);
	return 0;
}
111 
112 #if 1
113 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
114 struct sched_switch_args {
115 	unsigned long long pad;
116 	char prev_comm[TASK_COMM_LEN];
117 	int prev_pid;
118 	int prev_prio;
119 	long long prev_state;
120 	char next_comm[TASK_COMM_LEN];
121 	int next_pid;
122 	int next_prio;
123 };
124 SEC("tracepoint/sched/sched_switch")
oncpu(struct sched_switch_args * ctx)125 int oncpu(struct sched_switch_args *ctx)
126 {
127 	/* record previous thread sleep time */
128 	u32 pid = ctx->prev_pid;
129 #else
130 SEC("kprobe/finish_task_switch")
131 int oncpu(struct pt_regs *ctx)
132 {
133 	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
134 	/* record previous thread sleep time */
135 	u32 pid = _(p->pid);
136 #endif
137 	u64 delta, ts, *tsp;
138 
139 	ts = bpf_ktime_get_ns();
140 	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
141 
142 	/* calculate current thread's delta time */
143 	pid = bpf_get_current_pid_tgid();
144 	tsp = bpf_map_lookup_elem(&start, &pid);
145 	if (!tsp)
146 		/* missed start or filtered */
147 		return 0;
148 
149 	delta = bpf_ktime_get_ns() - *tsp;
150 	bpf_map_delete_elem(&start, &pid);
151 	delta = delta / 1000;
152 	if (delta < MINBLOCK_US)
153 		return 0;
154 
155 	return update_counts(ctx, pid, delta);
156 }
157 char _license[] SEC("license") = "GPL";
158 u32 _version SEC("version") = LINUX_VERSION_CODE;
159