// SPDX-License-Identifier: GPL-2.0-only
/*
 * Benchmark module for page_pool.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/interrupt.h>
#include <linux/limits.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <net/page_pool/helpers.h>

#include "time_bench.h"

static int verbose = 1;
#define MY_POOL_SIZE 1024
/* Makes tests selectable. Useful for perf-record to analyze a single test.
 * Hint: Bash shells support writing binary numbers like: $((2#101010))
 *
 * # modprobe bench_page_pool_simple run_flags=$((2#100))
 */
static unsigned long run_flags = 0xFFFFFFFF;
module_param(run_flags, ulong, 0);
MODULE_PARM_DESC(run_flags, "Limit which bench tests run");

/* Count the bit number from the enum */
enum benchmark_bit {
	bit_run_bench_baseline,
	bit_run_bench_no_softirq01,
	bit_run_bench_no_softirq02,
	bit_run_bench_no_softirq03,
};

#define bit(b) (1 << (b))
#define enabled(b) ((run_flags & (bit(b))))
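
/* Example (illustrative): bit_run_bench_baseline is bit 0, so
 * run_flags=$((2#0001)) runs only the baseline tests, while
 * run_flags=$((2#0110)) selects the no_softirq01 and no_softirq02 tests.
 */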

/* Notice: time_bench is limited to U32_MAX nr of loops */
static unsigned long loops = 10000000;
module_param(loops, ulong, 0);
MODULE_PARM_DESC(loops, "Specify number of loops the bench will run");

/* When timing at the nanosec level, we need to know the overhead
 * introduced by the for loop itself
 */
static int time_bench_for_loop(struct time_bench_record *rec, void *data)
{
	uint64_t loops_cnt = 0;
	int i;

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		loops_cnt++;
		barrier(); /* prevent compiler from optimizing out the loop */
	}
	time_bench_stop(rec, loops_cnt);
	return loops_cnt;
}

static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
{
	uint64_t loops_cnt = 0;
	atomic_t cnt;
	int i;

	atomic_set(&cnt, 0);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		atomic_inc(&cnt);
		barrier(); /* prevent compiler from optimizing out the loop */
	}
	loops_cnt = atomic_read(&cnt);
	time_bench_stop(rec, loops_cnt);
	return loops_cnt;
}

/* The ptr_ring in page_pool uses a spinlock. We need to know the minimum
 * overhead of taking+releasing a spinlock, to know the cycles that can be
 * saved by e.g. amortizing this via bulking.
 */
static int time_bench_lock(struct time_bench_record *rec, void *data)
{
	uint64_t loops_cnt = 0;
	spinlock_t lock;
	int i;

	spin_lock_init(&lock);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		spin_lock(&lock);
		loops_cnt++;
		barrier(); /* prevent compiler from optimizing out the loop */
		spin_unlock(&lock);
	}
	time_bench_stop(rec, loops_cnt);
	return loops_cnt;
}
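
/* Illustrative sketch, not part of the original benchmark: the same lock
 * test, but taking the spinlock once per 64-iteration batch. Comparing
 * against time_bench_lock() above approximates the cycles that bulking can
 * save. Marked __maybe_unused since it is not wired into the test runner.
 */
static int __maybe_unused time_bench_lock_bulked(struct time_bench_record *rec,
						 void *data)
{
	uint64_t loops_cnt = 0;
	spinlock_t lock;
	int i, j;

	spin_lock_init(&lock);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i += 64) {
		spin_lock(&lock); /* one lock+unlock amortized over the batch */
		for (j = 0; j < 64 && i + j < rec->loops; j++) {
			loops_cnt++;
			barrier(); /* prevent compiler from optimizing out the loop */
		}
		spin_unlock(&lock);
	}
	time_bench_stop(rec, loops_cnt);
	return loops_cnt;
}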

/* Helper for filling some pages into the ptr_ring */
static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
{
	/* GFP_ATOMIC needed when run under softirq */
	gfp_t gfp_mask = GFP_ATOMIC;
	struct page **array;
	int i;

	array = kcalloc(elems, sizeof(struct page *), gfp_mask);
	if (!array)
		return;

	for (i = 0; i < elems; i++)
		array[i] = page_pool_alloc_pages(pp, gfp_mask);
	for (i = 0; i < elems; i++) {
		if (array[i])
			page_pool_put_page(pp, array[i], -1, false);
	}

	kfree(array);
}

enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };

/* Depends on compiler optimizing this function */
static int time_bench_page_pool(struct time_bench_record *rec, void *data,
				enum test_type type, const char *func)
{
	uint64_t loops_cnt = 0;
	gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
	int i, err;

	struct page_pool *pp;
	struct page *page;

	struct page_pool_params pp_params = {
		.order = 0,
		.flags = 0,
		.pool_size = MY_POOL_SIZE,
		.nid = NUMA_NO_NODE,
		.dev = NULL, /* Only used for DMA mapping */
		.dma_dir = DMA_BIDIRECTIONAL,
	};

	pp = page_pool_create(&pp_params);
	if (IS_ERR(pp)) {
		err = PTR_ERR(pp);
		pr_warn("%s: Error(%d) creating page_pool\n", func, err);
		goto out;
	}
	pp_fill_ptr_ring(pp, 64);

	if (in_serving_softirq())
		pr_warn("%s(): in_serving_softirq fast-path\n", func);
	else
		pr_warn("%s(): Cannot use page_pool fast-path\n", func);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		/* Common fast-path alloc that depends on in_serving_softirq() */
		page = page_pool_alloc_pages(pp, gfp_mask);
		if (!page)
			break;
		loops_cnt++;
		barrier(); /* prevent compiler from optimizing out the loop */

		/* The benchmark's purpose is to test different return paths.
		 * Compiler should inline and optimize other function calls out
		 */
		if (type == type_fast_path) {
			/* Fast-path recycling, e.g. XDP_DROP use-case */
			page_pool_recycle_direct(pp, page);

		} else if (type == type_ptr_ring) {
			/* Normal return path */
			page_pool_put_page(pp, page, -1, false);

		} else if (type == type_page_allocator) {
			/* Test the case where pages are not recycled, but
			 * instead returned to the system page allocator
			 */
			get_page(page); /* cause no-recycling */
			page_pool_put_page(pp, page, -1, false);
			put_page(page);
		} else {
			BUILD_BUG();
		}
	}
	time_bench_stop(rec, loops_cnt);
	page_pool_destroy(pp);
out:
	return loops_cnt;
}

static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
					    void *data)
{
	return time_bench_page_pool(rec, data, type_fast_path, __func__);
}

static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
					   void *data)
{
	return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
}

static int time_bench_page_pool03_slow(struct time_bench_record *rec,
				       void *data)
{
	return time_bench_page_pool(rec, data, type_page_allocator, __func__);
}
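
/* Illustrative sketch, not part of the original module: the fast-path test
 * only hits the in_serving_softirq() recycle path when invoked from actual
 * softirq context, e.g. via a tasklet. The handler below shows one way to
 * do that; the name pp_tasklet_handler is hypothetical, and the tasklet is
 * not scheduled anywhere in this module.
 */
static void __maybe_unused pp_tasklet_handler(struct tasklet_struct *t)
{
	uint32_t nr_loops = loops;

	if (enabled(bit_run_bench_no_softirq01))
		time_bench_loop(nr_loops, 0, "tasklet-page_pool01", NULL,
				time_bench_page_pool01_fast_path);
}
/* Usage sketch (<linux/interrupt.h> is already included):
 *   static DECLARE_TASKLET(pp_tasklet, pp_tasklet_handler);
 *   tasklet_schedule(&pp_tasklet);
 *   tasklet_kill(&pp_tasklet);   // in module exit
 */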

static int run_benchmark_tests(void)
{
	uint32_t nr_loops = loops;

	/* Baseline tests */
	if (enabled(bit_run_bench_baseline)) {
		time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
				time_bench_for_loop);
		time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
				time_bench_atomic_inc);
		time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
	}

	/* These tests cannot activate the correct code path, due to
	 * no-softirq context
	 */
	if (enabled(bit_run_bench_no_softirq01))
		time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
				time_bench_page_pool01_fast_path);
	if (enabled(bit_run_bench_no_softirq02))
		time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
				time_bench_page_pool02_ptr_ring);
	if (enabled(bit_run_bench_no_softirq03))
		time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
				time_bench_page_pool03_slow);

	return 0;
}

static int __init bench_page_pool_simple_module_init(void)
{
	if (verbose)
		pr_info("Loaded\n");

	if (loops > U32_MAX) {
		pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
		       U32_MAX);
		return -ECHRNG;
	}

	run_benchmark_tests();

	return 0;
}
module_init(bench_page_pool_simple_module_init);

static void __exit bench_page_pool_simple_module_exit(void)
{
	if (verbose)
		pr_info("Unloaded\n");
}
module_exit(bench_page_pool_simple_module_exit);

MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
MODULE_LICENSE("GPL");