1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2025, Google LLC.
4 */
5
6 #include <time.h>
7
8 #include "lru_gen_util.h"
9
/*
 * Parser state used while walking the memcg lru_gen stats file. The file is
 * laid out like this (some extra whitespace elided):
 *
 * memcg (id) (path)
 *   node (id)
 *     (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
 *
 * One handler exists per nesting level; each handler looks at a single line
 * and picks the handler for the next step through @next_handler.
 */
struct memcg_stats_parse_context {
	/* Set by a handler once it is finished with the current line. */
	bool consumed;

	/* Handler to invoke next; the read loop stops once this is NULL. */
	void (*next_handler)(struct memcg_stats *stats,
			     struct memcg_stats_parse_context *ctx,
			     char *line);

	/* Index in the stats nodes array of the node being filled in. */
	int current_node_idx;

	/* Name of the memcg whose stats are being looked up. */
	const char *name;
};
27
/*
 * Line handlers, one per parser state. They are declared up front because
 * each handler may install another one as ctx->next_handler.
 */
static void
memcg_stats_handle_searching(struct memcg_stats *stats,
			     struct memcg_stats_parse_context *ctx, char *line);
static void
memcg_stats_handle_in_memcg(struct memcg_stats *stats,
			    struct memcg_stats_parse_context *ctx, char *line);
static void
memcg_stats_handle_in_node(struct memcg_stats *stats,
			   struct memcg_stats_parse_context *ctx, char *line);
37
/*
 * Cursor for splitting a string into whitespace-separated tokens.
 *
 * Initialize @str with the string to split; @save holds the strtok_r()
 * position between calls.
 */
struct split_iterator {
	char *str;
	char *save;
};

/*
 * Return the next whitespace-delimited token from @it, or NULL once no tokens
 * remain. The underlying string is modified in place by strtok_r().
 */
static char *split_next(struct split_iterator *it)
{
	char *input = it->str;

	/*
	 * strtok_r() takes the string only on the first call; every later call
	 * must pass NULL so that it continues from @save.
	 */
	it->str = NULL;

	return strtok_r(input, " \t\n\r", &it->save);
}
50
memcg_stats_handle_searching(struct memcg_stats * stats,struct memcg_stats_parse_context * ctx,char * line)51 static void memcg_stats_handle_searching(struct memcg_stats *stats,
52 struct memcg_stats_parse_context *ctx,
53 char *line)
54 {
55 struct split_iterator it = { .str = line };
56 char *prefix = split_next(&it);
57 char *memcg_id = split_next(&it);
58 char *memcg_name = split_next(&it);
59 char *end;
60
61 ctx->consumed = true;
62
63 if (!prefix || strcmp("memcg", prefix))
64 return; /* Not a memcg line (maybe empty), skip */
65
66 TEST_ASSERT(memcg_id && memcg_name,
67 "malformed memcg line; no memcg id or memcg_name");
68
69 if (strcmp(memcg_name + 1, ctx->name))
70 return; /* Wrong memcg, skip */
71
72 /* Found it! */
73
74 stats->memcg_id = strtoul(memcg_id, &end, 10);
75 TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
76 if (!stats->memcg_id)
77 return; /* Removed memcg? */
78
79 ctx->next_handler = memcg_stats_handle_in_memcg;
80 }
81
memcg_stats_handle_in_memcg(struct memcg_stats * stats,struct memcg_stats_parse_context * ctx,char * line)82 static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
83 struct memcg_stats_parse_context *ctx,
84 char *line)
85 {
86 struct split_iterator it = { .str = line };
87 char *prefix = split_next(&it);
88 char *id = split_next(&it);
89 long found_node_id;
90 char *end;
91
92 ctx->consumed = true;
93 ctx->current_node_idx = -1;
94
95 if (!prefix)
96 return; /* Skip empty lines */
97
98 if (!strcmp("memcg", prefix)) {
99 /* Memcg done, found next one; stop. */
100 ctx->next_handler = NULL;
101 return;
102 } else if (strcmp("node", prefix))
103 TEST_ASSERT(false, "found malformed line after 'memcg ...',"
104 "token: '%s'", prefix);
105
106 /* At this point we know we have a node line. Parse the ID. */
107
108 TEST_ASSERT(id, "malformed node line; no node id");
109
110 found_node_id = strtol(id, &end, 10);
111 TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
112
113 ctx->current_node_idx = stats->nr_nodes++;
114 TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
115 "memcg has stats for too many nodes, max is %d",
116 MAX_NR_NODES);
117 stats->nodes[ctx->current_node_idx].node = found_node_id;
118
119 ctx->next_handler = memcg_stats_handle_in_node;
120 }
121
memcg_stats_handle_in_node(struct memcg_stats * stats,struct memcg_stats_parse_context * ctx,char * line)122 static void memcg_stats_handle_in_node(struct memcg_stats *stats,
123 struct memcg_stats_parse_context *ctx,
124 char *line)
125 {
126 char *my_line = strdup(line);
127 struct split_iterator it = { .str = my_line };
128 char *gen, *age, *nr_anon, *nr_file;
129 struct node_stats *node_stats;
130 struct generation_stats *gen_stats;
131 char *end;
132
133 TEST_ASSERT(it.str, "failed to copy input line");
134
135 gen = split_next(&it);
136
137 if (!gen)
138 goto out_consume; /* Skip empty lines */
139
140 if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
141 /*
142 * Reached next memcg or node section. Don't consume, let the
143 * other handler deal with this.
144 */
145 ctx->next_handler = memcg_stats_handle_in_memcg;
146 goto out;
147 }
148
149 node_stats = &stats->nodes[ctx->current_node_idx];
150 TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
151 "found too many generation lines; max is %d",
152 MAX_NR_GENS);
153 gen_stats = &node_stats->gens[node_stats->nr_gens++];
154
155 age = split_next(&it);
156 nr_anon = split_next(&it);
157 nr_file = split_next(&it);
158
159 TEST_ASSERT(age && nr_anon && nr_file,
160 "malformed generation line; not enough tokens");
161
162 gen_stats->gen = (int)strtol(gen, &end, 10);
163 TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
164
165 gen_stats->age_ms = strtol(age, &end, 10);
166 TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
167
168 gen_stats->nr_anon = strtol(nr_anon, &end, 10);
169 TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
170 nr_anon);
171
172 gen_stats->nr_file = strtol(nr_file, &end, 10);
173 TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
174
175 out_consume:
176 ctx->consumed = true;
177 out:
178 free(my_line);
179 }
180
print_memcg_stats(const struct memcg_stats * stats,const char * name)181 static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
182 {
183 int node, gen;
184
185 pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
186 for (node = 0; node < stats->nr_nodes; ++node) {
187 pr_debug("\tnode %d\n", stats->nodes[node].node);
188 for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
189 const struct generation_stats *gstats =
190 &stats->nodes[node].gens[gen];
191
192 pr_debug("\t\tgen %d\tage_ms %ld"
193 "\tnr_anon %ld\tnr_file %ld\n",
194 gstats->gen, gstats->age_ms, gstats->nr_anon,
195 gstats->nr_file);
196 }
197 }
198 }
199
200 /* Re-read lru_gen debugfs information for @memcg into @stats. */
lru_gen_read_memcg_stats(struct memcg_stats * stats,const char * memcg)201 void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
202 {
203 FILE *f;
204 ssize_t read = 0;
205 char *line = NULL;
206 size_t bufsz;
207 struct memcg_stats_parse_context ctx = {
208 .next_handler = memcg_stats_handle_searching,
209 .name = memcg,
210 };
211
212 memset(stats, 0, sizeof(struct memcg_stats));
213
214 f = fopen(LRU_GEN_DEBUGFS, "r");
215 TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
216
217 while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
218 ctx.consumed = false;
219
220 do {
221 ctx.next_handler(stats, &ctx, line);
222 if (!ctx.next_handler)
223 break;
224 } while (!ctx.consumed);
225 }
226
227 if (read < 0 && !feof(f))
228 TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
229
230 TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
231 "Did the memcg get created in the proper mount?",
232 memcg);
233 if (line)
234 free(line);
235 TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
236
237 print_memcg_stats(stats, memcg);
238 }
239
240 /*
241 * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
242 *
243 * If @target_gen is negative, look for all generations.
244 */
lru_gen_sum_memcg_stats_for_gen(int target_gen,const struct memcg_stats * stats)245 long lru_gen_sum_memcg_stats_for_gen(int target_gen,
246 const struct memcg_stats *stats)
247 {
248 int node, gen;
249 long total_nr = 0;
250
251 for (node = 0; node < stats->nr_nodes; ++node) {
252 const struct node_stats *node_stats = &stats->nodes[node];
253
254 for (gen = 0; gen < node_stats->nr_gens; ++gen) {
255 const struct generation_stats *gen_stats =
256 &node_stats->gens[gen];
257
258 if (target_gen >= 0 && gen_stats->gen != target_gen)
259 continue;
260
261 total_nr += gen_stats->nr_anon + gen_stats->nr_file;
262 }
263 }
264
265 return total_nr;
266 }
267
/*
 * Sum the pages recorded in @stats over every generation and node, by
 * passing a negative generation to lru_gen_sum_memcg_stats_for_gen().
 */
long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
{
	const int all_generations = -1;

	return lru_gen_sum_memcg_stats_for_gen(all_generations, stats);
}
273
/*
 * Whether lru_gen aging should force page table scanning. The value is
 * written as the last field of the aging command.
 *
 * If this is set to false, eviction must be done before running extra aging
 * passes.
 */
static const bool force_scan = true;
281
run_aging_impl(unsigned long memcg_id,int node_id,int max_gen)282 static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
283 {
284 FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
285 char *command;
286 size_t sz;
287
288 TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
289 sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
290 memcg_id, node_id, max_gen, force_scan);
291 TEST_ASSERT(sz > 0, "creating aging command failed");
292
293 pr_debug("Running aging command: %s", command);
294 if (fwrite(command, sizeof(char), sz, f) < sz) {
295 TEST_ASSERT(false, "writing aging command %s to %s failed",
296 command, LRU_GEN_DEBUGFS);
297 }
298
299 TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
300 }
301
lru_gen_do_aging(struct memcg_stats * stats,const char * memcg)302 void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
303 {
304 int node, gen;
305
306 pr_debug("lru_gen: invoking aging...\n");
307
308 /* Must read memcg stats to construct the proper aging command. */
309 lru_gen_read_memcg_stats(stats, memcg);
310
311 for (node = 0; node < stats->nr_nodes; ++node) {
312 int max_gen = 0;
313
314 for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
315 int this_gen = stats->nodes[node].gens[gen].gen;
316
317 max_gen = max_gen > this_gen ? max_gen : this_gen;
318 }
319
320 run_aging_impl(stats->memcg_id, stats->nodes[node].node,
321 max_gen);
322 }
323
324 /* Re-read so callers get updated information */
325 lru_gen_read_memcg_stats(stats, memcg);
326 }
327
328 /*
329 * Find which generation contains at least @pages pages, assuming that
330 * such a generation exists.
331 */
lru_gen_find_generation(const struct memcg_stats * stats,unsigned long pages)332 int lru_gen_find_generation(const struct memcg_stats *stats,
333 unsigned long pages)
334 {
335 int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
336
337 for (node = 0; node < stats->nr_nodes; ++node)
338 for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
339 ++gen_idx) {
340 gen = stats->nodes[node].gens[gen_idx].gen;
341 max_gen = gen > max_gen ? gen : max_gen;
342 min_gen = gen < min_gen ? gen : min_gen;
343 }
344
345 for (gen = min_gen; gen <= max_gen; ++gen)
346 /* See if this generation has enough pages. */
347 if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
348 return gen;
349
350 return -1;
351 }
352
lru_gen_usable(void)353 bool lru_gen_usable(void)
354 {
355 long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
356 int lru_gen_fd, lru_gen_debug_fd;
357 char mglru_feature_str[8] = {};
358 long mglru_features;
359
360 lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
361 if (lru_gen_fd < 0) {
362 puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
363 return false;
364 }
365 if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
366 puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
367 close(lru_gen_fd);
368 return false;
369 }
370 close(lru_gen_fd);
371
372 mglru_features = strtol(mglru_feature_str, NULL, 16);
373 if ((mglru_features & required_features) != required_features) {
374 printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
375 mglru_features, required_features);
376 printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
377 required_features);
378 return false;
379 }
380
381 lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
382 __TEST_REQUIRE(lru_gen_debug_fd >= 0,
383 "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
384 "but lru_gen is enabled, so cannot use page_idle.");
385 close(lru_gen_debug_fd);
386 return true;
387 }
388