1 // SPDX-License-Identifier: GPL-2.0-or-later
2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
3
4 #define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
13
14 #include "../../kselftest_harness.h"
15 #include "../../pidfd/pidfd.h"
16 #include "../statmount/statmount.h"
17 #include "../utils.h"
18
// Needed for linux/fanotify.h
// NOTE(review): #ifndef only tests for a macro of this name, so the typedef
// below is skipped only when __kernel_fsid_t has been #defined - confirm that
// this is the intended guard.
#ifndef __kernel_fsid_t
typedef struct {
	int	val[2];
} __kernel_fsid_t;
#endif
25
26 #include <sys/fanotify.h>
27
// Template for the temporary directory that becomes the tests' root. Each
// fixture copies it and hands the copy to mkdtemp().
static const char root_mntpoint_templ[] =
	"/tmp/mount-notify_test_root.XXXXXX";
29
// Mark types that the fixture setup tries, one per loop iteration, when
// checking that the tmpfs root can be watched.
static const int mark_types[] = {
	FAN_MARK_FILESYSTEM,
	FAN_MARK_MOUNT,
	FAN_MARK_INODE,
};
35
// Final mark command applied to each fanotify group by the fixture setup:
// group 0 gets a redundant ADD, groups 1 and 2 get REMOVE and FLUSH.
static const int mark_cmds[] = {
	FAN_MARK_ADD,
	FAN_MARK_REMOVE,
	FAN_MARK_FLUSH,
};

// One fanotify group is created per mark command.
#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
43
FIXTURE(fanotify)44 FIXTURE(fanotify) {
45 int fan_fd[NUM_FAN_FDS];
46 char buf[256];
47 unsigned int rem;
48 void *next;
49 char root_mntpoint[sizeof(root_mntpoint_templ)];
50 int orig_root;
51 int orig_ns_fd;
52 int ns_fd;
53 uint64_t root_id;
54 };
55
FIXTURE_SETUP(fanotify)56 FIXTURE_SETUP(fanotify)
57 {
58 int i, ret;
59
60 self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
61 ASSERT_GE(self->orig_ns_fd, 0);
62
63 ret = setup_userns();
64 ASSERT_EQ(ret, 0);
65
66 self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
67 ASSERT_GE(self->ns_fd, 0);
68
69 strcpy(self->root_mntpoint, root_mntpoint_templ);
70 ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
71
72 self->orig_root = open("/", O_PATH | O_CLOEXEC);
73 ASSERT_GE(self->orig_root, 0);
74
75 ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
76
77 ASSERT_EQ(chroot(self->root_mntpoint), 0);
78
79 ASSERT_EQ(chdir("/"), 0);
80
81 ASSERT_EQ(mkdir("a", 0700), 0);
82
83 ASSERT_EQ(mkdir("b", 0700), 0);
84
85 self->root_id = get_unique_mnt_id("/");
86 ASSERT_NE(self->root_id, 0);
87
88 for (i = 0; i < NUM_FAN_FDS; i++) {
89 int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
90 // Verify that watching tmpfs mounted inside userns is allowed
91 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
92 FAN_OPEN, AT_FDCWD, "/");
93 ASSERT_EQ(ret, 0);
94 // ...but watching entire orig root filesystem is not allowed
95 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
96 FAN_OPEN, self->orig_root, ".");
97 ASSERT_NE(ret, 0);
98 close(fan_fd);
99
100 self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
101 0);
102 ASSERT_GE(self->fan_fd[i], 0);
103 // Verify that watching mntns where group was created is allowed
104 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
105 FAN_MARK_MNTNS,
106 FAN_MNT_ATTACH | FAN_MNT_DETACH,
107 self->ns_fd, NULL);
108 ASSERT_EQ(ret, 0);
109 // ...but watching orig mntns is not allowed
110 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
111 FAN_MARK_MNTNS,
112 FAN_MNT_ATTACH | FAN_MNT_DETACH,
113 self->orig_ns_fd, NULL);
114 ASSERT_NE(ret, 0);
115 // On fd[0] we do an extra ADD that changes nothing.
116 // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
117 ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
118 FAN_MARK_MNTNS,
119 FAN_MNT_ATTACH | FAN_MNT_DETACH,
120 self->ns_fd, NULL);
121 ASSERT_EQ(ret, 0);
122 }
123
124 self->rem = 0;
125 }
126
FIXTURE_TEARDOWN(fanotify)127 FIXTURE_TEARDOWN(fanotify)
128 {
129 int i;
130
131 ASSERT_EQ(self->rem, 0);
132 for (i = 0; i < NUM_FAN_FDS; i++)
133 close(self->fan_fd[i]);
134
135 ASSERT_EQ(fchdir(self->orig_root), 0);
136
137 ASSERT_EQ(chroot("."), 0);
138
139 EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
140 EXPECT_EQ(chdir(self->root_mntpoint), 0);
141 EXPECT_EQ(chdir("/"), 0);
142 EXPECT_EQ(rmdir(self->root_mntpoint), 0);
143 }
144
// Consume the next mount notification.
//
// When the buffer is empty, all groups are read: groups 1 and 2 had their mark
// removed in setup and must report EAGAIN, group 0 must deliver events, which
// are then parsed one per call.
//
// @mask: receives the event mask of the consumed event
// Returns the mount ID carried in the event's mount info record.
static uint64_t expect_notify(struct __test_metadata *const _metadata,
			      FIXTURE_DATA(fanotify) *self,
			      uint64_t *mask)
{
	struct fanotify_event_metadata *meta;
	struct fanotify_event_info_mnt *mnt;
	unsigned int thislen;

	if (!self->rem) {
		ssize_t len = -1;
		int i;

		// Walk down to index 0 so that "len" ends up holding the
		// result of the one group that is expected to have events.
		for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
			len = read(self->fan_fd[i], self->buf,
				   sizeof(self->buf));
			if (i > 0) {
				// Groups 1,2 should get EAGAIN
				ASSERT_EQ(len, -1);
				ASSERT_EQ(errno, EAGAIN);
			} else {
				// Group 0 should get events
				ASSERT_GT(len, 0);
			}
		}

		self->rem = len;
		self->next = (void *) self->buf;
	}

	meta = self->next;
	ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));

	// Advance the cursor past this event
	thislen = meta->event_len;
	self->rem -= thislen;
	self->next += thislen;

	*mask = meta->mask;

	// What follows the fixed metadata must be exactly one info record...
	thislen -= sizeof(*meta);
	ASSERT_EQ(thislen, sizeof(*mnt));

	// ...and it must really be a mount record before mnt_id is trusted
	mnt = ((void *) meta) + sizeof(*meta);
	ASSERT_EQ(mnt->hdr.info_type, FAN_EVENT_INFO_TYPE_MNT);
	ASSERT_EQ(mnt->hdr.len, sizeof(*mnt));

	return mnt->mnt_id;
}
190
// Consume @n notifications, storing the mask and mount ID of the k-th event in
// mask[k] and mnts[k].
static void expect_notify_n(struct __test_metadata *const _metadata,
			    FIXTURE_DATA(fanotify) *self,
			    unsigned int n, uint64_t mask[], uint64_t mnts[])
{
	unsigned int idx = 0;

	while (idx < n) {
		mnts[idx] = expect_notify(_metadata, self, &mask[idx]);
		idx++;
	}
}
200
// Consume one notification and require its mask to equal @expect_mask.
// Returns the mount ID reported by the event.
static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
				   FIXTURE_DATA(fanotify) *self,
				   uint64_t expect_mask)
{
	uint64_t mask;
	const uint64_t mntid = expect_notify(_metadata, self, &mask);

	ASSERT_EQ(expect_mask, mask);
	return mntid;
}
212
213
// Consume @n notifications that must all carry @mask, storing the reported
// mount IDs in mnts[0..n-1] in arrival order.
static void expect_notify_mask_n(struct __test_metadata *const _metadata,
				 FIXTURE_DATA(fanotify) *self,
				 uint64_t mask, unsigned int n, uint64_t mnts[])
{
	uint64_t *slot = mnts;
	uint64_t *const end = mnts + n;

	for (; slot != end; slot++)
		*slot = expect_notify_mask(_metadata, self, mask);
}
223
// Assert that list1 and list2, both of length @num, hold the same set of mount
// IDs, in any order.
static void verify_mount_ids(struct __test_metadata *const _metadata,
			     const uint64_t list1[], const uint64_t list2[],
			     size_t num)
{
	unsigned int i, j;

	// Neither list may contain a duplicate. Comparing each unordered pair
	// once is enough, since inequality is symmetric.
	for (i = 0; i < num; i++) {
		for (j = i + 1; j < num; j++) {
			ASSERT_NE(list1[i], list1[j]);
			ASSERT_NE(list2[i], list2[j]);
		}
	}

	// Every member of list1 must occur in list2. Combined with the
	// uniqueness check above, this means the two lists represent the same
	// set.
	for (i = 0; i < num; i++) {
		j = 0;
		while (j < num && list1[i] != list2[j])
			j++;
		// j == num means the scan ran off the end without a match
		ASSERT_NE(j, num);
	}
}
249
check_mounted(struct __test_metadata * const _metadata,const uint64_t mnts[],size_t num)250 static void check_mounted(struct __test_metadata *const _metadata,
251 const uint64_t mnts[], size_t num)
252 {
253 ssize_t ret;
254 uint64_t *list;
255
256 list = malloc((num + 1) * sizeof(list[0]));
257 ASSERT_NE(list, NULL);
258
259 ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
260 ASSERT_EQ(ret, num);
261
262 verify_mount_ids(_metadata, mnts, list, num);
263
264 free(list);
265 }
266
setup_mount_tree(struct __test_metadata * const _metadata,int log2_num)267 static void setup_mount_tree(struct __test_metadata *const _metadata,
268 int log2_num)
269 {
270 int ret, i;
271
272 ret = mount("", "/", NULL, MS_SHARED, NULL);
273 ASSERT_EQ(ret, 0);
274
275 for (i = 0; i < log2_num; i++) {
276 ret = mount("/", "/", NULL, MS_BIND, NULL);
277 ASSERT_EQ(ret, 0);
278 }
279 }
280
TEST_F(fanotify,bind)281 TEST_F(fanotify, bind)
282 {
283 int ret;
284 uint64_t mnts[2] = { self->root_id };
285
286 ret = mount("/", "/", NULL, MS_BIND, NULL);
287 ASSERT_EQ(ret, 0);
288
289 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
290 ASSERT_NE(mnts[0], mnts[1]);
291
292 check_mounted(_metadata, mnts, 2);
293
294 // Cleanup
295 uint64_t detach_id;
296 ret = umount("/");
297 ASSERT_EQ(ret, 0);
298
299 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
300 ASSERT_EQ(detach_id, mnts[1]);
301
302 check_mounted(_metadata, mnts, 1);
303 }
304
TEST_F(fanotify,move)305 TEST_F(fanotify, move)
306 {
307 int ret;
308 uint64_t mnts[2] = { self->root_id };
309 uint64_t move_id;
310
311 ret = mount("/", "/a", NULL, MS_BIND, NULL);
312 ASSERT_EQ(ret, 0);
313
314 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
315 ASSERT_NE(mnts[0], mnts[1]);
316
317 check_mounted(_metadata, mnts, 2);
318
319 ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
320 ASSERT_EQ(ret, 0);
321
322 move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
323 ASSERT_EQ(move_id, mnts[1]);
324
325 // Cleanup
326 ret = umount("/b");
327 ASSERT_EQ(ret, 0);
328
329 check_mounted(_metadata, mnts, 1);
330 }
331
TEST_F(fanotify,propagate)332 TEST_F(fanotify, propagate)
333 {
334 const unsigned int log2_num = 4;
335 const unsigned int num = (1 << log2_num);
336 uint64_t mnts[num];
337
338 setup_mount_tree(_metadata, log2_num);
339
340 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
341
342 mnts[0] = self->root_id;
343 check_mounted(_metadata, mnts, num);
344
345 // Cleanup
346 int ret;
347 uint64_t mnts2[num];
348 ret = umount2("/", MNT_DETACH);
349 ASSERT_EQ(ret, 0);
350
351 ret = mount("", "/", NULL, MS_PRIVATE, NULL);
352 ASSERT_EQ(ret, 0);
353
354 mnts2[0] = self->root_id;
355 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
356 verify_mount_ids(_metadata, mnts, mnts2, num);
357
358 check_mounted(_metadata, mnts, 1);
359 }
360
TEST_F(fanotify,fsmount)361 TEST_F(fanotify, fsmount)
362 {
363 int ret, fs, mnt;
364 uint64_t mnts[2] = { self->root_id };
365
366 fs = fsopen("tmpfs", 0);
367 ASSERT_GE(fs, 0);
368
369 ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
370 ASSERT_EQ(ret, 0);
371
372 mnt = fsmount(fs, 0, 0);
373 ASSERT_GE(mnt, 0);
374
375 close(fs);
376
377 ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
378 ASSERT_EQ(ret, 0);
379
380 close(mnt);
381
382 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
383 ASSERT_NE(mnts[0], mnts[1]);
384
385 check_mounted(_metadata, mnts, 2);
386
387 // Cleanup
388 uint64_t detach_id;
389 ret = umount("/a");
390 ASSERT_EQ(ret, 0);
391
392 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
393 ASSERT_EQ(detach_id, mnts[1]);
394
395 check_mounted(_metadata, mnts, 1);
396 }
397
TEST_F(fanotify,reparent)398 TEST_F(fanotify, reparent)
399 {
400 uint64_t mnts[6] = { self->root_id };
401 uint64_t dmnts[3];
402 uint64_t masks[3];
403 unsigned int i;
404 int ret;
405
406 // Create setup with a[1] -> b[2] propagation
407 ret = mount("/", "/a", NULL, MS_BIND, NULL);
408 ASSERT_EQ(ret, 0);
409
410 ret = mount("", "/a", NULL, MS_SHARED, NULL);
411 ASSERT_EQ(ret, 0);
412
413 ret = mount("/a", "/b", NULL, MS_BIND, NULL);
414 ASSERT_EQ(ret, 0);
415
416 ret = mount("", "/b", NULL, MS_SLAVE, NULL);
417 ASSERT_EQ(ret, 0);
418
419 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
420
421 check_mounted(_metadata, mnts, 3);
422
423 // Mount on a[3], which is propagated to b[4]
424 ret = mount("/", "/a", NULL, MS_BIND, NULL);
425 ASSERT_EQ(ret, 0);
426
427 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
428
429 check_mounted(_metadata, mnts, 5);
430
431 // Mount on b[5], not propagated
432 ret = mount("/", "/b", NULL, MS_BIND, NULL);
433 ASSERT_EQ(ret, 0);
434
435 mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
436
437 check_mounted(_metadata, mnts, 6);
438
439 // Umount a[3], which is propagated to b[4], but not b[5]
440 // This will result in b[5] "falling" on b[2]
441 ret = umount("/a");
442 ASSERT_EQ(ret, 0);
443
444 expect_notify_n(_metadata, self, 3, masks, dmnts);
445 verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
446
447 for (i = 0; i < 3; i++) {
448 if (dmnts[i] == mnts[5]) {
449 ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
450 } else {
451 ASSERT_EQ(masks[i], FAN_MNT_DETACH);
452 }
453 }
454
455 mnts[3] = mnts[5];
456 check_mounted(_metadata, mnts, 4);
457
458 // Cleanup
459 ret = umount("/b");
460 ASSERT_EQ(ret, 0);
461
462 ret = umount("/a");
463 ASSERT_EQ(ret, 0);
464
465 ret = umount("/b");
466 ASSERT_EQ(ret, 0);
467
468 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
469 verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
470
471 check_mounted(_metadata, mnts, 1);
472 }
473
TEST_F(fanotify,rmdir)474 TEST_F(fanotify, rmdir)
475 {
476 uint64_t mnts[3] = { self->root_id };
477 int ret;
478
479 ret = mount("/", "/a", NULL, MS_BIND, NULL);
480 ASSERT_EQ(ret, 0);
481
482 ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
483 ASSERT_EQ(ret, 0);
484
485 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
486
487 check_mounted(_metadata, mnts, 3);
488
489 ret = chdir("/a");
490 ASSERT_EQ(ret, 0);
491
492 ret = fork();
493 ASSERT_GE(ret, 0);
494
495 if (ret == 0) {
496 chdir("/");
497 unshare(CLONE_NEWNS);
498 mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
499 umount2("/a", MNT_DETACH);
500 // This triggers a detach in the other namespace
501 rmdir("/a");
502 exit(0);
503 }
504 wait(NULL);
505
506 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
507 check_mounted(_metadata, mnts, 1);
508
509 // Cleanup
510 ret = chdir("/");
511 ASSERT_EQ(ret, 0);
512 }
513
TEST_F(fanotify,pivot_root)514 TEST_F(fanotify, pivot_root)
515 {
516 uint64_t mnts[3] = { self->root_id };
517 uint64_t mnts2[3];
518 int ret;
519
520 ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
521 ASSERT_EQ(ret, 0);
522
523 mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
524
525 ret = mkdir("/a/new", 0700);
526 ASSERT_EQ(ret, 0);
527
528 ret = mkdir("/a/old", 0700);
529 ASSERT_EQ(ret, 0);
530
531 ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
532 ASSERT_EQ(ret, 0);
533
534 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
535 check_mounted(_metadata, mnts, 3);
536
537 ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
538 ASSERT_EQ(ret, 0);
539
540 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
541 verify_mount_ids(_metadata, mnts, mnts2, 2);
542 check_mounted(_metadata, mnts, 3);
543
544 // Cleanup
545 ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
546 ASSERT_EQ(ret, 0);
547
548 ret = umount("/a/new");
549 ASSERT_EQ(ret, 0);
550
551 ret = umount("/a");
552 ASSERT_EQ(ret, 0);
553
554 check_mounted(_metadata, mnts, 1);
555 }
556
557 TEST_HARNESS_MAIN
558